VPlanRecipes.cpp
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "VPlanPatternMatch.h"
17#include "VPlanUtils.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Twine.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/Type.h"
28#include "llvm/IR/Value.h"
32#include "llvm/Support/Debug.h"
37#include <cassert>
38
39using namespace llvm;
40
using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
extern cl::opt<unsigned> ForceTargetInstructionCost;

48#define LV_NAME "loop-vectorize"
49#define DEBUG_TYPE LV_NAME
50
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
53 case VPInstructionSC:
54 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
55 case VPInterleaveSC:
56 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
57 case VPWidenStoreEVLSC:
58 case VPWidenStoreSC:
59 return true;
60 case VPReplicateSC:
61 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
62 ->mayWriteToMemory();
63 case VPWidenCallSC:
64 return !cast<VPWidenCallRecipe>(this)
65 ->getCalledScalarFunction()
66 ->onlyReadsMemory();
67 case VPWidenIntrinsicSC:
68 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
69 case VPBranchOnMaskSC:
70 case VPScalarIVStepsSC:
71 case VPPredInstPHISC:
72 return false;
73 case VPBlendSC:
74 case VPReductionEVLSC:
75 case VPReductionSC:
76 case VPVectorPointerSC:
77 case VPWidenCanonicalIVSC:
78 case VPWidenCastSC:
79 case VPWidenGEPSC:
80 case VPWidenIntOrFpInductionSC:
81 case VPWidenLoadEVLSC:
82 case VPWidenLoadSC:
83 case VPWidenPHISC:
84 case VPWidenSC:
85 case VPWidenEVLSC:
86 case VPWidenSelectSC: {
87 const Instruction *I =
88 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
89 (void)I;
90 assert((!I || !I->mayWriteToMemory()) &&
91 "underlying instruction may write to memory");
92 return false;
93 }
94 default:
95 return true;
96 }
97}
98
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
101 case VPInstructionSC:
102 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
103 case VPWidenLoadEVLSC:
104 case VPWidenLoadSC:
105 return true;
106 case VPReplicateSC:
107 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
108 ->mayReadFromMemory();
109 case VPWidenCallSC:
110 return !cast<VPWidenCallRecipe>(this)
111 ->getCalledScalarFunction()
112 ->onlyWritesMemory();
113 case VPWidenIntrinsicSC:
114 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
115 case VPBranchOnMaskSC:
116 case VPPredInstPHISC:
117 case VPScalarIVStepsSC:
118 case VPWidenStoreEVLSC:
119 case VPWidenStoreSC:
120 return false;
121 case VPBlendSC:
122 case VPReductionEVLSC:
123 case VPReductionSC:
124 case VPVectorPointerSC:
125 case VPWidenCanonicalIVSC:
126 case VPWidenCastSC:
127 case VPWidenGEPSC:
128 case VPWidenIntOrFpInductionSC:
129 case VPWidenPHISC:
130 case VPWidenSC:
131 case VPWidenEVLSC:
132 case VPWidenSelectSC: {
133 const Instruction *I =
134 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
135 (void)I;
136 assert((!I || !I->mayReadFromMemory()) &&
137 "underlying instruction may read from memory");
138 return false;
139 }
140 default:
141 return true;
142 }
143}
144
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
147 case VPDerivedIVSC:
148 case VPPredInstPHISC:
149 case VPScalarCastSC:
150 case VPReverseVectorPointerSC:
151 return false;
152 case VPInstructionSC:
153 return mayWriteToMemory();
154 case VPWidenCallSC: {
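    // Besides writing memory, a widened call has side effects if it may throw
    // or may not return.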
155 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
156 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
157 }
158 case VPWidenIntrinsicSC:
159 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
160 case VPBlendSC:
161 case VPReductionEVLSC:
162 case VPReductionSC:
163 case VPScalarIVStepsSC:
164 case VPVectorPointerSC:
165 case VPWidenCanonicalIVSC:
166 case VPWidenCastSC:
167 case VPWidenGEPSC:
168 case VPWidenIntOrFpInductionSC:
169 case VPWidenPHISC:
170 case VPWidenPointerInductionSC:
171 case VPWidenSC:
172 case VPWidenEVLSC:
173 case VPWidenSelectSC: {
174 const Instruction *I =
175 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
176 (void)I;
177 assert((!I || !I->mayHaveSideEffects()) &&
178 "underlying instruction has side-effects");
179 return false;
180 }
181 case VPInterleaveSC:
182 return mayWriteToMemory();
183 case VPWidenLoadEVLSC:
184 case VPWidenLoadSC:
185 case VPWidenStoreEVLSC:
186 case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
192 return mayWriteToMemory();
193 case VPReplicateSC: {
194 auto *R = cast<VPReplicateRecipe>(this);
195 return R->getUnderlyingInstr()->mayHaveSideEffects();
196 }
197 default:
198 return true;
199 }
200}
201
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
204 assert(InsertPos->getParent() &&
205 "Insertion position not in any VPBasicBlock");
206 InsertPos->getParent()->insert(this, InsertPos->getIterator());
207}
208
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
212 assert(I == BB.end() || I->getParent() == &BB);
213 BB.insert(this, I);
214}
215
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
218 assert(InsertPos->getParent() &&
219 "Insertion position not in any VPBasicBlock");
220 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
221}
222
void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
227}
228
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}
233
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
237}
238
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
243}
244
InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  // Get the underlying instruction for the recipe, if there is one. It is used
247 // to
248 // * decide if cost computation should be skipped for this recipe,
249 // * apply forced target instruction cost.
250 Instruction *UI = nullptr;
251 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
252 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
253 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
254 UI = IG->getInsertPos();
255 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
256 UI = &WidenMem->getIngredient();
257
258 InstructionCost RecipeCost;
259 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
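    // The underlying instruction's cost is accounted for elsewhere (or is
    // deliberately skipped), so this recipe contributes nothing here.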
260 RecipeCost = 0;
261 } else {
262 RecipeCost = computeCost(VF, Ctx);
    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
        RecipeCost.isValid())
      RecipeCost = InstructionCost(ForceTargetInstructionCost);
  }
267
268 LLVM_DEBUG({
269 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
270 dump();
271 });
272 return RecipeCost;
273}
274
InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
277 llvm_unreachable("subclasses should implement computeCost");
278}
279
FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
282 "recipe doesn't have fast math flags");
283 FastMathFlags Res;
284 Res.setAllowReassoc(FMFs.AllowReassoc);
285 Res.setNoNaNs(FMFs.NoNaNs);
286 Res.setNoInfs(FMFs.NoInfs);
287 Res.setNoSignedZeros(FMFs.NoSignedZeros);
288 Res.setAllowReciprocal(FMFs.AllowReciprocal);
289 Res.setAllowContract(FMFs.AllowContract);
290 Res.setApproxFunc(FMFs.ApproxFunc);
291 return Res;
292}
293
294#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
296#endif
297
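// Helpers for recipes that may carry a trailing unroll-part operand: after
// unrolling, the part number is attached as an extra constant live-in
// operand; when it is absent, part 0 is assumed.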
298template <unsigned PartOpIdx>
299VPValue *
VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
  if (U.getNumOperands() == PartOpIdx + 1)
302 return U.getOperand(PartOpIdx);
303 return nullptr;
304}
305
306template <unsigned PartOpIdx>
unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
  if (auto *UnrollPartOp = getUnrollPartOperand(U))
309 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
310 return 0;
311}
312
VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
316 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
317 Pred, DL),
318 Opcode(Opcode), Name(Name.str()) {
319 assert(Opcode == Instruction::ICmp &&
320 "only ICmp predicates supported at the moment");
321}
322
VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
325 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
326 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
327 Opcode(Opcode), Name(Name.str()) {
328 // Make sure the VPInstruction is a floating-point operation.
329 assert(isFPMathOp() && "this op can't take fast-math flags");
330}
331
332bool VPInstruction::doesGeneratePerAllLanes() const {
333 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
334}
335
336bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;
  if (isSingleScalar() || isVectorToScalar())
    return true;
341 switch (Opcode) {
342 case Instruction::ICmp:
343 case Instruction::Select:
351 return true;
352 default:
353 return false;
354 }
355}
356
357Value *VPInstruction::generatePerLane(VPTransformState &State,
358 const VPLane &Lane) {
359 IRBuilderBase &Builder = State.Builder;
360
362 "only PtrAdd opcodes are supported for now");
363 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
364 State.get(getOperand(1), Lane), Name);
365}
366
367Value *VPInstruction::generate(VPTransformState &State) {
368 IRBuilderBase &Builder = State.Builder;
369
  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
372 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
373 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
374 auto *Res =
375 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
376 if (auto *I = dyn_cast<Instruction>(Res))
377 setFlags(I);
378 return Res;
379 }
380
381 switch (getOpcode()) {
382 case VPInstruction::Not: {
383 Value *A = State.get(getOperand(0));
384 return Builder.CreateNot(A, Name);
385 }
386 case Instruction::ICmp: {
387 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
388 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
389 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
390 return Builder.CreateCmp(getPredicate(), A, B, Name);
391 }
392 case Instruction::Select: {
393 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
394 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
395 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
396 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
397 return Builder.CreateSelect(Cond, Op1, Op2, Name);
398 }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
401 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
402 // Get the original loop tripcount.
403 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
404
405 // If this part of the active lane mask is scalar, generate the CMP directly
406 // to avoid unnecessary extracts.
407 if (State.VF.isScalar())
408 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
409 Name);
410
411 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
412 auto *PredTy = VectorType::get(Int1Ty, State.VF);
413 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
414 {PredTy, ScalarTC->getType()},
415 {VIVElem0, ScalarTC}, nullptr, Name);
416 }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
419 //
420 // vector.ph:
421 // v_init = vector(..., ..., ..., a[-1])
422 // br vector.body
423 //
424 // vector.body
425 // i = phi [0, vector.ph], [i+4, vector.body]
426 // v1 = phi [v_init, vector.ph], [v2, vector.body]
427 // v2 = a[i, i+1, i+2, i+3];
428 // v3 = vector(v1(3), v2(0, 1, 2))
429
430 auto *V1 = State.get(getOperand(0));
431 if (!V1->getType()->isVectorTy())
432 return V1;
433 Value *V2 = State.get(getOperand(1));
434 return Builder.CreateVectorSplice(V1, V2, -1, Name);
435 }
  case VPInstruction::CalculateTripCountMinusVF: {
    unsigned UF = getParent()->getPlan()->getUF();
438 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
439 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
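    // Compute max(TC - VF * UF, 0): if fewer than one full vector step
    // remains, the result is clamped to zero.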
440 Value *Sub = Builder.CreateSub(ScalarTC, Step);
441 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
442 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
443 return Builder.CreateSelect(Cmp, Sub, Zero);
444 }
  case VPInstruction::ExplicitVectorLength: {
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
447 // be outside of the main loop.
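    // The resulting EVL is the number of lanes to process this iteration:
    // the target-provided vector length, capped by the remaining trip count
    // (AVL) requested below.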
448 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
449 // Compute EVL
450 assert(AVL->getType()->isIntegerTy() &&
451 "Requested vector length should be an integer.");
452
453 assert(State.VF.isScalable() && "Expected scalable vector factor.");
454 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
455
456 Value *EVL = State.Builder.CreateIntrinsic(
457 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
458 {AVL, VFArg, State.Builder.getTrue()});
459 return EVL;
460 }
  case VPInstruction::CanonicalIVIncrementForPart: {
    unsigned Part = getUnrollPart(*this);
463 auto *IV = State.get(getOperand(0), VPLane(0));
464 assert(Part != 0 && "Must have a positive part");
465 // The canonical IV is incremented by the vectorization factor (num of
466 // SIMD elements) times the unroll part.
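    // E.g. with VF = 4 and Part = 2, Step is 8 (scaled by vscale for
    // scalable vectors), so this part's IV starts at IV + 8.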
467 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    Value *Cond = State.get(getOperand(0), VPLane(0));
473 // Replace the temporary unreachable terminator with a new conditional
474 // branch, hooking it up to backward destination for exiting blocks now and
475 // to forward destination(s) later when they are created.
476 BranchInst *CondBr =
477 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

481 if (!getParent()->isExiting())
482 return CondBr;
483
484 VPRegionBlock *ParentRegion = getParent()->getParent();
485 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
486 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
487 return CondBr;
488 }
  case VPInstruction::BranchOnCount: {
    // First create the compare.
491 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
492 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
493 Value *Cond = Builder.CreateICmpEQ(IV, TC);
494
495 // Now create the branch.
496 auto *Plan = getParent()->getPlan();
497 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
498 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
499
500 // Replace the temporary unreachable terminator with a new conditional
501 // branch, hooking it up to backward destination (the header) now and to the
502 // forward destination (the exit/middle block) later when it is created.
503 // Note that CreateCondBr expects a valid BB as first argument, so we need
504 // to set it to nullptr later.
505 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
506 State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
510 }
  case VPInstruction::ComputeReductionResult: {
    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
513 // and will be removed by breaking up the recipe further.
514 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
515 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
516 // Get its reduction variable descriptor.
517 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
518
519 RecurKind RK = RdxDesc.getRecurrenceKind();
520
521 Type *PhiTy = OrigPhi->getType();
522 // The recipe's operands are the reduction phi, followed by one operand for
523 // each part of the reduction.
524 unsigned UF = getNumOperands() - 1;
525 VectorParts RdxParts(UF);
526 for (unsigned Part = 0; Part < UF; ++Part)
527 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
528
529 // If the vector reduction can be performed in a smaller type, we truncate
530 // then extend the loop exit value to enable InstCombine to evaluate the
531 // entire expression in the smaller type.
532 // TODO: Handle this in truncateToMinBW.
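    // E.g. an i8 add reduction performed in i32 vectors: truncate each part
    // back to <VF x i8>, reduce in i8, and extend the final scalar back to
    // i32 further below.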
533 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
534 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
535 for (unsigned Part = 0; Part < UF; ++Part)
536 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
537 }
538 // Reduce all of the unrolled parts into a single vector.
539 Value *ReducedPartRdx = RdxParts[0];
540 unsigned Op = RdxDesc.getOpcode();
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;
543
544 if (PhiR->isOrdered()) {
545 ReducedPartRdx = RdxParts[UF - 1];
546 } else {
547 // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
550 for (unsigned Part = 1; Part < UF; ++Part) {
551 Value *RdxPart = RdxParts[Part];
552 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
553 ReducedPartRdx = Builder.CreateBinOp(
554 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
          ReducedPartRdx =
557 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
558 else
559 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
560 }
561 }
562
563 // Create the reduction after the loop. Note that inloop reductions create
564 // the target reduction in the loop using a Reduction recipe.
565 if ((State.VF.isVector() ||
568 !PhiR->isInLoop()) {
569 ReducedPartRdx =
570 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
571 // If the reduction can be performed in a smaller type, we need to extend
572 // the reduction to the wider type before we branch to the original loop.
573 if (PhiTy != RdxDesc.getRecurrenceType())
574 ReducedPartRdx = RdxDesc.isSigned()
575 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
576 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
577 }
578
579 return ReducedPartRdx;
580 }
  case VPInstruction::ExtractFromEnd: {
    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
583 unsigned Offset = CI->getZExtValue();
584 assert(Offset > 0 && "Offset from end must be positive");
585 Value *Res;
586 if (State.VF.isVector()) {
587 assert(Offset <= State.VF.getKnownMinValue() &&
588 "invalid offset to extract from");
589 // Extract lane VF - Offset from the operand.
590 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
591 } else {
592 assert(Offset <= 1 && "invalid offset to extract from");
593 Res = State.get(getOperand(0));
594 }
595 if (isa<ExtractElementInst>(Res))
596 Res->setName(Name);
597 return Res;
598 }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0));
601 Value *B = State.get(getOperand(1));
602 return Builder.CreateLogicalAnd(A, B, Name);
603 }
606 "can only generate first lane for PtrAdd");
607 Value *Ptr = State.get(getOperand(0), VPLane(0));
608 Value *Addend = State.get(getOperand(1), VPLane(0));
609 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
610 }
  case VPInstruction::ResumePhi: {
    Value *IncomingFromVPlanPred =
613 State.get(getOperand(0), /* IsScalar */ true);
614 Value *IncomingFromOtherPreds =
615 State.get(getOperand(1), /* IsScalar */ true);
616 auto *NewPhi =
617 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
618 BasicBlock *VPlanPred =
619 State.CFG
620 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
621 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
622 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
623 if (OtherPred == VPlanPred)
624 continue;
625 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
626 }
627 return NewPhi;
628 }
  case VPInstruction::AnyOf: {
    Value *A = State.get(getOperand(0));
631 return Builder.CreateOrReduce(A);
632 }
633
634 default:
635 llvm_unreachable("Unsupported opcode for instruction");
636 }
637}
638
bool VPInstruction::isVectorToScalar() const {
  return getOpcode() == VPInstruction::ExtractFromEnd ||
         getOpcode() == VPInstruction::ComputeReductionResult ||
         getOpcode() == VPInstruction::AnyOf;
}
644
bool VPInstruction::isSingleScalar() const {
  return getOpcode() == VPInstruction::ResumePhi;
}
648
649#if !defined(NDEBUG)
650bool VPInstruction::isFPMathOp() const {
651 // Inspired by FPMathOperator::classof. Notable differences are that we don't
652 // support Call, PHI and Select opcodes here yet.
653 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
654 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
655 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
656 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
657}
658#endif
659
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Lane && "VPInstruction executing a Lane");
  IRBuilderBase &Builder = State.Builder;
663 assert((hasFastMathFlags() == isFPMathOp() ||
664 getOpcode() == Instruction::Select) &&
665 "Recipe not a FPMathOp but has fast-math flags?");
666 if (hasFastMathFlags())
    Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
                                   (vputils::onlyFirstLaneUsed(this) ||
                                    isVectorToScalar() || isSingleScalar());
672 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
673 if (GeneratesPerAllLanes) {
674 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
675 Lane != NumLanes; ++Lane) {
676 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
677 assert(GeneratedValue && "generatePerLane must produce a value");
678 State.set(this, GeneratedValue, VPLane(Lane));
679 }
680 return;
681 }
682
683 Value *GeneratedValue = generate(State);
684 if (!hasResult())
685 return;
686 assert(GeneratedValue && "generate must produce a value");
687 assert(
688 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
689 State.VF.isScalar()) &&
690 "scalar value but not only first lane defined");
691 State.set(this, GeneratedValue,
692 /*IsScalar*/ GeneratesPerFirstLaneOnly);
693}
694
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return false;
  switch (getOpcode()) {
699 case Instruction::ICmp:
700 case Instruction::Select:
709 return false;
710 default:
711 return true;
712 }
713}
714
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);
719
720 switch (getOpcode()) {
721 default:
722 return false;
723 case Instruction::ICmp:
724 case Instruction::Select:
725 case Instruction::Or:
727 // TODO: Cover additional opcodes.
728 return vputils::onlyFirstLaneUsed(this);
736 return true;
737 };
738 llvm_unreachable("switch should return");
739}
740
bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstPartUsed(this);
745
746 switch (getOpcode()) {
747 default:
748 return false;
749 case Instruction::ICmp:
750 case Instruction::Select:
751 return vputils::onlyFirstPartUsed(this);
755 return true;
756 };
757 llvm_unreachable("switch should return");
758}
759
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
763 print(dbgs(), "", SlotTracker);
764}
765
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
768 O << Indent << "EMIT ";
769
770 if (hasResult()) {
772 O << " = ";
773 }
774
  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ResumePhi:
    O << "resume-phi";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ExtractFromEnd:
    O << "extract-from-end";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  case VPInstruction::AnyOf:
    O << "any-of";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }
827
828 printFlags(O);
  printOperands(O, SlotTracker);

831 if (auto DL = getDebugLoc()) {
832 O << ", !dbg ";
833 DL.print(O);
834 }
835}
836#endif
837
void VPIRInstruction::execute(VPTransformState &State) {
  assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
840 "Only PHINodes can have extra operands");
841 for (const auto &[Idx, Op] : enumerate(operands())) {
842 VPValue *ExitValue = Op;
    auto Lane = vputils::isUniformAfterVectorization(ExitValue)
                    ? VPLane::getFirstLane()
                    : VPLane::getLastLaneForVF(State.VF);
    VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
    auto *PredVPBB = Pred->getExitingBasicBlock();
848 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
849 // Set insertion point in PredBB in case an extract needs to be generated.
850 // TODO: Model extracts explicitly.
851 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
852 Value *V = State.get(ExitValue, VPLane(Lane));
853 auto *Phi = cast<PHINode>(&I);
854 // If there is no existing block for PredBB in the phi, add a new incoming
855 // value. Otherwise update the existing incoming value for PredBB.
856 if (Phi->getBasicBlockIndex(PredBB) == -1)
857 Phi->addIncoming(V, PredBB);
858 else
859 Phi->setIncomingValueForBlock(PredBB, V);
860 }
861
862 // Advance the insert point after the wrapped IR instruction. This allows
863 // interleaving VPIRInstructions and other recipes.
864 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
865}
866
InstructionCost VPIRInstruction::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
869 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
870 // hence it does not contribute to the cost-modeling for the VPlan.
871 return 0;
872}
873
874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
                            VPSlotTracker &SlotTracker) const {
877 O << Indent << "IR " << I;
878
879 if (getNumOperands() != 0) {
880 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
    interleaveComma(
        enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
883 Op.value()->printAsOperand(O, SlotTracker);
884 O << " from ";
885 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
886 });
887 O << ")";
888 }
889}
890#endif
891
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  State.setDebugLocFrom(getDebugLoc());
895
896 FunctionType *VFTy = Variant->getFunctionType();
897 // Add return type if intrinsic is overloaded on it.
  SmallVector<Value *> Args;
  for (const auto &I : enumerate(arg_operands())) {
900 Value *Arg;
901 // Some vectorized function variants may also take a scalar argument,
902 // e.g. linear parameters for pointers. This needs to be the scalar value
903 // from the start of the respective part when interleaving.
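    // E.g. a call variant with a "linear" pointer parameter expects a single
    // scalar pointer per call rather than a vector of pointers.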
904 if (!VFTy->getParamType(I.index())->isVectorTy())
905 Arg = State.get(I.value(), VPLane(0));
906 else
907 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
908 Args.push_back(Arg);
909 }
910
911 assert(Variant != nullptr && "Can't create vector function.");
912
913 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (CI)
916 CI->getOperandBundlesAsDefs(OpBundles);
917
918 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
919 setFlags(V);
920
921 if (!V->getType()->isVoidTy())
922 State.set(this, V);
923 State.addMetadata(V, CI);
924}
925
InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
930 Variant->getFunctionType()->params(),
931 CostKind);
932}
933
934#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
937 O << Indent << "WIDEN-CALL ";
938
939 Function *CalledFn = getCalledScalarFunction();
940 if (CalledFn->getReturnType()->isVoidTy())
941 O << "void ";
942 else {
944 O << " = ";
945 }
946
947 O << "call";
948 printFlags(O);
949 O << " @" << CalledFn->getName() << "(";
  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
952 });
953 O << ")";
954
955 O << " (using library function";
956 if (Variant->hasName())
957 O << ": " << Variant->getName();
958 O << ")";
959}
960#endif
961
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  State.setDebugLocFrom(getDebugLoc());
965
966 SmallVector<Type *, 2> TysForDecl;
967 // Add return type if intrinsic is overloaded on it.
968 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
969 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
  SmallVector<Value *> Args;
  for (const auto &I : enumerate(operands())) {
972 // Some intrinsics have a scalar argument - don't replace it with a
973 // vector.
974 Value *Arg;
975 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
976 State.TTI))
977 Arg = State.get(I.value(), VPLane(0));
978 else
979 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
980 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
981 State.TTI))
982 TysForDecl.push_back(Arg->getType());
983 Args.push_back(Arg);
984 }
985
986 // Use vector version of the intrinsic.
987 Module *M = State.Builder.GetInsertBlock()->getModule();
988 Function *VectorF =
989 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
990 assert(VectorF &&
991 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
992
993 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (CI)
996 CI->getOperandBundlesAsDefs(OpBundles);
997
998 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
999
1000 setFlags(V);
1001
1002 if (!V->getType()->isVoidTy())
1003 State.set(this, V);
1004 State.addMetadata(V, CI);
1005}
1006
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
                                                    VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

1011 // Some backends analyze intrinsic arguments to determine cost. Use the
1012 // underlying value for the operand if it has one. Otherwise try to use the
1013 // operand of the underlying call instruction, if there is one. Otherwise
1014 // clear Arguments.
1015 // TODO: Rework TTI interface to be independent of concrete IR values.
  SmallVector<const Value *> Arguments;
  for (const auto &[Idx, Op] : enumerate(operands())) {
1018 auto *V = Op->getUnderlyingValue();
1019 if (!V) {
      // Push all the VP intrinsic's operands into Arguments even if the value
      // is nullptr. Some VP intrinsics' cost computation asserts on the number
      // of parameters. This mainly happens in two scenarios:
      // 1. The EVL operand is nullptr.
      // 2. The argument of the VP intrinsic is itself a VP intrinsic.
1025 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1026 Arguments.push_back(V);
1027 continue;
1028 }
1029 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1030 Arguments.push_back(UI->getArgOperand(Idx));
1031 continue;
1032 }
1033 Arguments.clear();
1034 break;
1035 }
1036 Arguments.push_back(V);
1037 }
1038
1039 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1040 SmallVector<Type *> ParamTys;
1041 for (unsigned I = 0; I != getNumOperands(); ++I)
1042 ParamTys.push_back(
        toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));

1045 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
  FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
  IntrinsicCostAttributes CostAttrs(
1048 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1049 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1050 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1051}
1052
StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
  return Intrinsic::getBaseName(VectorIntrinsicID);
1055}
1056
bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  // Vector predication intrinsics only demand the first lane of the last
  // operand (the EVL operand).
1061 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1062 Op == getOperand(getNumOperands() - 1);
1063}
1064
1065#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
1068 O << Indent << "WIDEN-INTRINSIC ";
1069 if (ResultTy->isVoidTy()) {
1070 O << "void ";
1071 } else {
1073 O << " = ";
1074 }
1075
1076 O << "call";
1077 printFlags(O);
1078 O << getIntrinsicName() << "(";
1079
  interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
1082 });
1083 O << ")";
1084}
1085#endif
1086
void VPHistogramRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  IRBuilderBase &Builder = State.Builder;
1090
1091 Value *Address = State.get(getOperand(0));
1092 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1093 VectorType *VTy = cast<VectorType>(Address->getType());
1094
1095 // The histogram intrinsic requires a mask even if the recipe doesn't;
1096 // if the mask operand was omitted then all lanes should be executed and
1097 // we just need to synthesize an all-true mask.
1098 Value *Mask = nullptr;
1099 if (VPValue *VPMask = getMask())
1100 Mask = State.get(VPMask);
1101 else
1102 Mask =
1103 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1104
1105 // If this is a subtract, we want to invert the increment amount. We may
1106 // add a separate intrinsic in future, but for now we'll try this.
1107 if (Opcode == Instruction::Sub)
1108 IncAmt = Builder.CreateNeg(IncAmt);
1109 else
1110 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1111
1112 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1113 {VTy, IncAmt->getType()},
1114 {Address, IncAmt, Mask});
1115}
1116
InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
1119 // FIXME: Take the gather and scatter into account as well. For now we're
1120 // generating the same cost as the fallback path, but we'll likely
1121 // need to create a new TTI method for determining the cost, including
1122 // whether we can use base + vec-of-smaller-indices or just
1123 // vec-of-pointers.
1124 assert(VF.isVector() && "Invalid VF for histogram cost");
1125 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1126 VPValue *IncAmt = getOperand(1);
1127 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1128 VectorType *VTy = VectorType::get(IncTy, VF);
1129
1130 // Assume that a non-constant update value (or a constant != 1) requires
1131 // a multiply, and add that into the cost.
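  // E.g. an update of the form "bucket[idx] += 3" is costed as a vector
  // multiply by 3 plus the histogram intrinsic; an increment of 1 makes the
  // multiply free below.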
1132 InstructionCost MulCost =
1133 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1134 if (IncAmt->isLiveIn()) {
1135 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1136
1137 if (CI && CI->getZExtValue() == 1)
1138 MulCost = TTI::TCC_Free;
1139 }
1140
1141 // Find the cost of the histogram operation itself.
1142 Type *PtrTy = VectorType::get(AddressTy, VF);
1143 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1144 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1146 {PtrTy, IncTy, MaskTy});
1147
1148 // Add the costs together with the add/sub operation.
1149 return Ctx.TTI.getIntrinsicInstrCost(
             ICA, TTI::TCK_RecipThroughput) +
         MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1152}
1153
1154#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
1157 O << Indent << "WIDEN-HISTOGRAM buckets: ";
  getOperand(0)->printAsOperand(O, SlotTracker);

1160 if (Opcode == Instruction::Sub)
1161 O << ", dec: ";
1162 else {
1163 assert(Opcode == Instruction::Add);
1164 O << ", inc: ";
1165 }
  getOperand(1)->printAsOperand(O, SlotTracker);

1168 if (VPValue *Mask = getMask()) {
1169 O << ", mask: ";
1170 Mask->printAsOperand(O, SlotTracker);
1171 }
1172}
1173
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
1176 O << Indent << "WIDEN-SELECT ";
1178 O << " = select ";
1180 O << ", ";
1182 O << ", ";
1184 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1185}
1186#endif
1187
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

1191 // The condition can be loop invariant but still defined inside the
1192 // loop. This means that we can't just use the original 'cond' value.
1193 // We have to take the 'vectorized' value and pick the first lane.
1194 // Instcombine will make this a no-op.
1195 auto *InvarCond =
1196 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1197
1198 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1199 Value *Op0 = State.get(getOperand(1));
1200 Value *Op1 = State.get(getOperand(2));
1201 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1202 State.set(this, Sel);
1203 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1204}
1205
InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
                                                 VPCostContext &Ctx) const {
1208 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1209 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1210 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1211 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

1214 VPValue *Op0, *Op1;
1215 using namespace llvm::VPlanPatternMatch;
1216 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1217 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1218 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1219 // select x, y, false --> x & y
1220 // select x, true, y --> x | y
1221 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1222 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1223
    SmallVector<const Value *> Operands;
    if (all_of(operands(),
1226 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1227 Operands.append(SI->op_begin(), SI->op_end());
1228 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1229 return Ctx.TTI.getArithmeticInstrCost(
1230 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1231 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1232 }
1233
1234 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1235 if (!ScalarCond)
1236 CondTy = VectorType::get(CondTy, VF);
1237
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1240 Pred = Cmp->getPredicate();
1241 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1244}
1245
1246VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1247 const FastMathFlags &FMF) {
1248 AllowReassoc = FMF.allowReassoc();
1249 NoNaNs = FMF.noNaNs();
1250 NoInfs = FMF.noInfs();
1251 NoSignedZeros = FMF.noSignedZeros();
1252 AllowReciprocal = FMF.allowReciprocal();
1253 AllowContract = FMF.allowContract();
1254 ApproxFunc = FMF.approxFunc();
1255}
1256
1257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
1260 case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
1263 case OperationType::DisjointOp:
1265 O << " disjoint";
1266 break;
1267 case OperationType::PossiblyExactOp:
1268 if (ExactFlags.IsExact)
1269 O << " exact";
1270 break;
1271 case OperationType::OverflowingBinOp:
1272 if (WrapFlags.HasNUW)
1273 O << " nuw";
1274 if (WrapFlags.HasNSW)
1275 O << " nsw";
1276 break;
1277 case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
1280 case OperationType::GEPOp:
1281 if (GEPFlags.isInBounds())
1282 O << " inbounds";
1284 O << " nusw";
1286 O << " nuw";
1287 break;
1288 case OperationType::NonNegOp:
1289 if (NonNegFlags.NonNeg)
1290 O << " nneg";
1291 break;
1292 case OperationType::Other:
1293 break;
1294 }
1295 if (getNumOperands() > 0)
1296 O << " ";
1297}
1298#endif
1299
void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
1303 switch (Opcode) {
1304 case Instruction::Call:
1305 case Instruction::Br:
1306 case Instruction::PHI:
1307 case Instruction::GetElementPtr:
1308 case Instruction::Select:
1309 llvm_unreachable("This instruction is handled by a different recipe.");
1310 case Instruction::UDiv:
1311 case Instruction::SDiv:
1312 case Instruction::SRem:
1313 case Instruction::URem:
1314 case Instruction::Add:
1315 case Instruction::FAdd:
1316 case Instruction::Sub:
1317 case Instruction::FSub:
1318 case Instruction::FNeg:
1319 case Instruction::Mul:
1320 case Instruction::FMul:
1321 case Instruction::FDiv:
1322 case Instruction::FRem:
1323 case Instruction::Shl:
1324 case Instruction::LShr:
1325 case Instruction::AShr:
1326 case Instruction::And:
1327 case Instruction::Or:
1328 case Instruction::Xor: {
1329 // Just widen unops and binops.
    SmallVector<Value *, 2> Ops;
    for (VPValue *VPOp : operands())
1332 Ops.push_back(State.get(VPOp));
1333
1334 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1335
1336 if (auto *VecOp = dyn_cast<Instruction>(V))
1337 setFlags(VecOp);
1338
1339 // Use this vector value for all users of the original instruction.
1340 State.set(this, V);
1341 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1342 break;
1343 }
1344 case Instruction::Freeze: {
1345 Value *Op = State.get(getOperand(0));
1346
1347 Value *Freeze = Builder.CreateFreeze(Op);
1348 State.set(this, Freeze);
1349 break;
1350 }
1351 case Instruction::ICmp:
1352 case Instruction::FCmp: {
1353 // Widen compares. Generate vector compares.
1354 bool FCmp = Opcode == Instruction::FCmp;
1355 Value *A = State.get(getOperand(0));
1356 Value *B = State.get(getOperand(1));
1357 Value *C = nullptr;
1358 if (FCmp) {
1359 // Propagate fast math flags.
1360 C = Builder.CreateFCmpFMF(
1361 getPredicate(), A, B,
1362 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1363 } else {
1364 C = Builder.CreateICmp(getPredicate(), A, B);
1365 }
1366 State.set(this, C);
1367 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1368 break;
1369 }
1370 default:
1371 // This instruction is not vectorized by simple widening.
1372 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1373 << Instruction::getOpcodeName(Opcode));
1374 llvm_unreachable("Unhandled instruction!");
1375 } // end of switch.
1376
1377#if !defined(NDEBUG)
1378 // Verify that VPlan type inference results agree with the type of the
1379 // generated values.
  assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
             State.get(this)->getType() &&
1382 "inferred type and type from generated instructions do not match");
1383#endif
1384}
1385
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  switch (Opcode) {
1390 case Instruction::FNeg: {
1391 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1392 return Ctx.TTI.getArithmeticInstrCost(
1393 Opcode, VectorTy, CostKind,
1396 }
1397
1398 case Instruction::UDiv:
1399 case Instruction::SDiv:
1400 case Instruction::SRem:
1401 case Instruction::URem:
1402 // More complex computation, let the legacy cost-model handle this for now.
1403 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1404 case Instruction::Add:
1405 case Instruction::FAdd:
1406 case Instruction::Sub:
1407 case Instruction::FSub:
1408 case Instruction::Mul:
1409 case Instruction::FMul:
1410 case Instruction::FDiv:
1411 case Instruction::FRem:
1412 case Instruction::Shl:
1413 case Instruction::LShr:
1414 case Instruction::AShr:
1415 case Instruction::And:
1416 case Instruction::Or:
1417 case Instruction::Xor: {
1418 VPValue *RHS = getOperand(1);
1419 // Certain instructions can be cheaper to vectorize if they have a constant
1420 // second vector operand. One example of this are shifts on x86.
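    // E.g. a shift by a uniform constant amount can often use an immediate
    // form, whereas a shift by a variable vector amount is usually more
    // expensive.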
    TargetTransformInfo::OperandValueInfo RHSInfo = {
        TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
    if (RHS->isLiveIn())
1424 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1425
1426 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
        getOperand(1)->isDefinedOutsideLoopRegions())
      RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1430 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1431
    SmallVector<const Value *, 4> Operands;
    if (CtxI)
1434 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1435 return Ctx.TTI.getArithmeticInstrCost(
1436 Opcode, VectorTy, CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        RHSInfo, Operands, CtxI, &Ctx.TLI);
1439 }
1440 case Instruction::Freeze: {
1441 // This opcode is unknown. Assume that it is the same as 'mul'.
1442 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1443 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1444 }
1445 case Instruction::ICmp:
1446 case Instruction::FCmp: {
1447 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1448 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1449 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1450 CostKind,
1453 }
1454 default:
1455 llvm_unreachable("Unsupported opcode for instruction");
1456 }
1457}
1458
void VPWidenEVLRecipe::execute(VPTransformState &State) {
  unsigned Opcode = getOpcode();
1461 // TODO: Support other opcodes
1462 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1463 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1464
1466
1467 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1468 "VPWidenEVLRecipe should not be used for scalars");
1469
1470 VPValue *EVL = getEVL();
1471 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1472 IRBuilderBase &BuilderIR = State.Builder;
1473 VectorBuilder Builder(BuilderIR);
1474 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1475
1477 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1478 VPValue *VPOp = getOperand(I);
1479 Ops.push_back(State.get(VPOp));
1480 }
1481
1482 Builder.setMask(Mask).setEVL(EVLArg);
1483 Value *VPInst =
1484 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1485 // Currently vp-intrinsics only accept FMF flags.
1486 // TODO: Enable other flags when support is added.
1487 if (isa<FPMathOperator>(VPInst))
1488 setFlags(cast<Instruction>(VPInst));
1489
1490 State.set(this, VPInst);
1491 State.addMetadata(VPInst,
1492 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1493}
1494
1495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
1498 O << Indent << "WIDEN ";
1500 O << " = " << Instruction::getOpcodeName(Opcode);
1501 printFlags(O);
  printOperands(O, SlotTracker);
}
1504
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
1507 O << Indent << "WIDEN ";
1509 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1510 printFlags(O);
  printOperands(O, SlotTracker);
}
1513#endif
1514
void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
1518 /// Vectorize casts.
1519 assert(State.VF.isVector() && "Not vectorizing?");
1520 Type *DestTy = VectorType::get(getResultType(), State.VF);
1521 VPValue *Op = getOperand(0);
1522 Value *A = State.get(Op);
1523 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1524 State.set(this, Cast);
1525 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1526 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1527 setFlags(CastOp);
1528}
1529
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
1532 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1533 // the legacy cost model, including truncates/extends when evaluating a
1534 // reduction in a smaller type.
1535 if (!getUnderlyingValue())
1536 return 0;
  // Computes the CastContextHint from a recipe that may access memory.
  auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
    if (VF.isScalar())
      return TTI::CastContextHint::Normal;
    if (isa<VPInterleaveRecipe>(R))
      return TTI::CastContextHint::Interleave;
    if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
      return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
                                             : TTI::CastContextHint::Normal;
    const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
    if (WidenMemoryRecipe == nullptr)
      return TTI::CastContextHint::None;
    if (!WidenMemoryRecipe->isConsecutive())
      return TTI::CastContextHint::GatherScatter;
    if (WidenMemoryRecipe->isReverse())
      return TTI::CastContextHint::Reversed;
    if (WidenMemoryRecipe->isMasked())
      return TTI::CastContextHint::Masked;
    return TTI::CastContextHint::Normal;
  };
1557
1558 VPValue *Operand = getOperand(0);
  TTI::CastContextHint CCH = TTI::CastContextHint::None;
  // For Trunc/FPTrunc, get the context from the only user.
1561 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
      !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
    if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1564 CCH = ComputeCCH(StoreRecipe);
1565 }
1566 // For Z/Sext, get the context from the operand.
1567 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1568 Opcode == Instruction::FPExt) {
1569 if (Operand->isLiveIn())
      CCH = TTI::CastContextHint::Normal;
    else if (Operand->getDefiningRecipe())
1572 CCH = ComputeCCH(Operand->getDefiningRecipe());
1573 }
1574
1575 auto *SrcTy =
1576 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1577 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1578 // Arm TTI will use the underlying instruction to determine the cost.
1579 return Ctx.TTI.getCastInstrCost(
1580 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1581 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1582}
1583
1584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
1587 O << Indent << "WIDEN-CAST ";
1589 O << " = " << Instruction::getOpcodeName(Opcode);
1590 printFlags(O);
1592 O << " to " << *getResultType();
1593}
1594#endif
1595
1597 VPCostContext &Ctx) const {
1598 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1599}
1600
1601/// This function adds
1602/// (0 * Step, 1 * Step, 2 * Step, ...)
1603/// to each vector element of Val.
1604/// \p Opcode is relevant for FP induction variable.
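/// For example, with VF = 4, Opcode = FAdd, Val = <1.0, 1.0, 1.0, 1.0> and
/// Step = 2.0, the result is <1.0, 3.0, 5.0, 7.0>.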
1605static Value *getStepVector(Value *Val, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
1608 assert(VF.isVector() && "only vector VFs are supported");
1609
1610 // Create and check the types.
1611 auto *ValVTy = cast<VectorType>(Val->getType());
1612 ElementCount VLen = ValVTy->getElementCount();
1613
1614 Type *STy = Val->getType()->getScalarType();
1615 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1616 "Induction Step must be an integer or FP");
1617 assert(Step->getType() == STy && "Step has wrong type");
1618
1620
1621 // Create a vector of consecutive numbers from zero to VF.
1622 VectorType *InitVecValVTy = ValVTy;
1623 if (STy->isFloatingPointTy()) {
1624 Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1627 }
1628 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1629
1630 if (STy->isIntegerTy()) {
1631 Step = Builder.CreateVectorSplat(VLen, Step);
1632 assert(Step->getType() == Val->getType() && "Invalid step vec");
1633 // FIXME: The newly created binary instructions should contain nsw/nuw
1634 // flags, which can be found from the original scalar operations.
1635 Step = Builder.CreateMul(InitVec, Step);
1636 return Builder.CreateAdd(Val, Step, "induction");
1637 }
1638
1639 // Floating point induction.
1640 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1641 "Binary Opcode should be specified for FP induction");
1642 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1643
1644 Step = Builder.CreateVectorSplat(VLen, Step);
1645 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1646 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1647}
1648
1649/// A helper function that returns an integer or floating-point constant with
1650/// value C.
static Value *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1653 : ConstantFP::get(Ty, C);
1654}
1655
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Int or FP induction being replicated.");
1658
1659 Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
1662 IRBuilderBase &Builder = State.Builder;
1663 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1664 "Types must match");
1665 assert(State.VF.isVector() && "must have vector VF");
1666
1667 // The value from the original loop to which we are mapping the new induction
1668 // variable.
1669 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1670
1671 // Fast-math-flags propagate from the original induction instruction.
1672 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1673 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1674 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1675
1676 // Now do the actual transformations, and start with fetching the step value.
1677 Value *Step = State.get(getStepValue(), VPLane(0));
1678
1679 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1680 "Expected either an induction phi-node or a truncate of it!");
1681
1682 // Construct the initial value of the vector IV in the vector loop preheader
1683 auto CurrIP = Builder.saveIP();
1684 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1685 Builder.SetInsertPoint(VectorPH->getTerminator());
1686 if (isa<TruncInst>(EntryVal)) {
1687 assert(Start->getType()->isIntegerTy() &&
1688 "Truncation requires an integer type");
1689 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1690 Step = Builder.CreateTrunc(Step, TruncType);
1691 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1692 }
1693
1694 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1695 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1696 State.VF, State.Builder);
1697
1698 // We create vector phi nodes for both integer and floating-point induction
1699 // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
1703 AddOp = Instruction::Add;
1704 MulOp = Instruction::Mul;
1705 } else {
1706 AddOp = ID.getInductionOpcode();
1707 MulOp = Instruction::FMul;
1708 }
1709
1710 Value *SplatVF;
1711 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1712 // The recipe has been unrolled. In that case, fetch the splat value for the
1713 // induction increment.
1714 SplatVF = State.get(SplatVFOperand);
1715 } else {
1716 // Multiply the vectorization factor by the step using integer or
1717 // floating-point arithmetic as appropriate.
1718 Type *StepType = Step->getType();
1719 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1720 if (Step->getType()->isFloatingPointTy())
1721 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1722 else
1723 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1724 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1725
1726 // Create a vector splat to use in the induction update.
1727 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1728 }
1729
1730 Builder.restoreIP(CurrIP);
1731
1732 // We may need to add the step a number of times, depending on the unroll
1733 // factor. The last of those goes into the PHI.
1734 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1735 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1736 VecInd->setDebugLoc(getDebugLoc());
1737 State.set(this, VecInd);
1738
1739 Instruction *LastInduction = cast<Instruction>(
1740 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1741 if (isa<TruncInst>(EntryVal))
1742 State.addMetadata(LastInduction, EntryVal);
1743 LastInduction->setDebugLoc(getDebugLoc());
1744
1745 VecInd->addIncoming(SteppedStart, VectorPH);
1746 // Add induction update using an incorrect block temporarily. The phi node
1747 // will be fixed after VPlan execution. Note that at this point the latch
1748 // block cannot be used, as it does not exist yet.
1749 // TODO: Model increment value in VPlan, by turning the recipe into a
1750 // multi-def and a subclass of VPHeaderPHIRecipe.
1751 VecInd->addIncoming(LastInduction, VectorPH);
1752}
1753
1754#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
1757 O << Indent;
1759 O << " = WIDEN-INDUCTION ";
1761
1762 if (auto *TI = getTruncInst())
1763 O << " (truncated to " << *TI->getType() << ")";
1764}
1765#endif
1766
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to be
  // 1, which is represented as live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
1773 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1774 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1775 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1776 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1777 getScalarType() == CanIV->getScalarType();
1778}
1779
1780#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
1783 O << Indent;
1785 O << " = DERIVED-IV ";
1787 O << " + ";
1789 O << " * ";
1791}
1792#endif
1793
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

1800 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1801 /// variable on which to base the steps, \p Step is the size of the step.
1802
1803 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1804 Value *Step = State.get(getStepValue(), VPLane(0));
1805 IRBuilderBase &Builder = State.Builder;
1806
1807 // Ensure step has the same type as that of scalar IV.
1808 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1809 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1810
1811 // We build scalar steps for both integer and floating-point induction
1812 // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
1816 AddOp = Instruction::Add;
1817 MulOp = Instruction::Mul;
1818 } else {
1819 AddOp = InductionOpcode;
1820 MulOp = Instruction::FMul;
1821 }
1822
1823 // Determine the number of scalars we need to generate for each unroll
1824 // iteration.
1825 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1826 // Compute the scalar steps and save the results in State.
1827 Type *IntStepTy =
1828 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1829 Type *VecIVTy = nullptr;
1830 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1831 if (!FirstLaneOnly && State.VF.isScalable()) {
1832 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1833 UnitStepVec =
1834 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1835 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1836 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1837 }
1838
1839 unsigned StartLane = 0;
1840 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1841 if (State.Lane) {
1842 StartLane = State.Lane->getKnownLane();
1843 EndLane = StartLane + 1;
1844 }
1845 Value *StartIdx0 =
1846 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1847
1848 if (!FirstLaneOnly && State.VF.isScalable()) {
1849 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1850 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1851 if (BaseIVTy->isFloatingPointTy())
1852 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1853 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1854 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1855 State.set(this, Add);
1856 // It's useful to record the lane values too for the known minimum number
1857 // of elements so we do those below. This improves the code quality when
1858 // trying to extract the first element, for example.
1859 }
1860
1861 if (BaseIVTy->isFloatingPointTy())
1862 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1863
1864 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1865 Value *StartIdx = Builder.CreateBinOp(
1866 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1867 // The step returned by `createStepForVF` is a runtime-evaluated value
1868 // when VF is scalable. Otherwise, it should be folded into a Constant.
1869 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1870 "Expected StartIdx to be folded to a constant when VF is not "
1871 "scalable");
1872 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1873 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1874 State.set(this, Add, VPLane(Lane));
1875 }
1876}
1877
1878#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1880 VPSlotTracker &SlotTracker) const {
1881 O << Indent;
1883 O << " = SCALAR-STEPS ";
1885}
1886#endif
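The loop above materialises, for unroll part Part and lane Lane, the value BaseIV + (Part * VF + Lane) * Step (FAdd/FMul for floating-point inductions). A small standalone C++ sketch of that arithmetic, not part of this file, with hypothetical inputs:

// Standalone sketch (not from VPlanRecipes.cpp): per-lane values emitted by
// the scalar-steps recipe for an integer induction.
#include <cstdint>
#include <vector>

std::vector<int64_t> scalarSteps(int64_t BaseIV, int64_t Step, unsigned VF,
                                 unsigned Part) {
  std::vector<int64_t> Lanes;
  int64_t StartIdx0 = int64_t(Part) * VF;                  // createStepForVF(Part)
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Lanes.push_back(BaseIV + (StartIdx0 + Lane) * Step);   // BaseIV + StartIdx * Step
  return Lanes;
}
// Example: BaseIV = 0, Step = 2, VF = 4, Part = 1 yields {8, 10, 12, 14}.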
1887
1889 assert(State.VF.isVector() && "not widening");
1890 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1891 // Construct a vector GEP by widening the operands of the scalar GEP as
1892 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1893 // results in a vector of pointers when at least one operand of the GEP
1894 // is vector-typed. Thus, to keep the representation compact, we only use
1895 // vector-typed operands for loop-varying values.
1896
1897 if (areAllOperandsInvariant()) {
1898 // If we are vectorizing, but the GEP has only loop-invariant operands,
1899 // the GEP we build (by only using vector-typed operands for
1900 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1901 // produce a vector of pointers, we need to either arbitrarily pick an
1902 // operand to broadcast, or broadcast a clone of the original GEP.
1903 // Here, we broadcast a clone of the original.
1904 //
1905 // TODO: If at some point we decide to scalarize instructions having
1906 // loop-invariant operands, this special case will no longer be
1907 // required. We would add the scalarization decision to
1908 // collectLoopScalars() and teach getVectorValue() to broadcast
1909 // the lane-zero scalar value.
1911 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1912 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1913
1914 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1915 ArrayRef(Ops).drop_front(), "",
1917 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1918 State.set(this, Splat);
1919 State.addMetadata(Splat, GEP);
1920 } else {
1921 // If the GEP has at least one loop-varying operand, we are sure to
1922 // produce a vector of pointers unless VF is scalar.
1923 // The pointer operand of the new GEP. If it's loop-invariant, we
1924 // won't broadcast it.
1925 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1926 : State.get(getOperand(0));
1927
1928 // Collect all the indices for the new GEP. If any index is
1929 // loop-invariant, we won't broadcast it.
1931 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1932 VPValue *Operand = getOperand(I);
1933 if (isIndexLoopInvariant(I - 1))
1934 Indices.push_back(State.get(Operand, VPLane(0)));
1935 else
1936 Indices.push_back(State.get(Operand));
1937 }
1938
1939 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1940 // but it should be a vector, otherwise.
1941 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1942 Indices, "", getGEPNoWrapFlags());
1943 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1944 "NewGEP is not a pointer vector");
1945 State.set(this, NewGEP);
1946 State.addMetadata(NewGEP, GEP);
1947 }
1948}
1949
1950#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1952 VPSlotTracker &SlotTracker) const {
1953 O << Indent << "WIDEN-GEP ";
1954 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1955 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1956 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1957
1958 O << " ";
1960 O << " = getelementptr";
1961 printFlags(O);
1963}
1964#endif
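When at least one operand is loop-varying, the GEP built above yields one pointer per lane. A standalone sketch, not from this file, of that per-lane address computation for a single-index GEP over a hypothetical int32_t array:

// Standalone sketch (not from VPlanRecipes.cpp): a widened single-index GEP
// produces a vector of addresses, one per lane of the index vector.
#include <cstdint>
#include <vector>

std::vector<const int32_t *> widenGep(const int32_t *Base,
                                      const std::vector<int64_t> &Indices) {
  std::vector<const int32_t *> Ptrs;
  for (int64_t Idx : Indices)   // one loop-varying index per lane
    Ptrs.push_back(Base + Idx); // &Base[Idx], scaled by the element size
  return Ptrs;
}
// With Indices = {0, 3, 6, 9} the result is {&Base[0], &Base[3], &Base[6],
// &Base[9]}, i.e. the vector of pointers the recipe stores into State.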
1965
1966static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1967 unsigned CurrentPart, IRBuilderBase &Builder) {
1968 // Use i32 for the gep index type when the value is constant,
1969 // or query DataLayout for a more suitable index type otherwise.
1970 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1971 return IsScalable && (IsReverse || CurrentPart > 0)
1972 ? DL.getIndexType(Builder.getPtrTy(0))
1973 : Builder.getInt32Ty();
1974}
1975
1977 auto &Builder = State.Builder;
1979 unsigned CurrentPart = getUnrollPart(*this);
1980 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1981 CurrentPart, Builder);
1982
1983 // The wide store needs to start at the last vector element.
1984 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1985 if (IndexTy != RunTimeVF->getType())
1986 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1987 // NumElt = -CurrentPart * RunTimeVF
1988 Value *NumElt = Builder.CreateMul(
1989 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1990 // LastLane = 1 - RunTimeVF
1991 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1992 Value *Ptr = State.get(getOperand(0), VPLane(0));
1993 Value *ResultPtr =
1994 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
1995 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
1997
1998 State.set(this, ResultPtr, /*IsScalar*/ true);
1999}
2000
2001#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2003 VPSlotTracker &SlotTracker) const {
2004 O << Indent;
2006 O << " = reverse-vector-pointer";
2007 printFlags(O);
2009}
2010#endif
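A standalone sketch, not from this file, of the two GEP offsets computed above (NumElt = -Part * VF, then LastLane = 1 - VF): the result addresses the first element of a VF-wide window that covers the current part in reverse. VF, the part number, and the element type are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): pointer adjustment for a
// reversed consecutive access.
#include <cstdint>

template <typename T>
const T *reverseVectorPointer(const T *Ptr, int64_t VF, int64_t Part) {
  int64_t NumElt = -Part * VF; // step back over the parts already covered
  int64_t LastLane = 1 - VF;   // start of the VF-wide window ending at Ptr
  return Ptr + NumElt + LastLane;
}
// For VF = 4: part 0 starts at Ptr - 3 and part 1 at Ptr - 7, so a normal wide
// load from the adjusted pointer, followed by a vector reverse, yields the
// original (descending) element order.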
2011
2013 auto &Builder = State.Builder;
2015 unsigned CurrentPart = getUnrollPart(*this);
2016 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2017 CurrentPart, Builder);
2018 Value *Ptr = State.get(getOperand(0), VPLane(0));
2019
2020 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2021 Value *ResultPtr =
2022 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2023
2024 State.set(this, ResultPtr, /*IsScalar*/ true);
2025}
2026
2027#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2029 VPSlotTracker &SlotTracker) const {
2030 O << Indent;
2032 O << " = vector-pointer ";
2033
2035}
2036#endif
2037
2039 assert(isNormalized() && "Expected blend to be normalized!");
2041 // We know that all PHIs in non-header blocks are converted into
2042 // selects, so we don't have to worry about the insertion order and we
2043 // can just use the builder.
2044 // At this point we generate the predication tree. There may be
2045 // duplications since this is a simple recursive scan, but future
2046 // optimizations will clean it up.
2047
2048 unsigned NumIncoming = getNumIncomingValues();
2049
2050 // Generate a sequence of selects of the form:
2051 // SELECT(Mask3, In3,
2052 // SELECT(Mask2, In2,
2053 // SELECT(Mask1, In1,
2054 // In0)))
2055  // Note that Mask0 is never used: lanes for which no path reaches this phi
2056  // (and which are therefore essentially undef) take their value from In0.
2057 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2058 Value *Result = nullptr;
2059 for (unsigned In = 0; In < NumIncoming; ++In) {
2060 // We might have single edge PHIs (blocks) - use an identity
2061 // 'select' for the first PHI operand.
2062 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2063 if (In == 0)
2064 Result = In0; // Initialize with the first incoming value.
2065 else {
2066 // Select between the current value and the previous incoming edge
2067 // based on the incoming mask.
2068 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2069 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2070 }
2071 }
2072 State.set(this, Result, OnlyFirstLaneUsed);
2073}
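A standalone sketch, not from this file, of the per-lane semantics of the select chain generated above; the incoming values and masks are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): one lane of a normalized
// blend. Later incoming values win because they form the outermost selects.
#include <vector>

int blendLane(const std::vector<int> &Incoming, const std::vector<bool> &Mask) {
  int Result = Incoming[0]; // Mask[0] is never consulted
  for (size_t In = 1; In < Incoming.size(); ++In)
    if (Mask[In])           // CreateSelect(Mask[In], Incoming[In], Result)
      Result = Incoming[In];
  return Result;
}
// blendLane({10, 20, 30}, {false, false, true}) == 30, matching
// SELECT(Mask2, In2, SELECT(Mask1, In1, In0)).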
2074
2076 VPCostContext &Ctx) const {
2078
2079 // Handle cases where only the first lane is used the same way as the legacy
2080 // cost model.
2082 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2083
2084 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2085 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2086 return (getNumIncomingValues() - 1) *
2087 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2089}
2090
2091#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2093 VPSlotTracker &SlotTracker) const {
2094 O << Indent << "BLEND ";
2096 O << " =";
2097 if (getNumIncomingValues() == 1) {
2098 // Not a User of any mask: not really blending, this is a
2099 // single-predecessor phi.
2100 O << " ";
2102 } else {
2103 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2104 O << " ";
2106 if (I == 0)
2107 continue;
2108 O << "/";
2110 }
2111 }
2112}
2113#endif
2114
2116 assert(!State.Lane && "Reduction being replicated.");
2117 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2118 RecurKind Kind = RdxDesc.getRecurrenceKind();
2119 // Propagate the fast-math flags carried by the underlying instruction.
2121 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2123 Value *NewVecOp = State.get(getVecOp());
2124 if (VPValue *Cond = getCondOp()) {
2125 Value *NewCond = State.get(Cond, State.VF.isScalar());
2126 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2127 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2128
2129 Value *Start;
2131 Start = RdxDesc.getRecurrenceStartValue();
2132 else
2133 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2134 RdxDesc.getFastMathFlags());
2135 if (State.VF.isVector())
2136 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2137
2138 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2139 NewVecOp = Select;
2140 }
2141 Value *NewRed;
2142 Value *NextInChain;
2143 if (IsOrdered) {
2144 if (State.VF.isVector())
2145 NewRed =
2146 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2147 else
2148 NewRed = State.Builder.CreateBinOp(
2149 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2150 PrevInChain = NewRed;
2151 NextInChain = NewRed;
2152 } else {
2153 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2154 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2156 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2157 NewRed, PrevInChain);
2158 else
2159 NextInChain = State.Builder.CreateBinOp(
2160 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2161 }
2162 State.set(this, NextInChain, /*IsScalar*/ true);
2163}
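A standalone sketch, not from this file, of the common unordered case above for an add reduction with a condition: masked-off lanes contribute the identity, and the vector reduction is then folded into the chain value. Inputs are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): conditional add reduction.
#include <vector>

int reduceAddConditional(int PrevInChain, const std::vector<int> &VecOp,
                         const std::vector<bool> &Cond) {
  int NewRed = 0;                     // reduce(select(Cond, VecOp, Identity))
  for (size_t L = 0; L < VecOp.size(); ++L)
    NewRed += Cond[L] ? VecOp[L] : 0; // identity of add is 0
  return NewRed + PrevInChain;        // NextInChain = BinOp(NewRed, PrevInChain)
}
// reduceAddConditional(5, {1, 2, 3, 4}, {true, false, true, true}) == 13.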
2164
2166 assert(!State.Lane && "Reduction being replicated.");
2167
2168 auto &Builder = State.Builder;
2169 // Propagate the fast-math flags carried by the underlying instruction.
2170 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2172 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2173
2174 RecurKind Kind = RdxDesc.getRecurrenceKind();
2175 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2176 Value *VecOp = State.get(getVecOp());
2177 Value *EVL = State.get(getEVL(), VPLane(0));
2178
2179 VectorBuilder VBuilder(Builder);
2180 VBuilder.setEVL(EVL);
2181 Value *Mask;
2182 // TODO: move the all-true mask generation into VectorBuilder.
2183 if (VPValue *CondOp = getCondOp())
2184 Mask = State.get(CondOp);
2185 else
2186 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2187 VBuilder.setMask(Mask);
2188
2189 Value *NewRed;
2190 if (isOrdered()) {
2191 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2192 } else {
2193 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2195 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2196 else
2197 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2198 NewRed, Prev);
2199 }
2200 State.set(this, NewRed, /*IsScalar*/ true);
2201}
2202
2204 VPCostContext &Ctx) const {
2205 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2206 Type *ElementTy = Ctx.Types.inferScalarType(this);
2207 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2209 unsigned Opcode = RdxDesc.getOpcode();
2210
2211 // TODO: Support any-of and in-loop reductions.
2212 assert(
2214 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2215 "Any-of reduction not implemented in VPlan-based cost model currently.");
2216 assert(
2217 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2218 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2219 "In-loop reduction not implemented in VPlan-based cost model currently.");
2220
2221 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2222 "Inferred type and recurrence type mismatch.");
2223
2224 // Cost = Reduction cost + BinOp cost
2226 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2229 return Cost + Ctx.TTI.getMinMaxReductionCost(
2230 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2231 }
2232
2233 return Cost + Ctx.TTI.getArithmeticReductionCost(
2234 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2235}
2236
2237#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2239 VPSlotTracker &SlotTracker) const {
2240 O << Indent << "REDUCE ";
2242 O << " = ";
2244 O << " +";
2245 if (isa<FPMathOperator>(getUnderlyingInstr()))
2247 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2249 if (isConditional()) {
2250 O << ", ";
2252 }
2253 O << ")";
2254 if (RdxDesc.IntermediateStore)
2255 O << " (with final reduction value stored in invariant address sank "
2256 "outside of loop)";
2257}
2258
2260 VPSlotTracker &SlotTracker) const {
2262 O << Indent << "REDUCE ";
2264 O << " = ";
2266 O << " +";
2267 if (isa<FPMathOperator>(getUnderlyingInstr()))
2269 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2271 O << ", ";
2273 if (isConditional()) {
2274 O << ", ";
2276 }
2277 O << ")";
2278 if (RdxDesc.IntermediateStore)
2279 O << " (with final reduction value stored in invariant address sank "
2280 "outside of loop)";
2281}
2282#endif
2283
2285 // Find if the recipe is used by a widened recipe via an intervening
2286 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2287 return any_of(users(), [](const VPUser *U) {
2288 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2289 return any_of(PredR->users(), [PredR](const VPUser *U) {
2290 return !U->usesScalars(PredR);
2291 });
2292 return false;
2293 });
2294}
2295
2297 VPCostContext &Ctx) const {
2298 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2299 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2300  // transform; avoid computing its cost multiple times for now.
2301 Ctx.SkipCostComputation.insert(UI);
2302 return Ctx.getLegacyCost(UI, VF);
2303}
2304
2305#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2307 VPSlotTracker &SlotTracker) const {
2308 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2309
2310 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2312 O << " = ";
2313 }
2314 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2315 O << "call";
2316 printFlags(O);
2317 O << "@" << CB->getCalledFunction()->getName() << "(";
2319 O, [&O, &SlotTracker](VPValue *Op) {
2320 Op->printAsOperand(O, SlotTracker);
2321 });
2322 O << ")";
2323 } else {
2325 printFlags(O);
2327 }
2328
2329 if (shouldPack())
2330 O << " (S->V)";
2331}
2332#endif
2333
2334Value *VPScalarCastRecipe::generate(VPTransformState &State) {
2337 "Codegen only implemented for first lane.");
2338 switch (Opcode) {
2339 case Instruction::SExt:
2340 case Instruction::ZExt:
2341 case Instruction::Trunc: {
2342 // Note: SExt/ZExt not used yet.
2343 Value *Op = State.get(getOperand(0), VPLane(0));
2344 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2345 }
2346 default:
2347 llvm_unreachable("opcode not implemented yet");
2348 }
2349}
2350
2351void VPScalarCastRecipe::execute(VPTransformState &State) {
2352 State.set(this, generate(State), VPLane(0));
2353}
2354
2355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2356void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
2357 VPSlotTracker &SlotTracker) const {
2358 O << Indent << "SCALAR-CAST ";
2359 printAsOperand(O, SlotTracker);
2360 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2361 printOperands(O, SlotTracker);
2362 O << " to " << *ResultTy;
2363}
2364#endif
2365
2367 assert(State.Lane && "Branch on Mask works only on single instance.");
2368
2369 unsigned Lane = State.Lane->getKnownLane();
2370
2371 Value *ConditionBit = nullptr;
2372 VPValue *BlockInMask = getMask();
2373 if (BlockInMask) {
2374 ConditionBit = State.get(BlockInMask);
2375 if (ConditionBit->getType()->isVectorTy())
2376 ConditionBit = State.Builder.CreateExtractElement(
2377 ConditionBit, State.Builder.getInt32(Lane));
2378 } else // Block in mask is all-one.
2379 ConditionBit = State.Builder.getTrue();
2380
2381 // Replace the temporary unreachable terminator with a new conditional branch,
2382 // whose two destinations will be set later when they are created.
2383 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2384 assert(isa<UnreachableInst>(CurrentTerminator) &&
2385 "Expected to replace unreachable terminator with conditional branch.");
2386 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2387 CondBr->setSuccessor(0, nullptr);
2388 ReplaceInstWithInst(CurrentTerminator, CondBr);
2389}
2390
2392 VPCostContext &Ctx) const {
2393 // The legacy cost model doesn't assign costs to branches for individual
2394 // replicate regions. Match the current behavior in the VPlan cost model for
2395 // now.
2396 return 0;
2397}
2398
2401 assert(State.Lane && "Predicated instruction PHI works per instance.");
2402 Instruction *ScalarPredInst =
2403 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2404 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2405 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2406 assert(PredicatingBB && "Predicated block has no single predecessor.");
2407 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2408 "operand must be VPReplicateRecipe");
2409
2410 // By current pack/unpack logic we need to generate only a single phi node: if
2411 // a vector value for the predicated instruction exists at this point it means
2412 // the instruction has vector users only, and a phi for the vector value is
2413 // needed. In this case the recipe of the predicated instruction is marked to
2414 // also do that packing, thereby "hoisting" the insert-element sequence.
2415 // Otherwise, a phi node for the scalar value is needed.
2416 if (State.hasVectorValue(getOperand(0))) {
2417 Value *VectorValue = State.get(getOperand(0));
2418 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2419 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2420 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2421 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2422 if (State.hasVectorValue(this))
2423 State.reset(this, VPhi);
2424 else
2425 State.set(this, VPhi);
2426 // NOTE: Currently we need to update the value of the operand, so the next
2427 // predicated iteration inserts its generated value in the correct vector.
2428 State.reset(getOperand(0), VPhi);
2429 } else {
2430 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2431 return;
2432
2433 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2434 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2435 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2436 PredicatingBB);
2437 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2438 if (State.hasScalarValue(this, *State.Lane))
2439 State.reset(this, Phi, *State.Lane);
2440 else
2441 State.set(this, Phi, *State.Lane);
2442 // NOTE: Currently we need to update the value of the operand, so the next
2443 // predicated iteration inserts its generated value in the correct vector.
2444 State.reset(getOperand(0), Phi, *State.Lane);
2445 }
2446}
2447
2448#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2450 VPSlotTracker &SlotTracker) const {
2451 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2453 O << " = ";
2455}
2456#endif
2457
2459 VPCostContext &Ctx) const {
2461 const Align Alignment =
2463 unsigned AS =
2466
2467 if (!Consecutive) {
2468 // TODO: Using the original IR may not be accurate.
2469 // Currently, ARM will use the underlying IR to calculate gather/scatter
2470 // instruction cost.
2472 assert(!Reverse &&
2473 "Inconsecutive memory access should not have the order.");
2474 return Ctx.TTI.getAddressComputationCost(Ty) +
2476 IsMasked, Alignment, CostKind,
2477 &Ingredient);
2478 }
2479
2481 if (IsMasked) {
2482 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2483 AS, CostKind);
2484 } else {
2485 TTI::OperandValueInfo OpInfo =
2487 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2488 CostKind, OpInfo, &Ingredient);
2489 }
2490 if (!Reverse)
2491 return Cost;
2492
2494 cast<VectorType>(Ty), {}, CostKind, 0);
2495}
2496
2498 auto *LI = cast<LoadInst>(&Ingredient);
2499
2500 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2501 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2502 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2503 bool CreateGather = !isConsecutive();
2504
2505 auto &Builder = State.Builder;
2507 Value *Mask = nullptr;
2508 if (auto *VPMask = getMask()) {
2509     // Mask reversal is only needed when a mask is present: a null (absent)
2510     // mask represents all-ones, and the reverse of an all-ones mask is unchanged.
2511 Mask = State.get(VPMask);
2512 if (isReverse())
2513 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2514 }
2515
2516 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2517 Value *NewLI;
2518 if (CreateGather) {
2519 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2520 "wide.masked.gather");
2521 } else if (Mask) {
2522 NewLI =
2523 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2524 PoisonValue::get(DataTy), "wide.masked.load");
2525 } else {
2526 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2527 }
2528 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2529 State.addMetadata(NewLI, LI);
2530 if (Reverse)
2531 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2532 State.set(this, NewLI);
2533}
2534
2535#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2537 VPSlotTracker &SlotTracker) const {
2538 O << Indent << "WIDEN ";
2540 O << " = load ";
2542}
2543#endif
2544
2545/// Use an all-true mask for the reverse rather than the actual mask, as it
2546/// avoids a dependence without affecting the result.
2548 Value *EVL, const Twine &Name) {
2549 VectorType *ValTy = cast<VectorType>(Operand->getType());
2550 Value *AllTrueMask =
2551 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2552 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2553 {Operand, AllTrueMask, EVL}, nullptr, Name);
2554}
2555
2557 auto *LI = cast<LoadInst>(&Ingredient);
2558
2559 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2560 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2561 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2562 bool CreateGather = !isConsecutive();
2563
2564 auto &Builder = State.Builder;
2566 CallInst *NewLI;
2567 Value *EVL = State.get(getEVL(), VPLane(0));
2568 Value *Addr = State.get(getAddr(), !CreateGather);
2569 Value *Mask = nullptr;
2570 if (VPValue *VPMask = getMask()) {
2571 Mask = State.get(VPMask);
2572 if (isReverse())
2573 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2574 } else {
2575 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2576 }
2577
2578 if (CreateGather) {
2579 NewLI =
2580 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2581 nullptr, "wide.masked.gather");
2582 } else {
2583 VectorBuilder VBuilder(Builder);
2584 VBuilder.setEVL(EVL).setMask(Mask);
2585 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2586 Instruction::Load, DataTy, Addr, "vp.op.load"));
2587 }
2588 NewLI->addParamAttr(
2589 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2590 State.addMetadata(NewLI, LI);
2591 Instruction *Res = NewLI;
2592 if (isReverse())
2593 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2594 State.set(this, Res);
2595}
2596
2598 VPCostContext &Ctx) const {
2599 if (!Consecutive || IsMasked)
2600 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2601
2602  // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2603  // here because the EVL recipes use EVL to replace the tail mask, but the
2604  // legacy model always accounts for the cost of the mask.
2605  // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2606  // no longer need to compare against the legacy cost model.
2608 const Align Alignment =
2610 unsigned AS =
2614 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2615 if (!Reverse)
2616 return Cost;
2617
2619 cast<VectorType>(Ty), {}, CostKind, 0);
2620}
2621
2622#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2624 VPSlotTracker &SlotTracker) const {
2625 O << Indent << "WIDEN ";
2627 O << " = vp.load ";
2629}
2630#endif
2631
2633 auto *SI = cast<StoreInst>(&Ingredient);
2634
2635 VPValue *StoredVPValue = getStoredValue();
2636 bool CreateScatter = !isConsecutive();
2637 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2638
2639 auto &Builder = State.Builder;
2641
2642 Value *Mask = nullptr;
2643 if (auto *VPMask = getMask()) {
2644     // Mask reversal is only needed when a mask is present: a null (absent)
2645     // mask represents all-ones, and the reverse of an all-ones mask is unchanged.
2646 Mask = State.get(VPMask);
2647 if (isReverse())
2648 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2649 }
2650
2651 Value *StoredVal = State.get(StoredVPValue);
2652 if (isReverse()) {
2653 // If we store to reverse consecutive memory locations, then we need
2654 // to reverse the order of elements in the stored value.
2655 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2656 // We don't want to update the value in the map as it might be used in
2657 // another expression. So don't call resetVectorValue(StoredVal).
2658 }
2659 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2660 Instruction *NewSI = nullptr;
2661 if (CreateScatter)
2662 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2663 else if (Mask)
2664 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2665 else
2666 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2667 State.addMetadata(NewSI, SI);
2668}
2669
2670#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2672 VPSlotTracker &SlotTracker) const {
2673 O << Indent << "WIDEN store ";
2675}
2676#endif
2677
2679 auto *SI = cast<StoreInst>(&Ingredient);
2680
2681 VPValue *StoredValue = getStoredValue();
2682 bool CreateScatter = !isConsecutive();
2683 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2684
2685 auto &Builder = State.Builder;
2687
2688 CallInst *NewSI = nullptr;
2689 Value *StoredVal = State.get(StoredValue);
2690 Value *EVL = State.get(getEVL(), VPLane(0));
2691 if (isReverse())
2692 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2693 Value *Mask = nullptr;
2694 if (VPValue *VPMask = getMask()) {
2695 Mask = State.get(VPMask);
2696 if (isReverse())
2697 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2698 } else {
2699 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2700 }
2701 Value *Addr = State.get(getAddr(), !CreateScatter);
2702 if (CreateScatter) {
2703 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2704 Intrinsic::vp_scatter,
2705 {StoredVal, Addr, Mask, EVL});
2706 } else {
2707 VectorBuilder VBuilder(Builder);
2708 VBuilder.setEVL(EVL).setMask(Mask);
2709 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2710 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2711 {StoredVal, Addr}));
2712 }
2713 NewSI->addParamAttr(
2714 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2715 State.addMetadata(NewSI, SI);
2716}
2717
2719 VPCostContext &Ctx) const {
2720 if (!Consecutive || IsMasked)
2721 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2722
2723  // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2724  // here because the EVL recipes use EVL to replace the tail mask, but the
2725  // legacy model always accounts for the cost of the mask.
2726  // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2727  // no longer need to compare against the legacy cost model.
2729 const Align Alignment =
2731 unsigned AS =
2735 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2736 if (!Reverse)
2737 return Cost;
2738
2740 cast<VectorType>(Ty), {}, CostKind, 0);
2741}
2742
2743#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2745 VPSlotTracker &SlotTracker) const {
2746 O << Indent << "WIDEN vp.store ";
2748}
2749#endif
2750
2752 VectorType *DstVTy, const DataLayout &DL) {
2753 // Verify that V is a vector type with same number of elements as DstVTy.
2754 auto VF = DstVTy->getElementCount();
2755 auto *SrcVecTy = cast<VectorType>(V->getType());
2756 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2757 Type *SrcElemTy = SrcVecTy->getElementType();
2758 Type *DstElemTy = DstVTy->getElementType();
2759 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2760 "Vector elements must have same size");
2761
2762 // Do a direct cast if element types are castable.
2763 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2764 return Builder.CreateBitOrPointerCast(V, DstVTy);
2765 }
2766 // V cannot be directly casted to desired vector type.
2767 // May happen when V is a floating point vector but DstVTy is a vector of
2768 // pointers or vice-versa. Handle this using a two-step bitcast using an
2769 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2770 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2771 "Only one type should be a pointer type");
2772 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2773 "Only one type should be a floating point type");
2774 Type *IntTy =
2775 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2776 auto *VecIntTy = VectorType::get(IntTy, VF);
2777 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2778 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2779}
2780
2781/// Return a vector containing interleaved elements from multiple
2782/// smaller input vectors.
2784 const Twine &Name) {
2785 unsigned Factor = Vals.size();
2786 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2787
2788 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2789#ifndef NDEBUG
2790 for (Value *Val : Vals)
2791 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2792#endif
2793
2794 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2795 // must use intrinsics to interleave.
2796 if (VecTy->isScalableTy()) {
2798 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2799 Vals,
2800 /*FMFSource=*/nullptr, Name);
2801 }
2802
2803 // Fixed length. Start by concatenating all vectors into a wide vector.
2804 Value *WideVec = concatenateVectors(Builder, Vals);
2805
2806 // Interleave the elements into the wide vector.
2807 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2808 return Builder.CreateShuffleVector(
2809 WideVec, createInterleaveMask(NumElts, Factor), Name);
2810}
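A standalone sketch, not from this file, of the fixed-length shuffle mask produced by createInterleaveMask in the path above: element I of concatenated input J moves to interleaved position I * Factor + J.

// Standalone sketch (not from VPlanRecipes.cpp): interleave mask construction.
#include <vector>

std::vector<int> interleaveMask(unsigned NumElts, unsigned Factor) {
  std::vector<int> Mask;
  for (unsigned I = 0; I < NumElts; ++I)
    for (unsigned J = 0; J < Factor; ++J)
      Mask.push_back(int(J * NumElts + I)); // lane I of concatenated vector J
  return Mask;
}
// interleaveMask(4, 2) == {0, 4, 1, 5, 2, 6, 3, 7}: the wide shufflevector
// alternates the elements of the two concatenated inputs.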
2811
2812// Try to vectorize the interleave group that \p Instr belongs to.
2813//
2814// E.g. Translate following interleaved load group (factor = 3):
2815// for (i = 0; i < N; i+=3) {
2816// R = Pic[i]; // Member of index 0
2817// G = Pic[i+1]; // Member of index 1
2818// B = Pic[i+2]; // Member of index 2
2819// ... // do something to R, G, B
2820// }
2821// To:
2822// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2823// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2824// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2825// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2826//
2827// Or translate following interleaved store group (factor = 3):
2828// for (i = 0; i < N; i+=3) {
2829// ... do something to R, G, B
2830// Pic[i] = R; // Member of index 0
2831// Pic[i+1] = G; // Member of index 1
2832// Pic[i+2] = B; // Member of index 2
2833// }
2834// To:
2835// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2836// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2837// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2838// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2839// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2841 assert(!State.Lane && "Interleave group being replicated.");
2842 const InterleaveGroup<Instruction> *Group = IG;
2843 Instruction *Instr = Group->getInsertPos();
2844
2845 // Prepare for the vector type of the interleaved load/store.
2846 Type *ScalarTy = getLoadStoreType(Instr);
2847 unsigned InterleaveFactor = Group->getFactor();
2848 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2849
2850 // TODO: extend the masked interleaved-group support to reversed access.
2851 VPValue *BlockInMask = getMask();
2852 assert((!BlockInMask || !Group->isReverse()) &&
2853 "Reversed masked interleave-group not supported.");
2854
2855 VPValue *Addr = getAddr();
2856 Value *ResAddr = State.get(Addr, VPLane(0));
2857 if (auto *I = dyn_cast<Instruction>(ResAddr))
2858 State.setDebugLocFrom(I->getDebugLoc());
2859
2860 // If the group is reverse, adjust the index to refer to the last vector lane
2861 // instead of the first. We adjust the index from the first vector lane,
2862 // rather than directly getting the pointer for lane VF - 1, because the
2863 // pointer operand of the interleaved access is supposed to be uniform.
2864 if (Group->isReverse()) {
2865 Value *RuntimeVF =
2866 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2867 Value *Index =
2868 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2869 Index = State.Builder.CreateMul(Index,
2870 State.Builder.getInt32(Group->getFactor()));
2871 Index = State.Builder.CreateNeg(Index);
2872
2873 bool InBounds = false;
2874 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2875 InBounds = Gep->isInBounds();
2876 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2877 }
2878
2879 State.setDebugLocFrom(Instr->getDebugLoc());
2880 Value *PoisonVec = PoisonValue::get(VecTy);
2881
2882 auto CreateGroupMask = [&BlockInMask, &State,
2883 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2884 if (State.VF.isScalable()) {
2885 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2886 assert(InterleaveFactor == 2 &&
2887 "Unsupported deinterleave factor for scalable vectors");
2888 auto *ResBlockInMask = State.get(BlockInMask);
2889 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2890 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2891 State.VF.getKnownMinValue() * 2, true);
2892 return State.Builder.CreateIntrinsic(
2893 MaskTy, Intrinsic::vector_interleave2, Ops,
2894 /*FMFSource=*/nullptr, "interleaved.mask");
2895 }
2896
2897 if (!BlockInMask)
2898 return MaskForGaps;
2899
2900 Value *ResBlockInMask = State.get(BlockInMask);
2901 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2902 ResBlockInMask,
2903 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2904 "interleaved.mask");
2905 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2906 ShuffledMask, MaskForGaps)
2907 : ShuffledMask;
2908 };
2909
2910 const DataLayout &DL = Instr->getDataLayout();
2911 // Vectorize the interleaved load group.
2912 if (isa<LoadInst>(Instr)) {
2913 Value *MaskForGaps = nullptr;
2914 if (NeedsMaskForGaps) {
2915 MaskForGaps = createBitMaskForGaps(State.Builder,
2916 State.VF.getKnownMinValue(), *Group);
2917 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2918 }
2919
2920 Instruction *NewLoad;
2921 if (BlockInMask || MaskForGaps) {
2922 Value *GroupMask = CreateGroupMask(MaskForGaps);
2923 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2924 Group->getAlign(), GroupMask,
2925 PoisonVec, "wide.masked.vec");
2926 } else
2927 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2928 Group->getAlign(), "wide.vec");
2929 Group->addMetadata(NewLoad);
2930
2932 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2933 if (VecTy->isScalableTy()) {
2934 assert(InterleaveFactor == 2 &&
2935 "Unsupported deinterleave factor for scalable vectors");
2936
2937 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2938 // so must use intrinsics to deinterleave.
2939 Value *DI = State.Builder.CreateIntrinsic(
2940 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2941 /*FMFSource=*/nullptr, "strided.vec");
2942 unsigned J = 0;
2943 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2944 Instruction *Member = Group->getMember(I);
2945
2946 if (!Member)
2947 continue;
2948
2949 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
2950 // If this member has different type, cast the result type.
2951 if (Member->getType() != ScalarTy) {
2952 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2953 StridedVec =
2954 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2955 }
2956
2957 if (Group->isReverse())
2958 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2959
2960 State.set(VPDefs[J], StridedVec);
2961 ++J;
2962 }
2963
2964 return;
2965 }
2966
2967 // For each member in the group, shuffle out the appropriate data from the
2968 // wide loads.
2969 unsigned J = 0;
2970 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2971 Instruction *Member = Group->getMember(I);
2972
2973 // Skip the gaps in the group.
2974 if (!Member)
2975 continue;
2976
2977 auto StrideMask =
2978 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
2979 Value *StridedVec =
2980 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
2981
2982 // If this member has different type, cast the result type.
2983 if (Member->getType() != ScalarTy) {
2984 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
2985 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2986 StridedVec =
2987 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2988 }
2989
2990 if (Group->isReverse())
2991 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2992
2993 State.set(VPDefs[J], StridedVec);
2994 ++J;
2995 }
2996 return;
2997 }
2998
2999  // The subvector type for the current instruction.
3000 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3001
3002 // Vectorize the interleaved store group.
3003 Value *MaskForGaps =
3004 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3005 assert((!MaskForGaps || !State.VF.isScalable()) &&
3006 "masking gaps for scalable vectors is not yet supported.");
3007 ArrayRef<VPValue *> StoredValues = getStoredValues();
3008 // Collect the stored vector from each member.
3009 SmallVector<Value *, 4> StoredVecs;
3010 unsigned StoredIdx = 0;
3011 for (unsigned i = 0; i < InterleaveFactor; i++) {
3012 assert((Group->getMember(i) || MaskForGaps) &&
3013 "Fail to get a member from an interleaved store group");
3014 Instruction *Member = Group->getMember(i);
3015
3016 // Skip the gaps in the group.
3017 if (!Member) {
3018 Value *Undef = PoisonValue::get(SubVT);
3019 StoredVecs.push_back(Undef);
3020 continue;
3021 }
3022
3023 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3024 ++StoredIdx;
3025
3026 if (Group->isReverse())
3027 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3028
3029 // If this member has different type, cast it to a unified type.
3030
3031 if (StoredVec->getType() != SubVT)
3032 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3033
3034 StoredVecs.push_back(StoredVec);
3035 }
3036
3037 // Interleave all the smaller vectors into one wider vector.
3038 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3039 Instruction *NewStoreInstr;
3040 if (BlockInMask || MaskForGaps) {
3041 Value *GroupMask = CreateGroupMask(MaskForGaps);
3042 NewStoreInstr = State.Builder.CreateMaskedStore(
3043 IVec, ResAddr, Group->getAlign(), GroupMask);
3044 } else
3045 NewStoreInstr =
3046 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3047
3048 Group->addMetadata(NewStoreInstr);
3049}
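A standalone sketch, not from this file, of the strided masks used above to split a wide interleaved load back into its members (the inverse of the interleave mask): member I of a factor-F group gathers elements I, I + F, I + 2F, and so on.

// Standalone sketch (not from VPlanRecipes.cpp): deinterleave (stride) masks.
#include <vector>

std::vector<int> strideMask(unsigned MemberIdx, unsigned Factor, unsigned VF) {
  std::vector<int> Mask;
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Mask.push_back(int(MemberIdx + Lane * Factor));
  return Mask;
}
// For the factor-3 R/G/B example in the comment above with VF = 4:
// strideMask(0, 3, 4) == {0, 3, 6, 9}    (R elements)
// strideMask(1, 3, 4) == {1, 4, 7, 10}   (G elements)
// strideMask(2, 3, 4) == {2, 5, 8, 11}   (B elements)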
3050
3051#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3053 VPSlotTracker &SlotTracker) const {
3054 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3055 IG->getInsertPos()->printAsOperand(O, false);
3056 O << ", ";
3058 VPValue *Mask = getMask();
3059 if (Mask) {
3060 O << ", ";
3061 Mask->printAsOperand(O, SlotTracker);
3062 }
3063
3064 unsigned OpIdx = 0;
3065 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3066 if (!IG->getMember(i))
3067 continue;
3068 if (getNumStoreOperands() > 0) {
3069 O << "\n" << Indent << " store ";
3070 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3071 O << " to index " << i;
3072 } else {
3073 O << "\n" << Indent << " ";
3075 O << " = load from index " << i;
3076 }
3077 ++OpIdx;
3078 }
3079}
3080#endif
3081
3083 VPCostContext &Ctx) const {
3084 Instruction *InsertPos = getInsertPos();
3085 // Find the VPValue index of the interleave group. We need to skip gaps.
3086 unsigned InsertPosIdx = 0;
3087 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3088 if (auto *Member = IG->getMember(Idx)) {
3089 if (Member == InsertPos)
3090 break;
3091 InsertPosIdx++;
3092 }
3093 Type *ValTy = Ctx.Types.inferScalarType(
3094 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3095 : getStoredValues()[InsertPosIdx]);
3096 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3097 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3099
3100 unsigned InterleaveFactor = IG->getFactor();
3101 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3102
3103 // Holds the indices of existing members in the interleaved group.
3105 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3106 if (IG->getMember(IF))
3107 Indices.push_back(IF);
3108
3109 // Calculate the cost of the whole interleaved group.
3111 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3112 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3113
3114 if (!IG->isReverse())
3115 return Cost;
3116
3117 return Cost + IG->getNumMembers() *
3119 VectorTy, std::nullopt, CostKind, 0);
3120}
3121
3122#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3124 VPSlotTracker &SlotTracker) const {
3125 O << Indent << "EMIT ";
3127 O << " = CANONICAL-INDUCTION ";
3129}
3130#endif
3131
3133 return IsScalarAfterVectorization &&
3134 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3135}
3136
3138 assert(getInductionDescriptor().getKind() ==
3140 "Not a pointer induction according to InductionDescriptor!");
3141 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3142 "Unexpected type.");
3144 "Recipe should have been replaced");
3145
3146 unsigned CurrentPart = getUnrollPart(*this);
3147
3148 // Build a pointer phi
3149 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3150 Type *ScStValueType = ScalarStartValue->getType();
3151
3152 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3153 PHINode *NewPointerPhi = nullptr;
3154 if (CurrentPart == 0) {
3155 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3156 ->getPlan()
3157 ->getVectorLoopRegion()
3158 ->getEntryBasicBlock()
3159 ->front());
3160 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3161 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3162 CanonicalIV->getIterator());
3163 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3164 NewPointerPhi->setDebugLoc(getDebugLoc());
3165 } else {
3166 // The recipe has been unrolled. In that case, fetch the single pointer phi
3167 // shared among all unrolled parts of the recipe.
3168 auto *GEP =
3169 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3170 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3171 }
3172
3173 // A pointer induction, performed by using a gep
3174 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3175 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3176 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3177 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3178 // Add induction update using an incorrect block temporarily. The phi node
3179 // will be fixed after VPlan execution. Note that at this point the latch
3180 // block cannot be used, as it does not exist yet.
3181 // TODO: Model increment value in VPlan, by turning the recipe into a
3182 // multi-def and a subclass of VPHeaderPHIRecipe.
3183 if (CurrentPart == 0) {
3184 // The recipe represents the first part of the pointer induction. Create the
3185 // GEP to increment the phi across all unrolled parts.
3186 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3187 Value *NumUnrolledElems =
3188 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3189
3190 Value *InductionGEP = GetElementPtrInst::Create(
3191 State.Builder.getInt8Ty(), NewPointerPhi,
3192 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3193 InductionLoc);
3194
3195 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3196 }
3197
3198 // Create actual address geps that use the pointer phi as base and a
3199 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3200 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3201 Value *StartOffsetScalar = State.Builder.CreateMul(
3202 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3203 Value *StartOffset =
3204 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3205 // Create a vector of consecutive numbers from zero to VF.
3206 StartOffset = State.Builder.CreateAdd(
3207 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3208
3209 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3210 "scalar step must be the same across all parts");
3211 Value *GEP = State.Builder.CreateGEP(
3212 State.Builder.getInt8Ty(), NewPointerPhi,
3213 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3214 State.VF, ScalarStepValue)),
3215 "vector.gep");
3216 State.set(this, GEP);
3217}
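A standalone sketch, not from this file, of the per-lane offsets materialised above: lane L of unroll part P addresses Base + (P * VF + L) * Step, and the shared pointer phi itself advances by VF * UF * Step each vector iteration. The step (in bytes, since the GEP is over i8), VF, and part are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): addresses produced by the
// widened pointer induction for one unroll part.
#include <cstdint>
#include <vector>

std::vector<const char *> pointerInductionPart(const char *Base,
                                               int64_t StepInBytes,
                                               unsigned VF, unsigned Part) {
  std::vector<const char *> Ptrs;
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Ptrs.push_back(Base + (int64_t(Part) * VF + Lane) * StepInBytes);
  return Ptrs;
}
// With StepInBytes = 4 and VF = 4, part 0 covers offsets {0, 4, 8, 12} and
// part 1 covers {16, 20, 24, 28} from the current value of pointer.phi.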
3218
3219#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3221 VPSlotTracker &SlotTracker) const {
3222 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3223 "unexpected number of operands");
3224 O << Indent << "EMIT ";
3226 O << " = WIDEN-POINTER-INDUCTION ";
3228 O << ", ";
3230 if (getNumOperands() == 4) {
3231 O << ", ";
3233 O << ", ";
3235 }
3236}
3237#endif
3238
3240 assert(!State.Lane && "cannot be used in per-lane");
3241 if (State.ExpandedSCEVs.contains(Expr)) {
3242 // SCEV Expr has already been expanded, result must already be set. At the
3243 // moment we have to execute the entry block twice (once before skeleton
3244 // creation to get expanded SCEVs used by the skeleton and once during
3245 // regular VPlan execution).
3247 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3248 "Results must match");
3249 return;
3250 }
3251
3252 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3253 SCEVExpander Exp(SE, DL, "induction");
3254
3255 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3256 &*State.Builder.GetInsertPoint());
3257 State.ExpandedSCEVs[Expr] = Res;
3258 State.set(this, Res, VPLane(0));
3259}
3260
3261#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3263 VPSlotTracker &SlotTracker) const {
3264 O << Indent << "EMIT ";
3266 O << " = EXPAND SCEV " << *Expr;
3267}
3268#endif
3269
3271 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3272 Type *STy = CanonicalIV->getType();
3273 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3274 ElementCount VF = State.VF;
3275 Value *VStart = VF.isScalar()
3276 ? CanonicalIV
3277 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3278 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3279 if (VF.isVector()) {
3280 VStep = Builder.CreateVectorSplat(VF, VStep);
3281 VStep =
3282 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3283 }
3284 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3285 State.set(this, CanonicalVectorIV);
3286}
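A standalone sketch, not from this file, of the widened canonical IV built above: lane L of unroll part P holds CanonicalIV + P * VF + L. The scalar IV value, VF, and part are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): VStart + VStep per lane.
#include <cstdint>
#include <vector>

std::vector<int64_t> widenCanonicalIV(int64_t CanonicalIV, unsigned VF,
                                      unsigned Part) {
  std::vector<int64_t> VecIV;
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    VecIV.push_back(CanonicalIV + int64_t(Part) * VF + Lane);
  return VecIV;
}
// widenCanonicalIV(8, 4, 0) == {8, 9, 10, 11}; part 1 yields {12, 13, 14, 15}.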
3287
3288#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3290 VPSlotTracker &SlotTracker) const {
3291 O << Indent << "EMIT ";
3293 O << " = WIDEN-CANONICAL-INDUCTION ";
3295}
3296#endif
3297
3299 auto &Builder = State.Builder;
3300 // Create a vector from the initial value.
3301 auto *VectorInit = getStartValue()->getLiveInIRValue();
3302
3303 Type *VecTy = State.VF.isScalar()
3304 ? VectorInit->getType()
3305 : VectorType::get(VectorInit->getType(), State.VF);
3306
3307 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3308 if (State.VF.isVector()) {
3309 auto *IdxTy = Builder.getInt32Ty();
3310 auto *One = ConstantInt::get(IdxTy, 1);
3311 IRBuilder<>::InsertPointGuard Guard(Builder);
3312 Builder.SetInsertPoint(VectorPH->getTerminator());
3313 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3314 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3315 VectorInit = Builder.CreateInsertElement(
3316 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3317 }
3318
3319 // Create a phi node for the new recurrence.
3320 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3321 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3322 Phi->addIncoming(VectorInit, VectorPH);
3323 State.set(this, Phi);
3324}
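A standalone sketch, not from this file, of why the start value is inserted into the last lane above: concatenating the init vector with the vector of current values and taking a VF-wide window starting at lane VF - 1 gives every lane the value from the previous scalar iteration. VF and the values are hypothetical.

// Standalone sketch (not from VPlanRecipes.cpp): the splice a first-order
// recurrence performs, modelled on plain vectors.
#include <vector>

std::vector<int> firstOrderRecurrence(int Init, const std::vector<int> &Cur) {
  // The recurrence phi starts as {poison, ..., poison, Init}; splicing it with
  // the current values at offset VF - 1 keeps Init followed by Cur[0..VF-2].
  std::vector<int> Recur = {Init};
  Recur.insert(Recur.end(), Cur.begin(), Cur.end() - 1);
  return Recur;
}
// firstOrderRecurrence(9, {1, 2, 3, 4}) == {9, 1, 2, 3}: lane L reads the
// scalar value produced one iteration earlier.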
3325
3328 VPCostContext &Ctx) const {
3330 if (VF.isScalar())
3331 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3332
3333 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3335
3337 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3338 Type *VectorTy =
3339 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3340
3342 cast<VectorType>(VectorTy), Mask, CostKind,
3343 VF.getKnownMinValue() - 1);
3344}
3345
3346#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3348 VPSlotTracker &SlotTracker) const {
3349 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3351 O << " = phi ";
3353}
3354#endif
3355
3357 auto &Builder = State.Builder;
3358
3359 // Reductions do not have to start at zero. They can start with
3360 // any loop invariant values.
3361 VPValue *StartVPV = getStartValue();
3362 Value *StartV = StartVPV->getLiveInIRValue();
3363
3364 // In order to support recurrences we need to be able to vectorize Phi nodes.
3365 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3366 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3367 // this value when we vectorize all of the instructions that use the PHI.
3368 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3369 Type *VecTy = ScalarPHI ? StartV->getType()
3370 : VectorType::get(StartV->getType(), State.VF);
3371
3372 BasicBlock *HeaderBB = State.CFG.PrevBB;
3373 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3374 "recipe must be in the vector loop header");
3375 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3376 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3377 State.set(this, Phi, IsInLoop);
3378
3379 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3380
3381 Value *Iden = nullptr;
3382 RecurKind RK = RdxDesc.getRecurrenceKind();
3383 unsigned CurrentPart = getUnrollPart(*this);
3384
3387 // MinMax and AnyOf reductions have the start value as their identity.
3388 if (ScalarPHI) {
3389 Iden = StartV;
3390 } else {
3391 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3392 Builder.SetInsertPoint(VectorPH->getTerminator());
3393 StartV = Iden = State.get(StartVPV);
3394 }
3396 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3397 // phi or the resume value from the main vector loop when vectorizing the
3398 // epilogue loop. In the exit block, ComputeReductionResult will generate
3399 // checks to verify if the reduction result is the sentinel value. If the
3400 // result is the sentinel value, it will be corrected back to the start
3401 // value.
3402 // TODO: The sentinel value is not always necessary. When the start value is
3403 // a constant, and smaller than the start value of the induction variable,
3404 // the start value can be directly used to initialize the reduction phi.
3405 Iden = StartV;
3406 if (!ScalarPHI) {
3407 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3408 Builder.SetInsertPoint(VectorPH->getTerminator());
3409 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3410 }
3411 } else {
3412 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3413 RdxDesc.getFastMathFlags());
3414
3415 if (!ScalarPHI) {
3416 if (CurrentPart == 0) {
3417 // Create start and identity vector values for the reduction in the
3418 // preheader.
3419 // TODO: Introduce recipes in VPlan preheader to create initial values.
3420 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3421 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3422 Builder.SetInsertPoint(VectorPH->getTerminator());
3423 Constant *Zero = Builder.getInt32(0);
3424 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3425 } else {
3426 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3427 }
3428 }
3429 }
3430
3431 Phi = cast<PHINode>(State.get(this, IsInLoop));
3432 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3433 Phi->addIncoming(StartVal, VectorPH);
3434}
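A standalone sketch, not from this file, of the start/identity vectors created above for an ordinary (non-min/max, non-any-of) reduction: part 0 carries the start value in lane 0 and the identity elsewhere, while later parts carry only the identity, so combining all parts reduces to Start combined with the reduction of the loop values. Shown for an add reduction with hypothetical VF and UF.

// Standalone sketch (not from VPlanRecipes.cpp): reduction phi start values.
#include <vector>

std::vector<std::vector<int>> reductionPhiInit(int Start, unsigned VF,
                                               unsigned UF) {
  const int Identity = 0; // identity of the add recurrence
  std::vector<std::vector<int>> Parts(UF, std::vector<int>(VF, Identity));
  Parts[0][0] = Start;    // CreateInsertElement(Iden, StartV, 0) for part 0
  return Parts;
}
// reductionPhiInit(7, 4, 2) == {{7, 0, 0, 0}, {0, 0, 0, 0}}.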
3435
3436#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3438 VPSlotTracker &SlotTracker) const {
3439 O << Indent << "WIDEN-REDUCTION-PHI ";
3440
3442 O << " = phi ";
3444}
3445#endif
3446
3449 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3450
3451 Value *Op0 = State.get(getOperand(0));
3452 Type *VecTy = Op0->getType();
3453 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3454 State.set(this, VecPhi);
3455}
3456
3457#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3459 VPSlotTracker &SlotTracker) const {
3460 O << Indent << "WIDEN-PHI ";
3461
3462 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3463 // Unless all incoming values are modeled in VPlan print the original PHI
3464 // directly.
3465 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3466 // values as VPValues.
3467 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3468 O << VPlanIngredient(OriginalPhi);
3469 return;
3470 }
3471
3473 O << " = phi ";
3475}
3476#endif
3477
3478// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3479// remove VPActiveLaneMaskPHIRecipe.
3481 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3482 Value *StartMask = State.get(getOperand(0));
3483 PHINode *Phi =
3484 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3485 Phi->addIncoming(StartMask, VectorPH);
3486 Phi->setDebugLoc(getDebugLoc());
3487 State.set(this, Phi);
3488}
3489
3490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3492 VPSlotTracker &SlotTracker) const {
3493 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3494
3496 O << " = phi ";
3498}
3499#endif
3500
3501#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3503 VPSlotTracker &SlotTracker) const {
3504 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3505
3507 O << " = phi ";
3509}
3510#endif
3511
3513 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3514 Value *Start = State.get(getStartValue(), VPLane(0));
3515 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3516 Phi->addIncoming(Start, VectorPH);
3517 Phi->setDebugLoc(getDebugLoc());
3518 State.set(this, Phi, /*IsScalar=*/true);
3519}
3520
3521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3523 VPSlotTracker &SlotTracker) const {
3524 O << Indent << "SCALAR-PHI ";
3526 O << " = phi ";
3528}
3529#endif
Definition: BasicBlock.cpp:416
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:662
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:595
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2510
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2498
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1814
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2050
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1125
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1153
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1043
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2044
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2573
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1986
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2185
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2092
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1109
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2397
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1873
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1732
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2233
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2403
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2434
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1756
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2269
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1386
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1163
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:958
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2032
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2532
LLVMContext & getContext() const
Definition: IRBuilder.h:195
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1369
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2448
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2018
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1670
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1687
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1833
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2379
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1613
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2704
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isUnaryOp() const
Definition: Instruction.h:278
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
bool isReverse() const
Definition: VectorUtils.h:495
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:497
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3478
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3528
iterator end()
Definition: VPlan.h:3512
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3541
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2465
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2470
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2460
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2456
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
VPRegionBlock * getParent()
Definition: VPlan.h:489
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
VPlan * getPlan()
Definition: VPlan.cpp:153
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2832
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:114
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:414
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:409
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:387
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:399
unsigned getVPDefID() const
Definition: VPlanValue.h:419
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3408
VPValue * getStartValue() const
Definition: VPlan.h:3407
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2062
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1803
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1209
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1197
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1212
@ CalculateTripCountMinusVF
Definition: VPlan.h:1210
bool hasResult() const
Definition: VPlan.h:1332
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1309
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2544
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2550
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2557
Instruction * getInsertPos() const
Definition: VPlan.h:2592
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2581
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
static VPLane getFirstLane()
Definition: VPlan.h:178
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:924
ExactFlagsTy ExactFlags
Definition: VPlan.h:974
FastMathFlagsTy FMFs
Definition: VPlan.h:977
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:976
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1144
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1105
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1147
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:973
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:975
WrapFlagsTy WrapFlags
Definition: VPlan.h:972
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1151
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1138
bool hasNoSignedWrap() const
Definition: VPlan.h:1157
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2705
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2663
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2667
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2657
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2669
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2661
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2665
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3649
const VPBlockBase * getEntry() const
Definition: VPlan.h:3682
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2792
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3465
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:910
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:440
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1456
operand_range operands()
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:272
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1452
friend class VPInstruction
Definition: VPlanValue.h:47
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:138
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
user_iterator user_begin()
Definition: VPlanValue.h:128
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1751
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1755
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1574
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1502
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:2118
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2115
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2121
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2193
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2202
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1694
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2903
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2900
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2939
Instruction & Ingredient
Definition: VPlan.h:2894
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2897
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2953
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2946
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2943
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2247
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1468
unsigned getUF() const
Definition: VPlan.h:3951
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:41
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:39
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:255
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3023
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1847
VPValue * getCond() const
Definition: VPlan.h:1843
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:3102
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3105
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3067
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.