//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/VectorBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
extern cl::opt<unsigned> ForceTargetInstructionCost;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

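// Conservative memory-write query, dispatching on the recipe's VPDef ID and
// falling back to the underlying IR instruction where one exists.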
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInstructionSC:
    if (Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode()))
      return false;
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
      return false;
    default:
      return true;
    }
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenEVLSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

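// Memory-read counterpart of mayWriteToMemory() above.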
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenEVLSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

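// A recipe has side effects if it may write to memory or, for calls, may
// throw or not return; unknown recipe kinds are conservatively assumed to.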
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
  case VPReverseVectorPointerSC:
    return false;
  case VPInstructionSC:
    return mayWriteToMemory();
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenEVLSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

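// Common cost entry point: skip recipes the cost model already accounts for,
// apply a forced target instruction cost if one is set, otherwise defer to
// the recipe's computeCost() implementation.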
InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  // Get the underlying instruction for the recipe, if there is one. It is used
  // to
  //   * decide if cost computation should be skipped for this recipe,
  //   * apply forced target instruction cost.
  Instruction *UI = nullptr;
  if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
    UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
  else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
    UI = IG->getInsertPos();
  else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
    UI = &WidenMem->getIngredient();

  InstructionCost RecipeCost;
  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
    RecipeCost = 0;
  } else {
    RecipeCost = computeCost(VF, Ctx);
    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
        RecipeCost.isValid())
      RecipeCost = InstructionCost(ForceTargetInstructionCost);
  }

  LLVM_DEBUG({
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
    dump();
  });
  return RecipeCost;
}

InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
  llvm_unreachable("subclasses should implement computeCost");
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

309#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
311#endif
312
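// Accessors for the operand carrying the unroll part, present only after the
// recipe has been unrolled.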
template <unsigned PartOpIdx>
VPValue *
VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
  if (U.getNumOperands() == PartOpIdx + 1)
    return U.getOperand(PartOpIdx);
  return nullptr;
}

template <unsigned PartOpIdx>
unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
  if (auto *UnrollPartOp = getUnrollPartOperand(U))
    return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
  return 0;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

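// Only PtrAdd currently generates a value per lane; all other opcodes produce
// a single (scalar or vector) value per part.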
bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;
  if (isVectorToScalar() || isSingleScalar())
    return true;
  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::Select:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPLane &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

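// Emit IR for opcodes that produce one value per part; per-lane opcodes are
// handled by generatePerLane() above.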
Value *VPInstruction::generate(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0));
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
    Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
    Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPLane(0));

    // If this part of the active lane mask is scalar, generate the CMP
    // directly to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    auto *V1 = State.get(getOperand(0));
    if (!V1->getType()->isVectorTy())
      return V1;
    Value *V2 = State.get(getOperand(1));
    return Builder.CreateVectorSplice(V1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    unsigned UF = getParent()->getPlan()->getUF();
    Value *ScalarTC = State.get(getOperand(0), VPLane(0));
    Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
    // Compute EVL
    assert(AVL->getType()->isIntegerTy() &&
           "Requested vector length should be an integer.");

    assert(State.VF.isScalable() && "Expected scalable vector factor.");
    Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

    Value *EVL = State.Builder.CreateIntrinsic(
        State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
        {AVL, VFArg, State.Builder.getTrue()});
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    unsigned Part = getUnrollPart(*this);
    auto *IV = State.get(getOperand(0), VPLane(0));
    assert(Part != 0 && "Must have a positive part");
    // The canonical IV is incremented by the vectorization factor (num of
    // SIMD elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    Value *Cond = State.get(getOperand(0), VPLane(0));
    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

    if (!getParent()->isExiting())
      return CondBr;

    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
    CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    // First create the compare.
    Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    Type *PhiTy = OrigPhi->getType();
    // The recipe's operands are the reduction phi, followed by one operand for
    // each part of the reduction.
    unsigned UF = getNumOperands() - 1;
    VectorParts RdxParts(UF);
    for (unsigned Part = 0; Part < UF; ++Part)
      RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RdxDesc.getOpcode();
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
          ReducedPartRdx =
              createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
         RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::ExtractFromEnd: {
    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
    unsigned Offset = CI->getZExtValue();
    assert(Offset > 0 && "Offset from end must be positive");
    Value *Res;
    if (State.VF.isVector()) {
      assert(Offset <= State.VF.getKnownMinValue() &&
             "invalid offset to extract from");
      // Extract lane VF - Offset from the operand.
      Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
    } else {
      assert(Offset <= 1 && "invalid offset to extract from");
      Res = State.get(getOperand(0));
    }
    if (isa<ExtractElementInst>(Res))
      Res->setName(Name);
    return Res;
  }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0));
    Value *B = State.get(getOperand(1));
    return Builder.CreateLogicalAnd(A, B, Name);
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), VPLane(0));
    Value *Addend = State.get(getOperand(1), VPLane(0));
    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
  }
  case VPInstruction::ResumePhi: {
    Value *IncomingFromVPlanPred =
        State.get(getOperand(0), /* IsScalar */ true);
    Value *IncomingFromOtherPreds =
        State.get(getOperand(1), /* IsScalar */ true);
    auto *NewPhi =
        Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
    BasicBlock *VPlanPred =
        State.CFG
            .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
    NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
    for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
      if (OtherPred == VPlanPred)
        continue;
      NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
    }
    return NewPhi;
  }
  case VPInstruction::AnyOf: {
    Value *A = State.get(getOperand(0));
    return Builder.CreateOrReduce(A);
  }

  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
653
658}
659
662}
663
#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call, PHI and Select opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif
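
// Drive code generation for this VPInstruction: set fast-math flags if any,
// then emit either per-lane values or a single per-part value.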
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Lane && "VPInstruction executing an Lane");
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
                                   (vputils::onlyFirstLaneUsed(this) ||
                                    isVectorToScalar() || isSingleScalar());
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  if (GeneratesPerAllLanes) {
    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
         Lane != NumLanes; ++Lane) {
      Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
      assert(GeneratedValue && "generatePerLane must produce a value");
      State.set(this, GeneratedValue, VPLane(Lane));
    }
    return;
  }

  Value *GeneratedValue = generate(State);
  if (!hasResult())
    return;
  assert(GeneratedValue && "generate must produce a value");
  assert(
      (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
       State.VF.isScalar()) &&
      "scalar value but not only first lane defined");
  State.set(this, GeneratedValue,
            /*IsScalar*/ GeneratesPerFirstLaneOnly);
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::Or:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::ResumePhi:
    return true;
  };
  llvm_unreachable("switch should return");
}

bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstPartUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case Instruction::Select:
    return vputils::onlyFirstPartUsed(this);
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::CanonicalIVIncrementForPart:
    return true;
  };
  llvm_unreachable("switch should return");
}
754
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ResumePhi:
    O << "resume-phi";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ExtractFromEnd:
    O << "extract-from-end";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  case VPInstruction::AnyOf:
    O << "any-of";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif
832
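// The wrapped IR instruction already exists; only extra phi operands, if any,
// need to be hooked up to the corresponding predecessor blocks.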
void VPIRInstruction::execute(VPTransformState &State) {
  assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
835 "Only PHINodes can have extra operands");
836 for (const auto &[Idx, Op] : enumerate(operands())) {
837 VPValue *ExitValue = Op;
838 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
842 auto *PredVPBB = Pred->getExitingBasicBlock();
843 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
844 // Set insertion point in PredBB in case an extract needs to be generated.
845 // TODO: Model extracts explicitly.
846 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
847 Value *V = State.get(ExitValue, VPLane(Lane));
848 auto *Phi = cast<PHINode>(&I);
849 // If there is no existing block for PredBB in the phi, add a new incoming
850 // value. Otherwise update the existing incoming value for PredBB.
851 if (Phi->getBasicBlockIndex(PredBB) == -1)
852 Phi->addIncoming(V, PredBB);
853 else
854 Phi->setIncomingValueForBlock(PredBB, V);
855 }
856
857 // Advance the insert point after the wrapped IR instruction. This allows
858 // interleaving VPIRInstructions and other recipes.
859 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
860}
861
863 VPCostContext &Ctx) const {
864 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
865 // hence it does not contribute to the cost-modeling for the VPlan.
866 return 0;
867}
868
869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
871 VPSlotTracker &SlotTracker) const {
872 O << Indent << "IR " << I;
873
874 if (getNumOperands() != 0) {
875 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
877 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
878 Op.value()->printAsOperand(O, SlotTracker);
879 O << " from ";
880 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
881 });
882 O << ")";
883 }
884}
885#endif
886
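// Widen a call by invoking the selected vector function variant; parameters
// declared scalar by the variant keep a scalar argument.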
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
890
891 FunctionType *VFTy = Variant->getFunctionType();
892 // Add return type if intrinsic is overloaded on it.
894 for (const auto &I : enumerate(arg_operands())) {
895 Value *Arg;
896 // Some vectorized function variants may also take a scalar argument,
897 // e.g. linear parameters for pointers. This needs to be the scalar value
898 // from the start of the respective part when interleaving.
899 if (!VFTy->getParamType(I.index())->isVectorTy())
900 Arg = State.get(I.value(), VPLane(0));
901 else
902 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
903 Args.push_back(Arg);
904 }
905
906 assert(Variant != nullptr && "Can't create vector function.");
907
908 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
910 if (CI)
911 CI->getOperandBundlesAsDefs(OpBundles);
912
913 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
914 setFlags(V);
915
916 if (!V->getType()->isVoidTy())
917 State.set(this, V);
918 State.addMetadata(V, CI);
919}
920
922 VPCostContext &Ctx) const {
924 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
925 Variant->getFunctionType()->params(),
926 CostKind);
927}
928
929#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
931 VPSlotTracker &SlotTracker) const {
932 O << Indent << "WIDEN-CALL ";
933
934 Function *CalledFn = getCalledScalarFunction();
935 if (CalledFn->getReturnType()->isVoidTy())
936 O << "void ";
937 else {
939 O << " = ";
940 }
941
942 O << "call";
943 printFlags(O);
944 O << " @" << CalledFn->getName() << "(";
946 Op->printAsOperand(O, SlotTracker);
947 });
948 O << ")";
949
950 O << " (using library function";
951 if (Variant->hasName())
952 O << ": " << Variant->getName();
953 O << ")";
954}
955#endif
956
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
960
961 SmallVector<Type *, 2> TysForDecl;
962 // Add return type if intrinsic is overloaded on it.
963 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
964 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
966 for (const auto &I : enumerate(operands())) {
967 // Some intrinsics have a scalar argument - don't replace it with a
968 // vector.
969 Value *Arg;
970 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
971 State.TTI))
972 Arg = State.get(I.value(), VPLane(0));
973 else
974 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
975 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
976 State.TTI))
977 TysForDecl.push_back(Arg->getType());
978 Args.push_back(Arg);
979 }
980
981 // Use vector version of the intrinsic.
982 Module *M = State.Builder.GetInsertBlock()->getModule();
983 Function *VectorF =
984 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
985 assert(VectorF &&
986 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
987
988 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
990 if (CI)
991 CI->getOperandBundlesAsDefs(OpBundles);
992
993 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
994
995 setFlags(V);
996
997 if (!V->getType()->isVoidTy())
998 State.set(this, V);
999 State.addMetadata(V, CI);
1000}
1001
1003 VPCostContext &Ctx) const {
1005
1006 // Some backends analyze intrinsic arguments to determine cost. Use the
1007 // underlying value for the operand if it has one. Otherwise try to use the
1008 // operand of the underlying call instruction, if there is one. Otherwise
1009 // clear Arguments.
1010 // TODO: Rework TTI interface to be independent of concrete IR values.
1012 for (const auto &[Idx, Op] : enumerate(operands())) {
1013 auto *V = Op->getUnderlyingValue();
    if (!V) {
      // Push all the VP intrinsic's operands into Arguments even if some are
      // nullptr: some VP intrinsics' cost computations assert on the number of
      // parameters. This mainly happens in two scenarios:
      // 1. The EVL operand is nullptr.
      // 2. An argument of the VP intrinsic is itself a VP intrinsic.
1020 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1021 Arguments.push_back(V);
1022 continue;
1023 }
1024 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1025 Arguments.push_back(UI->getArgOperand(Idx));
1026 continue;
1027 }
1028 Arguments.clear();
1029 break;
1030 }
1031 Arguments.push_back(V);
1032 }
1033
1034 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1035 SmallVector<Type *> ParamTys;
1036 for (unsigned I = 0; I != getNumOperands(); ++I)
1037 ParamTys.push_back(
1039
1040 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1042 IntrinsicCostAttributes CostAttrs(
1043 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1044 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1045 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1046}
1047
1049 return Intrinsic::getBaseName(VectorIntrinsicID);
1050}
1051
bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  // Vector predication intrinsics only demand the first lane of the last
  // operand (the EVL operand).
  return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
         Op == getOperand(getNumOperands() - 1);
}
1059
1060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1062 VPSlotTracker &SlotTracker) const {
1063 O << Indent << "WIDEN-INTRINSIC ";
1064 if (ResultTy->isVoidTy()) {
1065 O << "void ";
1066 } else {
1068 O << " = ";
1069 }
1070
1071 O << "call";
1072 printFlags(O);
1073 O << getIntrinsicName() << "(";
1074
1076 Op->printAsOperand(O, SlotTracker);
1077 });
1078 O << ")";
1079}
1080#endif
1081
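// Lower the histogram update to the experimental_vector_histogram_add
// intrinsic, synthesizing an all-true mask when the recipe has no mask.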
void VPHistogramRecipe::execute(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;
1085
1086 Value *Address = State.get(getOperand(0));
1087 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1088 VectorType *VTy = cast<VectorType>(Address->getType());
1089
1090 // The histogram intrinsic requires a mask even if the recipe doesn't;
1091 // if the mask operand was omitted then all lanes should be executed and
1092 // we just need to synthesize an all-true mask.
1093 Value *Mask = nullptr;
1094 if (VPValue *VPMask = getMask())
1095 Mask = State.get(VPMask);
1096 else
1097 Mask =
1098 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1099
1100 // If this is a subtract, we want to invert the increment amount. We may
1101 // add a separate intrinsic in future, but for now we'll try this.
1102 if (Opcode == Instruction::Sub)
1103 IncAmt = Builder.CreateNeg(IncAmt);
1104 else
1105 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1106
1107 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1108 {VTy, IncAmt->getType()},
1109 {Address, IncAmt, Mask});
1110}
1111
1113 VPCostContext &Ctx) const {
1114 // FIXME: Take the gather and scatter into account as well. For now we're
1115 // generating the same cost as the fallback path, but we'll likely
1116 // need to create a new TTI method for determining the cost, including
1117 // whether we can use base + vec-of-smaller-indices or just
1118 // vec-of-pointers.
1119 assert(VF.isVector() && "Invalid VF for histogram cost");
1120 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1121 VPValue *IncAmt = getOperand(1);
1122 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1123 VectorType *VTy = VectorType::get(IncTy, VF);
1124
1125 // Assume that a non-constant update value (or a constant != 1) requires
1126 // a multiply, and add that into the cost.
1127 InstructionCost MulCost =
1128 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1129 if (IncAmt->isLiveIn()) {
1130 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1131
1132 if (CI && CI->getZExtValue() == 1)
1133 MulCost = TTI::TCC_Free;
1134 }
1135
1136 // Find the cost of the histogram operation itself.
1137 Type *PtrTy = VectorType::get(AddressTy, VF);
1138 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1139 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1141 {PtrTy, IncTy, MaskTy});
1142
1143 // Add the costs together with the add/sub operation.
1144 return Ctx.TTI.getIntrinsicInstrCost(
1146 MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1147}
1148
1149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1151 VPSlotTracker &SlotTracker) const {
1152 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1154
1155 if (Opcode == Instruction::Sub)
1156 O << ", dec: ";
1157 else {
1158 assert(Opcode == Instruction::Add);
1159 O << ", inc: ";
1160 }
1162
1163 if (VPValue *Mask = getMask()) {
1164 O << ", mask: ";
1165 Mask->printAsOperand(O, SlotTracker);
1166 }
1167}
1168
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
1171 O << Indent << "WIDEN-SELECT ";
1173 O << " = select ";
1175 O << ", ";
1177 O << ", ";
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

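// Widen a select; a loop-invariant condition is materialized as a single
// scalar (first lane) value.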
void VPWidenSelectRecipe::execute(VPTransformState &State) {
1186 // The condition can be loop invariant but still defined inside the
1187 // loop. This means that we can't just use the original 'cond' value.
1188 // We have to take the 'vectorized' value and pick the first lane.
1189 // Instcombine will make this a no-op.
1190 auto *InvarCond =
1191 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1192
1193 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1194 Value *Op0 = State.get(getOperand(1));
1195 Value *Op1 = State.get(getOperand(2));
1196 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1197 State.set(this, Sel);
1198 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1199}
1200
1202 VPCostContext &Ctx) const {
1203 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1204 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1205 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1206 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1208
1209 VPValue *Op0, *Op1;
1210 using namespace llvm::VPlanPatternMatch;
1211 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1212 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1213 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1214 // select x, y, false --> x & y
1215 // select x, true, y --> x | y
1216 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1217 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1218
1220 if (all_of(operands(),
1221 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1222 Operands.append(SI->op_begin(), SI->op_end());
1223 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1224 return Ctx.TTI.getArithmeticInstrCost(
1225 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1226 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1227 }
1228
1229 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1230 if (!ScalarCond)
1231 CondTy = VectorType::get(CondTy, VF);
1232
1234 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1235 Pred = Cmp->getPredicate();
1236 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1239}
1240
1241VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1242 const FastMathFlags &FMF) {
1243 AllowReassoc = FMF.allowReassoc();
1244 NoNaNs = FMF.noNaNs();
1245 NoInfs = FMF.noInfs();
1246 NoSignedZeros = FMF.noSignedZeros();
1247 AllowReciprocal = FMF.allowReciprocal();
1248 AllowContract = FMF.allowContract();
1249 ApproxFunc = FMF.approxFunc();
1250}
1251
1252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1254 switch (OpType) {
1255 case OperationType::Cmp:
1257 break;
1258 case OperationType::DisjointOp:
1260 O << " disjoint";
1261 break;
1262 case OperationType::PossiblyExactOp:
1263 if (ExactFlags.IsExact)
1264 O << " exact";
1265 break;
1266 case OperationType::OverflowingBinOp:
1267 if (WrapFlags.HasNUW)
1268 O << " nuw";
1269 if (WrapFlags.HasNSW)
1270 O << " nsw";
1271 break;
1272 case OperationType::FPMathOp:
1274 break;
1275 case OperationType::GEPOp:
1276 if (GEPFlags.isInBounds())
1277 O << " inbounds";
1279 O << " nusw";
1281 O << " nuw";
1282 break;
1283 case OperationType::NonNegOp:
1284 if (NonNegFlags.NonNeg)
1285 O << " nneg";
1286 break;
1287 case OperationType::Other:
1288 break;
1289 }
1290 if (getNumOperands() > 0)
1291 O << " ";
1292}
1293#endif
1294
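// Widen simple unary/binary ops, freeze and compares into vector operations,
// propagating IR flags and metadata from the original instruction.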
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
1298 switch (Opcode) {
1299 case Instruction::Call:
1300 case Instruction::Br:
1301 case Instruction::PHI:
1302 case Instruction::GetElementPtr:
1303 case Instruction::Select:
1304 llvm_unreachable("This instruction is handled by a different recipe.");
1305 case Instruction::UDiv:
1306 case Instruction::SDiv:
1307 case Instruction::SRem:
1308 case Instruction::URem:
1309 case Instruction::Add:
1310 case Instruction::FAdd:
1311 case Instruction::Sub:
1312 case Instruction::FSub:
1313 case Instruction::FNeg:
1314 case Instruction::Mul:
1315 case Instruction::FMul:
1316 case Instruction::FDiv:
1317 case Instruction::FRem:
1318 case Instruction::Shl:
1319 case Instruction::LShr:
1320 case Instruction::AShr:
1321 case Instruction::And:
1322 case Instruction::Or:
1323 case Instruction::Xor: {
1324 // Just widen unops and binops.
1326 for (VPValue *VPOp : operands())
1327 Ops.push_back(State.get(VPOp));
1328
1329 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1330
1331 if (auto *VecOp = dyn_cast<Instruction>(V))
1332 setFlags(VecOp);
1333
1334 // Use this vector value for all users of the original instruction.
1335 State.set(this, V);
1336 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1337 break;
1338 }
1339 case Instruction::Freeze: {
1340 Value *Op = State.get(getOperand(0));
1341
1342 Value *Freeze = Builder.CreateFreeze(Op);
1343 State.set(this, Freeze);
1344 break;
1345 }
1346 case Instruction::ICmp:
1347 case Instruction::FCmp: {
1348 // Widen compares. Generate vector compares.
1349 bool FCmp = Opcode == Instruction::FCmp;
1350 Value *A = State.get(getOperand(0));
1351 Value *B = State.get(getOperand(1));
1352 Value *C = nullptr;
1353 if (FCmp) {
1354 // Propagate fast math flags.
1355 C = Builder.CreateFCmpFMF(
1356 getPredicate(), A, B,
1357 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1358 } else {
1359 C = Builder.CreateICmp(getPredicate(), A, B);
1360 }
1361 State.set(this, C);
1362 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1363 break;
1364 }
1365 default:
1366 // This instruction is not vectorized by simple widening.
1367 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1368 << Instruction::getOpcodeName(Opcode));
1369 llvm_unreachable("Unhandled instruction!");
1370 } // end of switch.
1371
1372#if !defined(NDEBUG)
1373 // Verify that VPlan type inference results agree with the type of the
1374 // generated values.
1376 State.get(this)->getType() &&
1377 "inferred type and type from generated instructions do not match");
1378#endif
1379}
1380
1382 VPCostContext &Ctx) const {
1384 switch (Opcode) {
1385 case Instruction::FNeg: {
1386 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1387 return Ctx.TTI.getArithmeticInstrCost(
1388 Opcode, VectorTy, CostKind,
1391 }
1392
1393 case Instruction::UDiv:
1394 case Instruction::SDiv:
1395 case Instruction::SRem:
1396 case Instruction::URem:
1397 // More complex computation, let the legacy cost-model handle this for now.
1398 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1399 case Instruction::Add:
1400 case Instruction::FAdd:
1401 case Instruction::Sub:
1402 case Instruction::FSub:
1403 case Instruction::Mul:
1404 case Instruction::FMul:
1405 case Instruction::FDiv:
1406 case Instruction::FRem:
1407 case Instruction::Shl:
1408 case Instruction::LShr:
1409 case Instruction::AShr:
1410 case Instruction::And:
1411 case Instruction::Or:
1412 case Instruction::Xor: {
1413 VPValue *RHS = getOperand(1);
1414 // Certain instructions can be cheaper to vectorize if they have a constant
1415 // second vector operand. One example of this are shifts on x86.
1418 if (RHS->isLiveIn())
1419 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1420
1421 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1424 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1425 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1426
1428 if (CtxI)
1429 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1430 return Ctx.TTI.getArithmeticInstrCost(
1431 Opcode, VectorTy, CostKind,
1433 RHSInfo, Operands, CtxI, &Ctx.TLI);
1434 }
1435 case Instruction::Freeze: {
1436 // This opcode is unknown. Assume that it is the same as 'mul'.
1437 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1438 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1439 }
1440 case Instruction::ICmp:
1441 case Instruction::FCmp: {
1442 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1443 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1444 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1445 CostKind,
1448 }
1449 default:
1450 llvm_unreachable("Unsupported opcode for instruction");
1451 }
1452}
1453
void VPWidenEVLRecipe::execute(VPTransformState &State) {
  unsigned Opcode = getOpcode();
1456 // TODO: Support other opcodes
1457 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1458 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1459
1461
1462 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1463 "VPWidenEVLRecipe should not be used for scalars");
1464
1465 VPValue *EVL = getEVL();
1466 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1467 IRBuilderBase &BuilderIR = State.Builder;
1468 VectorBuilder Builder(BuilderIR);
1469 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1470
1472 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1473 VPValue *VPOp = getOperand(I);
1474 Ops.push_back(State.get(VPOp));
1475 }
1476
1477 Builder.setMask(Mask).setEVL(EVLArg);
1478 Value *VPInst =
1479 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1480 // Currently vp-intrinsics only accept FMF flags.
1481 // TODO: Enable other flags when support is added.
1482 if (isa<FPMathOperator>(VPInst))
1483 setFlags(cast<Instruction>(VPInst));
1484
1485 State.set(this, VPInst);
1486 State.addMetadata(VPInst,
1487 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1488}
1489
1490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1492 VPSlotTracker &SlotTracker) const {
1493 O << Indent << "WIDEN ";
1495 O << " = " << Instruction::getOpcodeName(Opcode);
1496 printFlags(O);
1498}
1499
1501 VPSlotTracker &SlotTracker) const {
1502 O << Indent << "WIDEN ";
1504 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1505 printFlags(O);
1507}
1508#endif
1509
void VPWidenCastRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Vectorize casts.
1514 assert(State.VF.isVector() && "Not vectorizing?");
1515 Type *DestTy = VectorType::get(getResultType(), State.VF);
1516 VPValue *Op = getOperand(0);
1517 Value *A = State.get(Op);
1518 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1519 State.set(this, Cast);
1520 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1521 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1522 setFlags(CastOp);
1523}
1524
1526 VPCostContext &Ctx) const {
1527 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1528 // the legacy cost model, including truncates/extends when evaluating a
1529 // reduction in a smaller type.
1530 if (!getUnderlyingValue())
1531 return 0;
1532 // Computes the CastContextHint from a recipes that may access memory.
1533 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1534 if (VF.isScalar())
1536 if (isa<VPInterleaveRecipe>(R))
1538 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1539 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1541 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1542 if (WidenMemoryRecipe == nullptr)
1544 if (!WidenMemoryRecipe->isConsecutive())
1546 if (WidenMemoryRecipe->isReverse())
1548 if (WidenMemoryRecipe->isMasked())
1551 };
1552
1553 VPValue *Operand = getOperand(0);
1555 // For Trunc/FPTrunc, get the context from the only user.
1556 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1558 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1559 CCH = ComputeCCH(StoreRecipe);
1560 }
1561 // For Z/Sext, get the context from the operand.
1562 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1563 Opcode == Instruction::FPExt) {
1564 if (Operand->isLiveIn())
1566 else if (Operand->getDefiningRecipe())
1567 CCH = ComputeCCH(Operand->getDefiningRecipe());
1568 }
1569
1570 auto *SrcTy =
1571 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1572 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1573 // Arm TTI will use the underlying instruction to determine the cost.
1574 return Ctx.TTI.getCastInstrCost(
1575 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1576 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1577}
1578
1579#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1581 VPSlotTracker &SlotTracker) const {
1582 O << Indent << "WIDEN-CAST ";
1584 O << " = " << Instruction::getOpcodeName(Opcode);
1585 printFlags(O);
1587 O << " to " << *getResultType();
1588}
1589#endif
1590
1592 VPCostContext &Ctx) const {
1593 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1594}
1595
1596/// This function adds
1597/// (0 * Step, 1 * Step, 2 * Step, ...)
1598/// to each vector element of Val.
1599/// \p Opcode is relevant for FP induction variable.
1600static Value *getStepVector(Value *Val, Value *Step,
1602 IRBuilderBase &Builder) {
1603 assert(VF.isVector() && "only vector VFs are supported");
1604
1605 // Create and check the types.
1606 auto *ValVTy = cast<VectorType>(Val->getType());
1607 ElementCount VLen = ValVTy->getElementCount();
1608
1609 Type *STy = Val->getType()->getScalarType();
1610 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1611 "Induction Step must be an integer or FP");
1612 assert(Step->getType() == STy && "Step has wrong type");
1613
1615
1616 // Create a vector of consecutive numbers from zero to VF.
1617 VectorType *InitVecValVTy = ValVTy;
1618 if (STy->isFloatingPointTy()) {
1619 Type *InitVecValSTy =
1621 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1622 }
1623 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1624
1625 if (STy->isIntegerTy()) {
1626 Step = Builder.CreateVectorSplat(VLen, Step);
1627 assert(Step->getType() == Val->getType() && "Invalid step vec");
1628 // FIXME: The newly created binary instructions should contain nsw/nuw
1629 // flags, which can be found from the original scalar operations.
1630 Step = Builder.CreateMul(InitVec, Step);
1631 return Builder.CreateAdd(Val, Step, "induction");
1632 }
1633
1634 // Floating point induction.
1635 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1636 "Binary Opcode should be specified for FP induction");
1637 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1638
1639 Step = Builder.CreateVectorSplat(VLen, Step);
1640 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1641 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1642}
1643
1644/// A helper function that returns an integer or floating-point constant with
1645/// value C.
1647 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1648 : ConstantFP::get(Ty, C);
1649}
1650
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Int or FP induction being replicated.");
1653
1654 Value *Start = getStartValue()->getLiveInIRValue();
1656 TruncInst *Trunc = getTruncInst();
1657 IRBuilderBase &Builder = State.Builder;
1658 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1659 "Types must match");
1660 assert(State.VF.isVector() && "must have vector VF");
1661
1662 // The value from the original loop to which we are mapping the new induction
1663 // variable.
1664 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1665
1666 // Fast-math-flags propagate from the original induction instruction.
1667 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1668 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1669 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1670
1671 // Now do the actual transformations, and start with fetching the step value.
1672 Value *Step = State.get(getStepValue(), VPLane(0));
1673
1674 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1675 "Expected either an induction phi-node or a truncate of it!");
1676
1677 // Construct the initial value of the vector IV in the vector loop preheader
1678 auto CurrIP = Builder.saveIP();
1679 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1680 Builder.SetInsertPoint(VectorPH->getTerminator());
1681 if (isa<TruncInst>(EntryVal)) {
1682 assert(Start->getType()->isIntegerTy() &&
1683 "Truncation requires an integer type");
1684 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1685 Step = Builder.CreateTrunc(Step, TruncType);
1686 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1687 }
1688
1689 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1690 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1691 State.VF, State.Builder);
1692
1693 // We create vector phi nodes for both integer and floating-point induction
1694 // variables. Here, we determine the kind of arithmetic we will perform.
1697 if (Step->getType()->isIntegerTy()) {
1698 AddOp = Instruction::Add;
1699 MulOp = Instruction::Mul;
1700 } else {
1701 AddOp = ID.getInductionOpcode();
1702 MulOp = Instruction::FMul;
1703 }
1704
1705 Value *SplatVF;
1706 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1707 // The recipe has been unrolled. In that case, fetch the splat value for the
1708 // induction increment.
1709 SplatVF = State.get(SplatVFOperand);
1710 } else {
1711 // Multiply the vectorization factor by the step using integer or
1712 // floating-point arithmetic as appropriate.
1713 Type *StepType = Step->getType();
1714 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1715 if (Step->getType()->isFloatingPointTy())
1716 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1717 else
1718 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1719 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1720
1721 // Create a vector splat to use in the induction update.
1722 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1723 }
1724
1725 Builder.restoreIP(CurrIP);
1726
1727 // We may need to add the step a number of times, depending on the unroll
1728 // factor. The last of those goes into the PHI.
1729 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1730 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1731 VecInd->setDebugLoc(getDebugLoc());
1732 State.set(this, VecInd);
1733
1734 Instruction *LastInduction = cast<Instruction>(
1735 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1736 if (isa<TruncInst>(EntryVal))
1737 State.addMetadata(LastInduction, EntryVal);
1738 LastInduction->setDebugLoc(getDebugLoc());
1739
1740 VecInd->addIncoming(SteppedStart, VectorPH);
1741 // Add induction update using an incorrect block temporarily. The phi node
1742 // will be fixed after VPlan execution. Note that at this point the latch
1743 // block cannot be used, as it does not exist yet.
1744 // TODO: Model increment value in VPlan, by turning the recipe into a
1745 // multi-def and a subclass of VPHeaderPHIRecipe.
1746 VecInd->addIncoming(LastInduction, VectorPH);
1747}
1748
1749#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1751 VPSlotTracker &SlotTracker) const {
1752 O << Indent;
1754 O << " = WIDEN-INDUCTION ";
1756
1757 if (auto *TI = getTruncInst())
1758 O << " (truncated to " << *TI->getType() << ")";
1759}
1760#endif
1761
1763 // The step may be defined by a recipe in the preheader (e.g. if it requires
1764 // SCEV expansion), but for the canonical induction the step is required to be
1765 // 1, which is represented as live-in.
1767 return false;
1768 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1769 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1770 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1771 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1772 getScalarType() == CanIV->getScalarType();
1773}
1774
1775#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1777 VPSlotTracker &SlotTracker) const {
1778 O << Indent;
1780 O << " = DERIVED-IV ";
1782 O << " + ";
1784 O << " * ";
1786}
1787#endif
1788
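// Emit the scalar steps BaseIV + (StartIdx + Lane) * Step, and additionally a
// vector form when a scalable VF requires it.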
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  /// Compute scalar induction steps. \p ScalarIV is the scalar induction
  /// variable on which to base the steps, \p Step is the size of the step.

1798 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1799 Value *Step = State.get(getStepValue(), VPLane(0));
1800 IRBuilderBase &Builder = State.Builder;
1801
1802 // Ensure step has the same type as that of scalar IV.
1803 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1804 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1805
1806 // We build scalar steps for both integer and floating-point induction
1807 // variables. Here, we determine the kind of arithmetic we will perform.
1810 if (BaseIVTy->isIntegerTy()) {
1811 AddOp = Instruction::Add;
1812 MulOp = Instruction::Mul;
1813 } else {
1814 AddOp = InductionOpcode;
1815 MulOp = Instruction::FMul;
1816 }
1817
1818 // Determine the number of scalars we need to generate for each unroll
1819 // iteration.
1820 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1821 // Compute the scalar steps and save the results in State.
1822 Type *IntStepTy =
1823 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1824 Type *VecIVTy = nullptr;
1825 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1826 if (!FirstLaneOnly && State.VF.isScalable()) {
1827 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1828 UnitStepVec =
1829 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1830 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1831 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1832 }
1833
1834 unsigned StartLane = 0;
1835 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1836 if (State.Lane) {
1837 StartLane = State.Lane->getKnownLane();
1838 EndLane = StartLane + 1;
1839 }
1840 Value *StartIdx0 =
1841 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1842
1843 if (!FirstLaneOnly && State.VF.isScalable()) {
1844 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1845 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1846 if (BaseIVTy->isFloatingPointTy())
1847 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1848 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1849 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1850 State.set(this, Add);
1851 // It's useful to record the lane values too for the known minimum number
1852 // of elements so we do those below. This improves the code quality when
1853 // trying to extract the first element, for example.
1854 }
1855
1856 if (BaseIVTy->isFloatingPointTy())
1857 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1858
1859 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1860 Value *StartIdx = Builder.CreateBinOp(
1861 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1862 // The step returned by `createStepForVF` is a runtime-evaluated value
1863 // when VF is scalable. Otherwise, it should be folded into a Constant.
1864 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1865 "Expected StartIdx to be folded to a constant when VF is not "
1866 "scalable");
1867 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1868 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1869 State.set(this, Add, VPLane(Lane));
1870 }
1871}
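// For illustration (all values hypothetical): with a fixed VF of 4, unroll
// part 1, a scalar IV %iv and a step of 2, the loop above produces per-lane
// values of the form BaseIV + (Part * VF + Lane) * Step, i.e. roughly:
//   lane 0: %iv + 8    lane 1: %iv + 10
//   lane 2: %iv + 12   lane 3: %iv + 14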
1872
1873#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1875 VPSlotTracker &SlotTracker) const {
1876 O << Indent;
1878 O << " = SCALAR-STEPS ";
1880}
1881#endif
1882
1884 assert(State.VF.isVector() && "not widening");
1885 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1886 // Construct a vector GEP by widening the operands of the scalar GEP as
1887 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1888 // results in a vector of pointers when at least one operand of the GEP
1889 // is vector-typed. Thus, to keep the representation compact, we only use
1890 // vector-typed operands for loop-varying values.
1891
1892 if (areAllOperandsInvariant()) {
1893 // If we are vectorizing, but the GEP has only loop-invariant operands,
1894 // the GEP we build (by only using vector-typed operands for
1895 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1896 // produce a vector of pointers, we need to either arbitrarily pick an
1897 // operand to broadcast, or broadcast a clone of the original GEP.
1898 // Here, we broadcast a clone of the original.
1899 //
1900 // TODO: If at some point we decide to scalarize instructions having
1901 // loop-invariant operands, this special case will no longer be
1902 // required. We would add the scalarization decision to
1903 // collectLoopScalars() and teach getVectorValue() to broadcast
1904 // the lane-zero scalar value.
1905 SmallVector<Value *> Ops;
1906 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1907 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1908
1909 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1910 ArrayRef(Ops).drop_front(), "",
1911 getGEPNoWrapFlags());
1912 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1913 State.set(this, Splat);
1914 State.addMetadata(Splat, GEP);
1915 } else {
1916 // If the GEP has at least one loop-varying operand, we are sure to
1917 // produce a vector of pointers unless VF is scalar.
1918 // The pointer operand of the new GEP. If it's loop-invariant, we
1919 // won't broadcast it.
1920 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1921 : State.get(getOperand(0));
1922
1923 // Collect all the indices for the new GEP. If any index is
1924 // loop-invariant, we won't broadcast it.
1925 SmallVector<Value *, 4> Indices;
1926 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1927 VPValue *Operand = getOperand(I);
1928 if (isIndexLoopInvariant(I - 1))
1929 Indices.push_back(State.get(Operand, VPLane(0)));
1930 else
1931 Indices.push_back(State.get(Operand));
1932 }
1933
1934 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1935 // but it should be a vector, otherwise.
1936 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1937 Indices, "", getGEPNoWrapFlags());
1938 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1939 "NewGEP is not a pointer vector");
1940 State.set(this, NewGEP);
1941 State.addMetadata(NewGEP, GEP);
1942 }
1943}
1944
1945#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1947 VPSlotTracker &SlotTracker) const {
1948 O << Indent << "WIDEN-GEP ";
1949 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1950 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1951 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1952
1953 O << " ";
1955 O << " = getelementptr";
1956 printFlags(O);
1958}
1959#endif
1960
1961static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1962 unsigned CurrentPart, IRBuilderBase &Builder) {
1963 // Use i32 for the gep index type when the value is constant,
1964 // or query DataLayout for a more suitable index type otherwise.
1965 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1966 return IsScalable && (IsReverse || CurrentPart > 0)
1967 ? DL.getIndexType(Builder.getPtrTy(0))
1968 : Builder.getInt32Ty();
1969}
1970
1972 auto &Builder = State.Builder;
1974 unsigned CurrentPart = getUnrollPart(*this);
1975 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1976 CurrentPart, Builder);
1977
1978 // The wide store needs to start at the last vector element.
1979 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1980 if (IndexTy != RunTimeVF->getType())
1981 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1982 // NumElt = -CurrentPart * RunTimeVF
1983 Value *NumElt = Builder.CreateMul(
1984 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1985 // LastLane = 1 - RunTimeVF
1986 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1987 Value *Ptr = State.get(getOperand(0), VPLane(0));
1988 Value *ResultPtr =
1989 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
1990 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
1991 getGEPNoWrapFlags());
1992
1993 State.set(this, ResultPtr, /*IsScalar*/ true);
1994}
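// Worked example (hypothetical values): with a fixed VF of 4 and CurrentPart 1,
// NumElt = -1 * 4 = -4 and LastLane = 1 - 4 = -3, so ResultPtr ends up 7
// elements before Ptr and the wide access for this part covers the elements
// [Ptr - 7, Ptr - 4], i.e. this part's lanes in reverse order.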
1995
1996#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1998 VPSlotTracker &SlotTracker) const {
1999 O << Indent;
2001 O << " = reverse-vector-pointer";
2002 printFlags(O);
2004}
2005#endif
2006
2008 auto &Builder = State.Builder;
2010 unsigned CurrentPart = getUnrollPart(*this);
2011 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2012 CurrentPart, Builder);
2013 Value *Ptr = State.get(getOperand(0), VPLane(0));
2014
2015 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2016 Value *ResultPtr =
2017 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2018
2019 State.set(this, ResultPtr, /*IsScalar*/ true);
2020}
2021
2022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2024 VPSlotTracker &SlotTracker) const {
2025 O << Indent;
2027 O << " = vector-pointer ";
2028
2030}
2031#endif
2032
2034 assert(isNormalized() && "Expected blend to be normalized!");
2036 // We know that all PHIs in non-header blocks are converted into
2037 // selects, so we don't have to worry about the insertion order and we
2038 // can just use the builder.
2039 // At this point we generate the predication tree. There may be
2040 // duplications since this is a simple recursive scan, but future
2041 // optimizations will clean it up.
2042
2043 unsigned NumIncoming = getNumIncomingValues();
2044
2045 // Generate a sequence of selects of the form:
2046 // SELECT(Mask3, In3,
2047 // SELECT(Mask2, In2,
2048 // SELECT(Mask1, In1,
2049 // In0)))
2050 // Note that Mask0 is never used: lanes for which no path reaches this phi,
2051 // and which are therefore essentially undef, take their value from In0.
2052 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2053 Value *Result = nullptr;
2054 for (unsigned In = 0; In < NumIncoming; ++In) {
2055 // We might have single edge PHIs (blocks) - use an identity
2056 // 'select' for the first PHI operand.
2057 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2058 if (In == 0)
2059 Result = In0; // Initialize with the first incoming value.
2060 else {
2061 // Select between the current value and the previous incoming edge
2062 // based on the incoming mask.
2063 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2064 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2065 }
2066 }
2067 State.set(this, Result, OnlyFirstLaneUsed);
2068}
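// A rough sketch of the IR this produces for three incoming values and a
// vector VF of 4 (all names and types are illustrative only):
//   %sel = select <4 x i1> %mask1, <4 x i32> %in1, <4 x i32> %in0
//   %predphi = select <4 x i1> %mask2, <4 x i32> %in2, <4 x i32> %sel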
2069
2071 VPCostContext &Ctx) const {
2073
2074 // Handle cases where only the first lane is used the same way as the legacy
2075 // cost model.
2077 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2078
2079 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2080 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2081 return (getNumIncomingValues() - 1) *
2082 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2084}
2085
2086#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2088 VPSlotTracker &SlotTracker) const {
2089 O << Indent << "BLEND ";
2091 O << " =";
2092 if (getNumIncomingValues() == 1) {
2093 // Not a User of any mask: not really blending, this is a
2094 // single-predecessor phi.
2095 O << " ";
2097 } else {
2098 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2099 O << " ";
2101 if (I == 0)
2102 continue;
2103 O << "/";
2105 }
2106 }
2107}
2108#endif
2109
2111 assert(!State.Lane && "Reduction being replicated.");
2112 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2113 RecurKind Kind = RdxDesc.getRecurrenceKind();
2114 // Propagate the fast-math flags carried by the underlying instruction.
2116 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2118 Value *NewVecOp = State.get(getVecOp());
2119 if (VPValue *Cond = getCondOp()) {
2120 Value *NewCond = State.get(Cond, State.VF.isScalar());
2121 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2122 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2123
2124 Value *Start;
2126 Start = RdxDesc.getRecurrenceStartValue();
2127 else
2128 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2129 RdxDesc.getFastMathFlags());
2130 if (State.VF.isVector())
2131 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2132
2133 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2134 NewVecOp = Select;
2135 }
2136 Value *NewRed;
2137 Value *NextInChain;
2138 if (IsOrdered) {
2139 if (State.VF.isVector())
2140 NewRed =
2141 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2142 else
2143 NewRed = State.Builder.CreateBinOp(
2144 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2145 PrevInChain = NewRed;
2146 NextInChain = NewRed;
2147 } else {
2148 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2149 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2151 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2152 NewRed, PrevInChain);
2153 else
2154 NextInChain = State.Builder.CreateBinOp(
2155 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2156 }
2157 State.set(this, NextInChain, /*IsScalar*/ true);
2158}
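// Sketch of the unordered, conditional case for an integer add reduction with
// a fixed VF of 4 (illustrative names; the identity for add is 0):
//   %sel  = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer
//   %rdx  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %next = add i32 %rdx, %prev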
2159
2161 assert(!State.Lane && "Reduction being replicated.");
2162
2163 auto &Builder = State.Builder;
2164 // Propagate the fast-math flags carried by the underlying instruction.
2165 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2167 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2168
2169 RecurKind Kind = RdxDesc.getRecurrenceKind();
2170 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2171 Value *VecOp = State.get(getVecOp());
2172 Value *EVL = State.get(getEVL(), VPLane(0));
2173
2174 VectorBuilder VBuilder(Builder);
2175 VBuilder.setEVL(EVL);
2176 Value *Mask;
2177 // TODO: move the all-true mask generation into VectorBuilder.
2178 if (VPValue *CondOp = getCondOp())
2179 Mask = State.get(CondOp);
2180 else
2181 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2182 VBuilder.setMask(Mask);
2183
2184 Value *NewRed;
2185 if (isOrdered()) {
2186 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2187 } else {
2188 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2190 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2191 else
2192 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2193 NewRed, Prev);
2194 }
2195 State.set(this, NewRed, /*IsScalar*/ true);
2196}
2197
2199 VPCostContext &Ctx) const {
2200 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2201 Type *ElementTy = Ctx.Types.inferScalarType(this);
2202 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2204 unsigned Opcode = RdxDesc.getOpcode();
2205
2206 // TODO: Support any-of and in-loop reductions.
2207 assert(
2208 (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
2209 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2210 "Any-of reduction not implemented in VPlan-based cost model currently.");
2211 assert(
2212 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2213 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2214 "In-loop reduction not implemented in VPlan-based cost model currently.");
2215
2216 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2217 "Inferred type and recurrence type mismatch.");
2218
2219 // Cost = Reduction cost + BinOp cost
2221 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2224 return Cost + Ctx.TTI.getMinMaxReductionCost(
2225 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2226 }
2227
2228 return Cost + Ctx.TTI.getArithmeticReductionCost(
2229 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2230}
2231
2232#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2234 VPSlotTracker &SlotTracker) const {
2235 O << Indent << "REDUCE ";
2237 O << " = ";
2239 O << " +";
2240 if (isa<FPMathOperator>(getUnderlyingInstr()))
2242 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2244 if (isConditional()) {
2245 O << ", ";
2247 }
2248 O << ")";
2249 if (RdxDesc.IntermediateStore)
2250 O << " (with final reduction value stored in invariant address sank "
2251 "outside of loop)";
2252}
2253
2255 VPSlotTracker &SlotTracker) const {
2257 O << Indent << "REDUCE ";
2259 O << " = ";
2261 O << " +";
2262 if (isa<FPMathOperator>(getUnderlyingInstr()))
2264 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2266 O << ", ";
2268 if (isConditional()) {
2269 O << ", ";
2271 }
2272 O << ")";
2273 if (RdxDesc.IntermediateStore)
2274 O << " (with final reduction value stored in invariant address sank "
2275 "outside of loop)";
2276}
2277#endif
2278
2280 // Find if the recipe is used by a widened recipe via an intervening
2281 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2282 return any_of(users(), [](const VPUser *U) {
2283 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2284 return any_of(PredR->users(), [PredR](const VPUser *U) {
2285 return !U->usesScalars(PredR);
2286 });
2287 return false;
2288 });
2289}
2290
2292 VPCostContext &Ctx) const {
2293 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2294 // A VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2295 // transform; avoid computing its cost multiple times for now.
2296 Ctx.SkipCostComputation.insert(UI);
2297 return Ctx.getLegacyCost(UI, VF);
2298}
2299
2300#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2302 VPSlotTracker &SlotTracker) const {
2303 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2304
2305 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2307 O << " = ";
2308 }
2309 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2310 O << "call";
2311 printFlags(O);
2312 O << "@" << CB->getCalledFunction()->getName() << "(";
2314 O, [&O, &SlotTracker](VPValue *Op) {
2315 Op->printAsOperand(O, SlotTracker);
2316 });
2317 O << ")";
2318 } else {
2320 printFlags(O);
2322 }
2323
2324 if (shouldPack())
2325 O << " (S->V)";
2326}
2327#endif
2328
2329 Value *VPScalarCastRecipe::generate(VPTransformState &State) {
2332 "Codegen only implemented for first lane.");
2333 switch (Opcode) {
2334 case Instruction::SExt:
2335 case Instruction::ZExt:
2336 case Instruction::Trunc: {
2337 // Note: SExt/ZExt not used yet.
2338 Value *Op = State.get(getOperand(0), VPLane(0));
2339 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2340 }
2341 default:
2342 llvm_unreachable("opcode not implemented yet");
2343 }
2344}
2345
2346 void VPScalarCastRecipe::execute(VPTransformState &State) {
2347 State.set(this, generate(State), VPLane(0));
2348}
2349
2350#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2351 void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
2352 VPSlotTracker &SlotTracker) const {
2353 O << Indent << "SCALAR-CAST ";
2354 printAsOperand(O, SlotTracker);
2355 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2356 printOperands(O, SlotTracker);
2357 O << " to " << *ResultTy;
2358}
2359#endif
2360
2362 assert(State.Lane && "Branch on Mask works only on single instance.");
2363
2364 unsigned Lane = State.Lane->getKnownLane();
2365
2366 Value *ConditionBit = nullptr;
2367 VPValue *BlockInMask = getMask();
2368 if (BlockInMask) {
2369 ConditionBit = State.get(BlockInMask);
2370 if (ConditionBit->getType()->isVectorTy())
2371 ConditionBit = State.Builder.CreateExtractElement(
2372 ConditionBit, State.Builder.getInt32(Lane));
2373 } else // Block in mask is all-one.
2374 ConditionBit = State.Builder.getTrue();
2375
2376 // Replace the temporary unreachable terminator with a new conditional branch,
2377 // whose two destinations will be set later when they are created.
2378 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2379 assert(isa<UnreachableInst>(CurrentTerminator) &&
2380 "Expected to replace unreachable terminator with conditional branch.");
2381 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2382 CondBr->setSuccessor(0, nullptr);
2383 ReplaceInstWithInst(CurrentTerminator, CondBr);
2384}
2385
2387 VPCostContext &Ctx) const {
2388 // The legacy cost model doesn't assign costs to branches for individual
2389 // replicate regions. Match the current behavior in the VPlan cost model for
2390 // now.
2391 return 0;
2392}
2393
2396 assert(State.Lane && "Predicated instruction PHI works per instance.");
2397 Instruction *ScalarPredInst =
2398 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2399 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2400 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2401 assert(PredicatingBB && "Predicated block has no single predecessor.");
2402 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2403 "operand must be VPReplicateRecipe");
2404
2405 // By current pack/unpack logic we need to generate only a single phi node: if
2406 // a vector value for the predicated instruction exists at this point it means
2407 // the instruction has vector users only, and a phi for the vector value is
2408 // needed. In this case the recipe of the predicated instruction is marked to
2409 // also do that packing, thereby "hoisting" the insert-element sequence.
2410 // Otherwise, a phi node for the scalar value is needed.
2411 if (State.hasVectorValue(getOperand(0))) {
2412 Value *VectorValue = State.get(getOperand(0));
2413 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2414 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2415 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2416 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2417 if (State.hasVectorValue(this))
2418 State.reset(this, VPhi);
2419 else
2420 State.set(this, VPhi);
2421 // NOTE: Currently we need to update the value of the operand, so the next
2422 // predicated iteration inserts its generated value in the correct vector.
2423 State.reset(getOperand(0), VPhi);
2424 } else {
2425 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2426 return;
2427
2428 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2429 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2430 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2431 PredicatingBB);
2432 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2433 if (State.hasScalarValue(this, *State.Lane))
2434 State.reset(this, Phi, *State.Lane);
2435 else
2436 State.set(this, Phi, *State.Lane);
2437 // NOTE: Currently we need to update the value of the operand, so the next
2438 // predicated iteration inserts its generated value in the correct vector.
2439 State.reset(getOperand(0), Phi, *State.Lane);
2440 }
2441}
2442
2443#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2445 VPSlotTracker &SlotTracker) const {
2446 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2448 O << " = ";
2450}
2451#endif
2452
2454 VPCostContext &Ctx) const {
2456 const Align Alignment =
2458 unsigned AS =
2461
2462 if (!Consecutive) {
2463 // TODO: Using the original IR may not be accurate.
2464 // Currently, ARM will use the underlying IR to calculate gather/scatter
2465 // instruction cost.
2467 assert(!Reverse &&
2468 "Non-consecutive memory accesses should not be reversed.");
2469 return Ctx.TTI.getAddressComputationCost(Ty) +
2471 IsMasked, Alignment, CostKind,
2472 &Ingredient);
2473 }
2474
2476 if (IsMasked) {
2477 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2478 AS, CostKind);
2479 } else {
2480 TTI::OperandValueInfo OpInfo =
2482 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2483 CostKind, OpInfo, &Ingredient);
2484 }
2485 if (!Reverse)
2486 return Cost;
2487
2489 cast<VectorType>(Ty), {}, CostKind, 0);
2490}
2491
2493 auto *LI = cast<LoadInst>(&Ingredient);
2494
2495 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2496 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2497 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2498 bool CreateGather = !isConsecutive();
2499
2500 auto &Builder = State.Builder;
2502 Value *Mask = nullptr;
2503 if (auto *VPMask = getMask()) {
2504 // Mask reversal is only needed for non-all-one masks; an all-one mask is
2505 // represented as a null mask, and the reverse of a null mask is still null.
2506 Mask = State.get(VPMask);
2507 if (isReverse())
2508 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2509 }
2510
2511 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2512 Value *NewLI;
2513 if (CreateGather) {
2514 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2515 "wide.masked.gather");
2516 } else if (Mask) {
2517 NewLI =
2518 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2519 PoisonValue::get(DataTy), "wide.masked.load");
2520 } else {
2521 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2522 }
2523 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2524 State.addMetadata(NewLI, LI);
2525 if (Reverse)
2526 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2527 State.set(this, NewLI);
2528}
2529
2530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2532 VPSlotTracker &SlotTracker) const {
2533 O << Indent << "WIDEN ";
2535 O << " = load ";
2537}
2538#endif
2539
2540/// Use all-true mask for reverse rather than actual mask, as it avoids a
2541/// dependence w/o affecting the result.
2543 Value *EVL, const Twine &Name) {
2544 VectorType *ValTy = cast<VectorType>(Operand->getType());
2545 Value *AllTrueMask =
2546 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2547 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2548 {Operand, AllTrueMask, EVL}, nullptr, Name);
2549}
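// For illustration (types and names are hypothetical), for <vscale x 4 x i32>
// this emits roughly:
//   %vp.reverse = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//                     <vscale x 4 x i32> %v, <vscale x 4 x i1> %alltrue, i32 %evl)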
2550
2552 auto *LI = cast<LoadInst>(&Ingredient);
2553
2554 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2555 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2556 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2557 bool CreateGather = !isConsecutive();
2558
2559 auto &Builder = State.Builder;
2561 CallInst *NewLI;
2562 Value *EVL = State.get(getEVL(), VPLane(0));
2563 Value *Addr = State.get(getAddr(), !CreateGather);
2564 Value *Mask = nullptr;
2565 if (VPValue *VPMask = getMask()) {
2566 Mask = State.get(VPMask);
2567 if (isReverse())
2568 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2569 } else {
2570 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2571 }
2572
2573 if (CreateGather) {
2574 NewLI =
2575 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2576 nullptr, "wide.masked.gather");
2577 } else {
2578 VectorBuilder VBuilder(Builder);
2579 VBuilder.setEVL(EVL).setMask(Mask);
2580 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2581 Instruction::Load, DataTy, Addr, "vp.op.load"));
2582 }
2583 NewLI->addParamAttr(
2584 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2585 State.addMetadata(NewLI, LI);
2586 Instruction *Res = NewLI;
2587 if (isReverse())
2588 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2589 State.set(this, Res);
2590}
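// Sketch of the consecutive, non-reversed case (hypothetical types and names):
//   %vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(
//                     ptr %addr, <vscale x 4 x i1> %mask, i32 %evl)
// with the alignment attached as a parameter attribute on the pointer operand.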
2591
2593 VPCostContext &Ctx) const {
2594 if (!Consecutive || IsMasked)
2595 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2596
2597 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
2598 // because the EVL recipes use EVL to replace the tail mask, whereas the
2599 // legacy model always accounts for the cost of the mask.
2600 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
2601 // longer need to compare against the legacy cost model.
2603 const Align Alignment =
2605 unsigned AS =
2609 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2610 if (!Reverse)
2611 return Cost;
2612
2614 cast<VectorType>(Ty), {}, CostKind, 0);
2615}
2616
2617#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2619 VPSlotTracker &SlotTracker) const {
2620 O << Indent << "WIDEN ";
2622 O << " = vp.load ";
2624}
2625#endif
2626
2628 auto *SI = cast<StoreInst>(&Ingredient);
2629
2630 VPValue *StoredVPValue = getStoredValue();
2631 bool CreateScatter = !isConsecutive();
2632 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2633
2634 auto &Builder = State.Builder;
2636
2637 Value *Mask = nullptr;
2638 if (auto *VPMask = getMask()) {
2639 // Mask reversal is only needed for non-all-one masks; an all-one mask is
2640 // represented as a null mask, and the reverse of a null mask is still null.
2641 Mask = State.get(VPMask);
2642 if (isReverse())
2643 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2644 }
2645
2646 Value *StoredVal = State.get(StoredVPValue);
2647 if (isReverse()) {
2648 // If we store to reverse consecutive memory locations, then we need
2649 // to reverse the order of elements in the stored value.
2650 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2651 // We don't want to update the value in the map as it might be used in
2652 // another expression. So don't call resetVectorValue(StoredVal).
2653 }
2654 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2655 Instruction *NewSI = nullptr;
2656 if (CreateScatter)
2657 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2658 else if (Mask)
2659 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2660 else
2661 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2662 State.addMetadata(NewSI, SI);
2663}
2664
2665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2667 VPSlotTracker &SlotTracker) const {
2668 O << Indent << "WIDEN store ";
2670}
2671#endif
2672
2674 auto *SI = cast<StoreInst>(&Ingredient);
2675
2676 VPValue *StoredValue = getStoredValue();
2677 bool CreateScatter = !isConsecutive();
2678 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2679
2680 auto &Builder = State.Builder;
2682
2683 CallInst *NewSI = nullptr;
2684 Value *StoredVal = State.get(StoredValue);
2685 Value *EVL = State.get(getEVL(), VPLane(0));
2686 if (isReverse())
2687 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2688 Value *Mask = nullptr;
2689 if (VPValue *VPMask = getMask()) {
2690 Mask = State.get(VPMask);
2691 if (isReverse())
2692 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2693 } else {
2694 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2695 }
2696 Value *Addr = State.get(getAddr(), !CreateScatter);
2697 if (CreateScatter) {
2698 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2699 Intrinsic::vp_scatter,
2700 {StoredVal, Addr, Mask, EVL});
2701 } else {
2702 VectorBuilder VBuilder(Builder);
2703 VBuilder.setEVL(EVL).setMask(Mask);
2704 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2705 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2706 {StoredVal, Addr}));
2707 }
2708 NewSI->addParamAttr(
2709 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2710 State.addMetadata(NewSI, SI);
2711}
2712
2714 VPCostContext &Ctx) const {
2715 if (!Consecutive || IsMasked)
2716 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2717
2718 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
2719 // because the EVL recipes use EVL to replace the tail mask, whereas the
2720 // legacy model always accounts for the cost of the mask.
2721 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
2722 // longer need to compare against the legacy cost model.
2724 const Align Alignment =
2726 unsigned AS =
2730 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2731 if (!Reverse)
2732 return Cost;
2733
2735 cast<VectorType>(Ty), {}, CostKind, 0);
2736}
2737
2738#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2740 VPSlotTracker &SlotTracker) const {
2741 O << Indent << "WIDEN vp.store ";
2743}
2744#endif
2745
2747 VectorType *DstVTy, const DataLayout &DL) {
2748 // Verify that V is a vector type with same number of elements as DstVTy.
2749 auto VF = DstVTy->getElementCount();
2750 auto *SrcVecTy = cast<VectorType>(V->getType());
2751 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2752 Type *SrcElemTy = SrcVecTy->getElementType();
2753 Type *DstElemTy = DstVTy->getElementType();
2754 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2755 "Vector elements must have same size");
2756
2757 // Do a direct cast if element types are castable.
2758 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2759 return Builder.CreateBitOrPointerCast(V, DstVTy);
2760 }
2761 // V cannot be directly casted to desired vector type.
2762 // May happen when V is a floating point vector but DstVTy is a vector of
2763 // pointers or vice-versa. Handle this using a two-step bitcast using an
2764 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2765 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2766 "Only one type should be a pointer type");
2767 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2768 "Only one type should be a floating point type");
2769 Type *IntTy =
2770 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2771 auto *VecIntTy = VectorType::get(IntTy, VF);
2772 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2773 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2774}
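// Example of the two-step case (assuming 64-bit pointers in the DataLayout):
// casting <4 x ptr> to <4 x double> goes via <4 x i64>, i.e. a ptrtoint
// followed by a bitcast; the direct cast is used whenever it is legal.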
2775
2776/// Return a vector containing interleaved elements from multiple
2777/// smaller input vectors.
2779 const Twine &Name) {
2780 unsigned Factor = Vals.size();
2781 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2782
2783 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2784 #ifndef NDEBUG
2785 for (Value *Val : Vals)
2786 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2787 #endif
2788
2789 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2790 // must use intrinsics to interleave.
2791 if (VecTy->isScalableTy()) {
2793 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2794 Vals,
2795 /*FMFSource=*/nullptr, Name);
2796 }
2797
2798 // Fixed length. Start by concatenating all vectors into a wide vector.
2799 Value *WideVec = concatenateVectors(Builder, Vals);
2800
2801 // Interleave the elements into the wide vector.
2802 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2803 return Builder.CreateShuffleVector(
2804 WideVec, createInterleaveMask(NumElts, Factor), Name);
2805}
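// Fixed-length example (hypothetical): interleaving two <4 x i32> vectors A and
// B concatenates them into an <8 x i32> vector and shuffles it with the mask
// <0,4,1,5,2,6,3,7>, yielding <A0,B0,A1,B1,A2,B2,A3,B3>.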
2806
2807// Try to vectorize the interleave group that \p Instr belongs to.
2808//
2809// E.g. Translate following interleaved load group (factor = 3):
2810// for (i = 0; i < N; i+=3) {
2811// R = Pic[i]; // Member of index 0
2812// G = Pic[i+1]; // Member of index 1
2813// B = Pic[i+2]; // Member of index 2
2814// ... // do something to R, G, B
2815// }
2816// To:
2817// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2818// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2819// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2820// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2821//
2822// Or translate following interleaved store group (factor = 3):
2823// for (i = 0; i < N; i+=3) {
2824// ... do something to R, G, B
2825// Pic[i] = R; // Member of index 0
2826// Pic[i+1] = G; // Member of index 1
2827// Pic[i+2] = B; // Member of index 2
2828// }
2829// To:
2830// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2831// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2832// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2833// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2834// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2836 assert(!State.Lane && "Interleave group being replicated.");
2837 const InterleaveGroup<Instruction> *Group = IG;
2838 Instruction *Instr = Group->getInsertPos();
2839
2840 // Prepare for the vector type of the interleaved load/store.
2841 Type *ScalarTy = getLoadStoreType(Instr);
2842 unsigned InterleaveFactor = Group->getFactor();
2843 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2844
2845 // TODO: extend the masked interleaved-group support to reversed access.
2846 VPValue *BlockInMask = getMask();
2847 assert((!BlockInMask || !Group->isReverse()) &&
2848 "Reversed masked interleave-group not supported.");
2849
2850 VPValue *Addr = getAddr();
2851 Value *ResAddr = State.get(Addr, VPLane(0));
2852 if (auto *I = dyn_cast<Instruction>(ResAddr))
2853 State.setDebugLocFrom(I->getDebugLoc());
2854
2855 // If the group is reverse, adjust the index to refer to the last vector lane
2856 // instead of the first. We adjust the index from the first vector lane,
2857 // rather than directly getting the pointer for lane VF - 1, because the
2858 // pointer operand of the interleaved access is supposed to be uniform.
2859 if (Group->isReverse()) {
2860 Value *RuntimeVF =
2861 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2862 Value *Index =
2863 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2864 Index = State.Builder.CreateMul(Index,
2865 State.Builder.getInt32(Group->getFactor()));
2866 Index = State.Builder.CreateNeg(Index);
2867
2868 bool InBounds = false;
2869 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2870 InBounds = Gep->isInBounds();
2871 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2872 }
2873
2874 State.setDebugLocFrom(Instr->getDebugLoc());
2875 Value *PoisonVec = PoisonValue::get(VecTy);
2876
2877 auto CreateGroupMask = [&BlockInMask, &State,
2878 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2879 if (State.VF.isScalable()) {
2880 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2881 assert(InterleaveFactor == 2 &&
2882 "Unsupported deinterleave factor for scalable vectors");
2883 auto *ResBlockInMask = State.get(BlockInMask);
2884 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2885 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2886 State.VF.getKnownMinValue() * 2, true);
2887 return State.Builder.CreateIntrinsic(
2888 MaskTy, Intrinsic::vector_interleave2, Ops,
2889 /*FMFSource=*/nullptr, "interleaved.mask");
2890 }
2891
2892 if (!BlockInMask)
2893 return MaskForGaps;
2894
2895 Value *ResBlockInMask = State.get(BlockInMask);
2896 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2897 ResBlockInMask,
2898 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2899 "interleaved.mask");
2900 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2901 ShuffledMask, MaskForGaps)
2902 : ShuffledMask;
2903 };
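// As an illustration of the fixed-length path above: with a block mask
// <m0,m1,m2,m3> and an interleave factor of 3, the replicated mask becomes
// <m0,m0,m0, m1,m1,m1, m2,m2,m2, m3,m3,m3>, optionally ANDed with MaskForGaps.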
2904
2905 const DataLayout &DL = Instr->getDataLayout();
2906 // Vectorize the interleaved load group.
2907 if (isa<LoadInst>(Instr)) {
2908 Value *MaskForGaps = nullptr;
2909 if (NeedsMaskForGaps) {
2910 MaskForGaps = createBitMaskForGaps(State.Builder,
2911 State.VF.getKnownMinValue(), *Group);
2912 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2913 }
2914
2915 Instruction *NewLoad;
2916 if (BlockInMask || MaskForGaps) {
2917 Value *GroupMask = CreateGroupMask(MaskForGaps);
2918 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2919 Group->getAlign(), GroupMask,
2920 PoisonVec, "wide.masked.vec");
2921 } else
2922 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2923 Group->getAlign(), "wide.vec");
2924 Group->addMetadata(NewLoad);
2925
2927 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2928 if (VecTy->isScalableTy()) {
2929 assert(InterleaveFactor == 2 &&
2930 "Unsupported deinterleave factor for scalable vectors");
2931
2932 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2933 // so must use intrinsics to deinterleave.
2934 Value *DI = State.Builder.CreateIntrinsic(
2935 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2936 /*FMFSource=*/nullptr, "strided.vec");
2937 unsigned J = 0;
2938 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2939 Instruction *Member = Group->getMember(I);
2940
2941 if (!Member)
2942 continue;
2943
2944 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
2945 // If this member has different type, cast the result type.
2946 if (Member->getType() != ScalarTy) {
2947 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2948 StridedVec =
2949 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2950 }
2951
2952 if (Group->isReverse())
2953 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2954
2955 State.set(VPDefs[J], StridedVec);
2956 ++J;
2957 }
2958
2959 return;
2960 }
2961
2962 // For each member in the group, shuffle out the appropriate data from the
2963 // wide loads.
2964 unsigned J = 0;
2965 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2966 Instruction *Member = Group->getMember(I);
2967
2968 // Skip the gaps in the group.
2969 if (!Member)
2970 continue;
2971
2972 auto StrideMask =
2973 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
2974 Value *StridedVec =
2975 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
2976
2977 // If this member has different type, cast the result type.
2978 if (Member->getType() != ScalarTy) {
2979 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
2980 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2981 StridedVec =
2982 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2983 }
2984
2985 if (Group->isReverse())
2986 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2987
2988 State.set(VPDefs[J], StridedVec);
2989 ++J;
2990 }
2991 return;
2992 }
2993
2994 // The sub vector type for current instruction.
2995 auto *SubVT = VectorType::get(ScalarTy, State.VF);
2996
2997 // Vectorize the interleaved store group.
2998 Value *MaskForGaps =
2999 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3000 assert((!MaskForGaps || !State.VF.isScalable()) &&
3001 "masking gaps for scalable vectors is not yet supported.");
3002 ArrayRef<VPValue *> StoredValues = getStoredValues();
3003 // Collect the stored vector from each member.
3004 SmallVector<Value *, 4> StoredVecs;
3005 unsigned StoredIdx = 0;
3006 for (unsigned i = 0; i < InterleaveFactor; i++) {
3007 assert((Group->getMember(i) || MaskForGaps) &&
3008 "Failed to get a member from an interleaved store group");
3009 Instruction *Member = Group->getMember(i);
3010
3011 // Skip the gaps in the group.
3012 if (!Member) {
3013 Value *Undef = PoisonValue::get(SubVT);
3014 StoredVecs.push_back(Undef);
3015 continue;
3016 }
3017
3018 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3019 ++StoredIdx;
3020
3021 if (Group->isReverse())
3022 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3023
3024 // If this member has different type, cast it to a unified type.
3025
3026 if (StoredVec->getType() != SubVT)
3027 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3028
3029 StoredVecs.push_back(StoredVec);
3030 }
3031
3032 // Interleave all the smaller vectors into one wider vector.
3033 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3034 Instruction *NewStoreInstr;
3035 if (BlockInMask || MaskForGaps) {
3036 Value *GroupMask = CreateGroupMask(MaskForGaps);
3037 NewStoreInstr = State.Builder.CreateMaskedStore(
3038 IVec, ResAddr, Group->getAlign(), GroupMask);
3039 } else
3040 NewStoreInstr =
3041 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3042
3043 Group->addMetadata(NewStoreInstr);
3044}
3045
3046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3048 VPSlotTracker &SlotTracker) const {
3049 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3050 IG->getInsertPos()->printAsOperand(O, false);
3051 O << ", ";
3053 VPValue *Mask = getMask();
3054 if (Mask) {
3055 O << ", ";
3056 Mask->printAsOperand(O, SlotTracker);
3057 }
3058
3059 unsigned OpIdx = 0;
3060 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3061 if (!IG->getMember(i))
3062 continue;
3063 if (getNumStoreOperands() > 0) {
3064 O << "\n" << Indent << " store ";
3065 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3066 O << " to index " << i;
3067 } else {
3068 O << "\n" << Indent << " ";
3070 O << " = load from index " << i;
3071 }
3072 ++OpIdx;
3073 }
3074}
3075#endif
3076
3078 VPCostContext &Ctx) const {
3079 Instruction *InsertPos = getInsertPos();
3080 // Find the VPValue index of the interleave group. We need to skip gaps.
3081 unsigned InsertPosIdx = 0;
3082 for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx)
3083 if (auto *Member = IG->getMember(Idx)) {
3084 if (Member == InsertPos)
3085 break;
3086 InsertPosIdx++;
3087 }
3088 Type *ValTy = Ctx.Types.inferScalarType(
3089 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3090 : getStoredValues()[InsertPosIdx]);
3091 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3092 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3094
3095 unsigned InterleaveFactor = IG->getFactor();
3096 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3097
3098 // Holds the indices of existing members in the interleaved group.
3100 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3101 if (IG->getMember(IF))
3102 Indices.push_back(IF);
3103
3104 // Calculate the cost of the whole interleaved group.
3106 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3107 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3108
3109 if (!IG->isReverse())
3110 return Cost;
3111
3112 return Cost + IG->getNumMembers() *
3114 VectorTy, std::nullopt, CostKind, 0);
3115}
3116
3117#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3119 VPSlotTracker &SlotTracker) const {
3120 O << Indent << "EMIT ";
3122 O << " = CANONICAL-INDUCTION ";
3124}
3125#endif
3126
3128 return IsScalarAfterVectorization &&
3129 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3130}
3131
3133 assert(getInductionDescriptor().getKind() ==
3134 InductionDescriptor::IK_PtrInduction &&
3135 "Not a pointer induction according to InductionDescriptor!");
3136 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3137 "Unexpected type.");
3138 assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
3139 "Recipe should have been replaced");
3140
3141 unsigned CurrentPart = getUnrollPart(*this);
3142
3143 // Build a pointer phi
3144 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3145 Type *ScStValueType = ScalarStartValue->getType();
3146
3147 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3148 PHINode *NewPointerPhi = nullptr;
3149 if (CurrentPart == 0) {
3150 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3151 ->getPlan()
3152 ->getVectorLoopRegion()
3153 ->getEntryBasicBlock()
3154 ->front());
3155 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3156 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3157 CanonicalIV->getIterator());
3158 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3159 NewPointerPhi->setDebugLoc(getDebugLoc());
3160 } else {
3161 // The recipe has been unrolled. In that case, fetch the single pointer phi
3162 // shared among all unrolled parts of the recipe.
3163 auto *GEP =
3164 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3165 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3166 }
3167
3168 // A pointer induction, performed by using a gep
3169 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3170 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3171 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3172 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3173 // Add induction update using an incorrect block temporarily. The phi node
3174 // will be fixed after VPlan execution. Note that at this point the latch
3175 // block cannot be used, as it does not exist yet.
3176 // TODO: Model increment value in VPlan, by turning the recipe into a
3177 // multi-def and a subclass of VPHeaderPHIRecipe.
3178 if (CurrentPart == 0) {
3179 // The recipe represents the first part of the pointer induction. Create the
3180 // GEP to increment the phi across all unrolled parts.
3181 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3182 Value *NumUnrolledElems =
3183 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3184
3185 Value *InductionGEP = GetElementPtrInst::Create(
3186 State.Builder.getInt8Ty(), NewPointerPhi,
3187 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3188 InductionLoc);
3189
3190 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3191 }
3192
3193 // Create actual address geps that use the pointer phi as base and a
3194 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3195 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3196 Value *StartOffsetScalar = State.Builder.CreateMul(
3197 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3198 Value *StartOffset =
3199 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3200 // Create a vector of consecutive numbers from zero to VF.
3201 StartOffset = State.Builder.CreateAdd(
3202 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3203
3204 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3205 "scalar step must be the same across all parts");
3206 Value *GEP = State.Builder.CreateGEP(
3207 State.Builder.getInt8Ty(), NewPointerPhi,
3208 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3209 State.VF, ScalarStepValue)),
3210 "vector.gep");
3211 State.set(this, GEP);
3212}
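// Worked example (hypothetical values): with a fixed VF of 4, part 0 and a
// scalar step of 4 bytes, StartOffset is <0,1,2,3> and the vector GEP yields
// per-lane addresses %pointer.phi + <0, 4, 8, 12> bytes.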
3213
3214#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3216 VPSlotTracker &SlotTracker) const {
3217 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3218 "unexpected number of operands");
3219 O << Indent << "EMIT ";
3221 O << " = WIDEN-POINTER-INDUCTION ";
3223 O << ", ";
3225 if (getNumOperands() == 4) {
3226 O << ", ";
3228 O << ", ";
3230 }
3231}
3232#endif
3233
3235 assert(!State.Lane && "cannot be used in per-lane");
3236 if (State.ExpandedSCEVs.contains(Expr)) {
3237 // SCEV Expr has already been expanded, result must already be set. At the
3238 // moment we have to execute the entry block twice (once before skeleton
3239 // creation to get expanded SCEVs used by the skeleton and once during
3240 // regular VPlan execution).
3242 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3243 "Results must match");
3244 return;
3245 }
3246
3247 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3248 SCEVExpander Exp(SE, DL, "induction");
3249
3250 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3251 &*State.Builder.GetInsertPoint());
3252 State.ExpandedSCEVs[Expr] = Res;
3253 State.set(this, Res, VPLane(0));
3254}
3255
3256#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3258 VPSlotTracker &SlotTracker) const {
3259 O << Indent << "EMIT ";
3261 O << " = EXPAND SCEV " << *Expr;
3262}
3263#endif
3264
3266 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3267 Type *STy = CanonicalIV->getType();
3268 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3269 ElementCount VF = State.VF;
3270 Value *VStart = VF.isScalar()
3271 ? CanonicalIV
3272 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3273 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3274 if (VF.isVector()) {
3275 VStep = Builder.CreateVectorSplat(VF, VStep);
3276 VStep =
3277 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3278 }
3279 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3280 State.set(this, CanonicalVectorIV);
3281}
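// For illustration: with a fixed VF of 4 and unroll part 0, this produces
//   %vec.iv = <%iv, %iv + 1, %iv + 2, %iv + 3>
// and part 1 would start at %iv + 4 (values are hypothetical).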
3282
3283#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3285 VPSlotTracker &SlotTracker) const {
3286 O << Indent << "EMIT ";
3288 O << " = WIDEN-CANONICAL-INDUCTION ";
3290}
3291#endif
3292
3294 auto &Builder = State.Builder;
3295 // Create a vector from the initial value.
3296 auto *VectorInit = getStartValue()->getLiveInIRValue();
3297
3298 Type *VecTy = State.VF.isScalar()
3299 ? VectorInit->getType()
3300 : VectorType::get(VectorInit->getType(), State.VF);
3301
3302 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3303 if (State.VF.isVector()) {
3304 auto *IdxTy = Builder.getInt32Ty();
3305 auto *One = ConstantInt::get(IdxTy, 1);
3306 IRBuilder<>::InsertPointGuard Guard(Builder);
3307 Builder.SetInsertPoint(VectorPH->getTerminator());
3308 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3309 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3310 VectorInit = Builder.CreateInsertElement(
3311 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3312 }
3313
3314 // Create a phi node for the new recurrence.
3315 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3316 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3317 Phi->addIncoming(VectorInit, VectorPH);
3318 State.set(this, Phi);
3319}
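// Example (hypothetical): for a fixed VF of 4, the recurrence init placed in
// the preheader is <poison, poison, poison, %init>, i.e. the scalar initial
// value inserted into the last lane; the phi takes it on the preheader edge.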
3320
3323 VPCostContext &Ctx) const {
3325 if (VF.isScalar())
3326 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3327
3328 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3330
3332 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3333 Type *VectorTy =
3334 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3335
3337 cast<VectorType>(VectorTy), Mask, CostKind,
3338 VF.getKnownMinValue() - 1);
3339}
3340
3341#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3343 VPSlotTracker &SlotTracker) const {
3344 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3346 O << " = phi ";
3348}
3349#endif
3350
3352 auto &Builder = State.Builder;
3353
3354 // Reductions do not have to start at zero. They can start with
3355 // any loop invariant values.
3356 VPValue *StartVPV = getStartValue();
3357 Value *StartV = StartVPV->getLiveInIRValue();
3358
3359 // In order to support recurrences we need to be able to vectorize Phi nodes.
3360 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3361 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3362 // this value when we vectorize all of the instructions that use the PHI.
3363 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3364 Type *VecTy = ScalarPHI ? StartV->getType()
3365 : VectorType::get(StartV->getType(), State.VF);
3366
3367 BasicBlock *HeaderBB = State.CFG.PrevBB;
3368 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3369 "recipe must be in the vector loop header");
3370 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3371 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3372 State.set(this, Phi, IsInLoop);
3373
3374 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3375
3376 Value *Iden = nullptr;
3377 RecurKind RK = RdxDesc.getRecurrenceKind();
3378 unsigned CurrentPart = getUnrollPart(*this);
3379
3382 // MinMax and AnyOf reductions have the start value as their identity.
3383 if (ScalarPHI) {
3384 Iden = StartV;
3385 } else {
3386 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3387 Builder.SetInsertPoint(VectorPH->getTerminator());
3388 StartV = Iden = State.get(StartVPV);
3389 }
3391 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3392 // phi or the resume value from the main vector loop when vectorizing the
3393 // epilogue loop. In the exit block, ComputeReductionResult will generate
3394 // checks to verify if the reduction result is the sentinel value. If the
3395 // result is the sentinel value, it will be corrected back to the start
3396 // value.
3397 // TODO: The sentinel value is not always necessary. When the start value is
3398 // a constant, and smaller than the start value of the induction variable,
3399 // the start value can be directly used to initialize the reduction phi.
3400 Iden = StartV;
3401 if (!ScalarPHI) {
3402 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3403 Builder.SetInsertPoint(VectorPH->getTerminator());
3404 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3405 }
3406 } else {
3407 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3408 RdxDesc.getFastMathFlags());
3409
3410 if (!ScalarPHI) {
3411 if (CurrentPart == 0) {
3412 // Create start and identity vector values for the reduction in the
3413 // preheader.
3414 // TODO: Introduce recipes in VPlan preheader to create initial values.
3415 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3416 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3417 Builder.SetInsertPoint(VectorPH->getTerminator());
3418 Constant *Zero = Builder.getInt32(0);
3419 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3420 } else {
3421 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3422 }
3423 }
3424 }
3425
3426 Phi = cast<PHINode>(State.get(this, IsInLoop));
3427 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3428 Phi->addIncoming(StartVal, VectorPH);
3429}
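// Example for a plain integer add reduction with start value %s and a fixed
// VF of 4 (hypothetical): part 0 is seeded with <%s, 0, 0, 0> and all other
// parts with <0, 0, 0, 0>, so combining the parts does not double-count %s.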
3430
3431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3433 VPSlotTracker &SlotTracker) const {
3434 O << Indent << "WIDEN-REDUCTION-PHI ";
3435
3437 O << " = phi ";
3439}
3440#endif
3441
3444 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3445
3446 Value *Op0 = State.get(getOperand(0));
3447 Type *VecTy = Op0->getType();
3448 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3449 State.set(this, VecPhi);
3450}
3451
3452#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3454 VPSlotTracker &SlotTracker) const {
3455 O << Indent << "WIDEN-PHI ";
3456
3457 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3458 // Unless all incoming values are modeled in VPlan print the original PHI
3459 // directly.
3460 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3461 // values as VPValues.
3462 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3463 O << VPlanIngredient(OriginalPhi);
3464 return;
3465 }
3466
3468 O << " = phi ";
3470}
3471#endif
3472
3473// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3474// remove VPActiveLaneMaskPHIRecipe.
3476 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3477 Value *StartMask = State.get(getOperand(0));
3478 PHINode *Phi =
3479 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3480 Phi->addIncoming(StartMask, VectorPH);
3481 Phi->setDebugLoc(getDebugLoc());
3482 State.set(this, Phi);
3483}
3484
3485#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3487 VPSlotTracker &SlotTracker) const {
3488 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3489
3491 O << " = phi ";
3493}
3494#endif
3495
3496#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3498 VPSlotTracker &SlotTracker) const {
3499 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3500
3502 O << " = phi ";
3504}
3505#endif
3506
3508 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3509 Value *Start = State.get(getStartValue(), VPLane(0));
3510 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3511 Phi->addIncoming(Start, VectorPH);
3512 Phi->setDebugLoc(getDebugLoc());
3513 State.set(this, Phi, /*IsScalar=*/true);
3514}
3515
3516#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3518 VPSlotTracker &SlotTracker) const {
3519 O << Indent << "SCALAR-PHI ";
3521 O << " = phi ";
3523}
3524#endif
Definition: BasicBlock.cpp:416
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:662
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:595
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:108
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:475
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2505
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:525
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2493
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1809
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2045
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1125
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1153
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2549
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:480
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1043
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:189
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2039
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2568
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:540
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1981
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2180
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2087
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:188
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:325
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1109
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2392
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1868
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1727
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:291
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:500
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2228
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1751
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2264
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1381
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1158
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:958
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2027
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2527
LLVMContext & getContext() const
Definition: IRBuilder.h:190
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1364
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2443
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2013
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:583
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1665
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:303
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:194
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1828
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2374
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1608
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:530
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1398
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2699
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isUnaryOp() const
Definition: Instruction.h:278
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
bool isReverse() const
Definition: VectorUtils.h:495
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:497
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3475
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3525
iterator end()
Definition: VPlan.h:3509
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3538
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2462
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2467
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2457
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2453
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
VPRegionBlock * getParent()
Definition: VPlan.h:489
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
VPlan * getPlan()
Definition: VPlan.cpp:153
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2829
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:114
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:414
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:409
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:387
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:399
unsigned getVPDefID() const
Definition: VPlanValue.h:419
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3405
VPValue * getStartValue() const
Definition: VPlan.h:3404
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2059
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1800
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1209
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1197
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1212
@ CalculateTripCountMinusVF
Definition: VPlan.h:1210
bool hasResult() const
Definition: VPlan.h:1332
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1309
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2541
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2547
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2554
Instruction * getInsertPos() const
Definition: VPlan.h:2589
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2578
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
static VPLane getFirstLane()
Definition: VPlan.h:178
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:924
ExactFlagsTy ExactFlags
Definition: VPlan.h:974
FastMathFlagsTy FMFs
Definition: VPlan.h:977
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:976
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1144
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1105
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1147
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:973
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:975
WrapFlagsTy WrapFlags
Definition: VPlan.h:972
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1151
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1138
bool hasNoSignedWrap() const
Definition: VPlan.h:1157
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2702
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2660
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2664
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2654
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2666
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2658
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2662
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3646
const VPBlockBase * getEntry() const
Definition: VPlan.h:3679
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2789
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3462
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:910
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:440
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1456
operand_range operands()
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:272
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1452
friend class VPInstruction
Definition: VPlanValue.h:47
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:138
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
user_iterator user_begin()
Definition: VPlanValue.h:128
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1748
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1752
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1571
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1499
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:2115
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2112
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2118
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2190
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2199
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1691
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2900
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2897
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2936
Instruction & Ingredient
Definition: VPlan.h:2891
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2894
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2950
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2943
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2940
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2244
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1465
unsigned getUF() const
Definition: VPlan.h:3948
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:41
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:39
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:250
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3020
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1844
VPValue * getCond() const
Definition: VPlan.h:1840
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:3099
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3102
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3064
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.