1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "VPlanPatternMatch.h"
17#include "VPlanUtils.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Twine.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/Type.h"
28#include "llvm/IR/Value.h"
32#include "llvm/Support/Debug.h"
37#include <cassert>
38
39using namespace llvm;
40
42
43namespace llvm {
45}
47
48#define LV_NAME "loop-vectorize"
49#define DEBUG_TYPE LV_NAME
50
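// Conservatively determine whether this recipe may write to memory, based on
// the concrete recipe kind and, where available, the underlying instruction
// or called scalar function.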
51bool VPRecipeBase::mayWriteToMemory() const {
52 switch (getVPDefID()) {
53 case VPInstructionSC:
54 if (Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode()))
55 return false;
56 switch (cast<VPInstruction>(this)->getOpcode()) {
57 case Instruction::Or:
58 case Instruction::ICmp:
59 case Instruction::Select:
68 return false;
69 default:
70 return true;
71 }
72 case VPInterleaveSC:
73 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
74 case VPWidenStoreEVLSC:
75 case VPWidenStoreSC:
76 return true;
77 case VPReplicateSC:
78 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
79 ->mayWriteToMemory();
80 case VPWidenCallSC:
81 return !cast<VPWidenCallRecipe>(this)
82 ->getCalledScalarFunction()
83 ->onlyReadsMemory();
84 case VPWidenIntrinsicSC:
85 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
86 case VPBranchOnMaskSC:
87 case VPScalarIVStepsSC:
88 case VPPredInstPHISC:
89 return false;
90 case VPBlendSC:
91 case VPReductionEVLSC:
92 case VPReductionSC:
93 case VPVectorPointerSC:
94 case VPWidenCanonicalIVSC:
95 case VPWidenCastSC:
96 case VPWidenGEPSC:
97 case VPWidenIntOrFpInductionSC:
98 case VPWidenLoadEVLSC:
99 case VPWidenLoadSC:
100 case VPWidenPHISC:
101 case VPWidenSC:
102 case VPWidenEVLSC:
103 case VPWidenSelectSC: {
104 const Instruction *I =
105 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
106 (void)I;
107 assert((!I || !I->mayWriteToMemory()) &&
108 "underlying instruction may write to memory");
109 return false;
110 }
111 default:
112 return true;
113 }
114}
115
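// Analogous query for reads: derived from the recipe kind and, for replicate
// and call recipes, from the underlying scalar instruction or callee.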
116bool VPRecipeBase::mayReadFromMemory() const {
117 switch (getVPDefID()) {
118 case VPWidenLoadEVLSC:
119 case VPWidenLoadSC:
120 return true;
121 case VPReplicateSC:
122 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
123 ->mayReadFromMemory();
124 case VPWidenCallSC:
125 return !cast<VPWidenCallRecipe>(this)
126 ->getCalledScalarFunction()
127 ->onlyWritesMemory();
128 case VPWidenIntrinsicSC:
129 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
130 case VPBranchOnMaskSC:
131 case VPPredInstPHISC:
132 case VPScalarIVStepsSC:
133 case VPWidenStoreEVLSC:
134 case VPWidenStoreSC:
135 return false;
136 case VPBlendSC:
137 case VPReductionEVLSC:
138 case VPReductionSC:
139 case VPVectorPointerSC:
140 case VPWidenCanonicalIVSC:
141 case VPWidenCastSC:
142 case VPWidenGEPSC:
143 case VPWidenIntOrFpInductionSC:
144 case VPWidenPHISC:
145 case VPWidenSC:
146 case VPWidenEVLSC:
147 case VPWidenSelectSC: {
148 const Instruction *I =
149 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
150 (void)I;
151 assert((!I || !I->mayReadFromMemory()) &&
152 "underlying instruction may read from memory");
153 return false;
154 }
155 default:
156 return true;
157 }
158}
159
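// Treat a recipe as having side effects if it may write to memory, if a
// widened call may throw or not return, or if the recipe kind is unknown
// (conservative default).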
160bool VPRecipeBase::mayHaveSideEffects() const {
161 switch (getVPDefID()) {
162 case VPDerivedIVSC:
163 case VPPredInstPHISC:
164 case VPScalarCastSC:
165 case VPReverseVectorPointerSC:
166 return false;
167 case VPInstructionSC:
168 return mayWriteToMemory();
169 case VPWidenCallSC: {
170 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
171 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
172 }
173 case VPWidenIntrinsicSC:
174 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
175 case VPBlendSC:
176 case VPReductionEVLSC:
177 case VPReductionSC:
178 case VPScalarIVStepsSC:
179 case VPVectorPointerSC:
180 case VPWidenCanonicalIVSC:
181 case VPWidenCastSC:
182 case VPWidenGEPSC:
183 case VPWidenIntOrFpInductionSC:
184 case VPWidenPHISC:
185 case VPWidenPointerInductionSC:
186 case VPWidenSC:
187 case VPWidenEVLSC:
188 case VPWidenSelectSC: {
189 const Instruction *I =
190 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
191 (void)I;
192 assert((!I || !I->mayHaveSideEffects()) &&
193 "underlying instruction has side-effects");
194 return false;
195 }
196 case VPInterleaveSC:
197 return mayWriteToMemory();
198 case VPWidenLoadEVLSC:
199 case VPWidenLoadSC:
200 case VPWidenStoreEVLSC:
201 case VPWidenStoreSC:
202 assert(
203 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
204 mayWriteToMemory() &&
205 "mayHaveSideEffects result for ingredient differs from this "
206 "implementation");
207 return mayWriteToMemory();
208 case VPReplicateSC: {
209 auto *R = cast<VPReplicateRecipe>(this);
210 return R->getUnderlyingInstr()->mayHaveSideEffects();
211 }
212 default:
213 return true;
214 }
215}
216
217void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
218 assert(!Parent && "Recipe already in some VPBasicBlock");
219 assert(InsertPos->getParent() &&
220 "Insertion position not in any VPBasicBlock");
221 InsertPos->getParent()->insert(this, InsertPos->getIterator());
222}
223
226 assert(!Parent && "Recipe already in some VPBasicBlock");
227 assert(I == BB.end() || I->getParent() == &BB);
228 BB.insert(this, I);
229}
230
231void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
232 assert(!Parent && "Recipe already in some VPBasicBlock");
233 assert(InsertPos->getParent() &&
234 "Insertion position not in any VPBasicBlock");
235 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
236}
237
238void VPRecipeBase::removeFromParent() {
239 assert(getParent() && "Recipe not in any VPBasicBlock");
240 getParent()->getRecipeList().remove(getIterator());
241 Parent = nullptr;
242}
243
245 assert(getParent() && "Recipe not in any VPBasicBlock");
247}
248
251 insertAfter(InsertPos);
252}
253
257 insertBefore(BB, I);
258}
259
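// Compute the cost of this recipe for the given VF: skip recipes the cost
// model already accounts for, apply a forced target instruction cost if
// requested, and otherwise defer to computeCost.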
260InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
261 // Get the underlying instruction for the recipe, if there is one. It is used
262 // to
263 // * decide if cost computation should be skipped for this recipe,
264 // * apply forced target instruction cost.
265 Instruction *UI = nullptr;
266 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
267 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
268 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
269 UI = IG->getInsertPos();
270 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
271 UI = &WidenMem->getIngredient();
272
273 InstructionCost RecipeCost;
274 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
275 RecipeCost = 0;
276 } else {
277 RecipeCost = computeCost(VF, Ctx);
278 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
279 RecipeCost.isValid())
280 RecipeCost = InstructionCost(ForceTargetInstructionCost);
281 }
282
283 LLVM_DEBUG({
284 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
285 dump();
286 });
287 return RecipeCost;
288}
289
291 VPCostContext &Ctx) const {
292 llvm_unreachable("subclasses should implement computeCost");
293}
294
296 assert(OpType == OperationType::FPMathOp &&
297 "recipe doesn't have fast math flags");
298 FastMathFlags Res;
299 Res.setAllowReassoc(FMFs.AllowReassoc);
300 Res.setNoNaNs(FMFs.NoNaNs);
301 Res.setNoInfs(FMFs.NoInfs);
302 Res.setNoSignedZeros(FMFs.NoSignedZeros);
303 Res.setAllowReciprocal(FMFs.AllowReciprocal);
304 Res.setAllowContract(FMFs.AllowContract);
305 Res.setApproxFunc(FMFs.ApproxFunc);
306 return Res;
307}
308
309#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
311#endif
312
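// Helpers for recipes that carry an optional unroll-part operand as their
// last operand; if the operand is absent, the part defaults to 0.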
313template <unsigned PartOpIdx>
314VPValue *
316 if (U.getNumOperands() == PartOpIdx + 1)
317 return U.getOperand(PartOpIdx);
318 return nullptr;
319}
320
321template <unsigned PartOpIdx>
323 if (auto *UnrollPartOp = getUnrollPartOperand(U))
324 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
325 return 0;
326}
327
328VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
329 VPValue *A, VPValue *B, DebugLoc DL,
330 const Twine &Name)
331 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
332 Pred, DL),
333 Opcode(Opcode), Name(Name.str()) {
334 assert(Opcode == Instruction::ICmp &&
335 "only ICmp predicates supported at the moment");
336}
337
338VPInstruction::VPInstruction(unsigned Opcode,
339 std::initializer_list<VPValue *> Operands,
340 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
341 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
342 Opcode(Opcode), Name(Name.str()) {
343 // Make sure the VPInstruction is a floating-point operation.
344 assert(isFPMathOp() && "this op can't take fast-math flags");
345}
346
347bool VPInstruction::doesGeneratePerAllLanes() const {
348 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
349}
350
351bool VPInstruction::canGenerateScalarForFirstLane() const {
353 return true;
355 return true;
356 switch (Opcode) {
357 case Instruction::ICmp:
358 case Instruction::Select:
366 return true;
367 default:
368 return false;
369 }
370}
371
372Value *VPInstruction::generatePerLane(VPTransformState &State,
373 const VPLane &Lane) {
374 IRBuilderBase &Builder = State.Builder;
375
377 "only PtrAdd opcodes are supported for now");
378 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
379 State.get(getOperand(1), Lane), Name);
380}
381
382Value *VPInstruction::generate(VPTransformState &State) {
383 IRBuilderBase &Builder = State.Builder;
384
385 if (Instruction::isBinaryOp(getOpcode())) {
386 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
387 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
388 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
389 auto *Res =
390 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
391 if (auto *I = dyn_cast<Instruction>(Res))
392 setFlags(I);
393 return Res;
394 }
395
396 switch (getOpcode()) {
397 case VPInstruction::Not: {
398 Value *A = State.get(getOperand(0));
399 return Builder.CreateNot(A, Name);
400 }
401 case Instruction::ICmp: {
402 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
403 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
404 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
405 return Builder.CreateCmp(getPredicate(), A, B, Name);
406 }
407 case Instruction::Select: {
408 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
409 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
410 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
411 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
412 return Builder.CreateSelect(Cond, Op1, Op2, Name);
413 }
414 case VPInstruction::ActiveLaneMask: {
415 // Get first lane of vector induction variable.
416 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
417 // Get the original loop tripcount.
418 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
419
420 // If this part of the active lane mask is scalar, generate the CMP directly
421 // to avoid unnecessary extracts.
422 if (State.VF.isScalar())
423 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
424 Name);
425
426 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
427 auto *PredTy = VectorType::get(Int1Ty, State.VF);
428 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
429 {PredTy, ScalarTC->getType()},
430 {VIVElem0, ScalarTC}, nullptr, Name);
431 }
432 case VPInstruction::FirstOrderRecurrenceSplice: {
433 // Generate code to combine the previous and current values in vector v3.
434 //
435 // vector.ph:
436 // v_init = vector(..., ..., ..., a[-1])
437 // br vector.body
438 //
439 // vector.body
440 // i = phi [0, vector.ph], [i+4, vector.body]
441 // v1 = phi [v_init, vector.ph], [v2, vector.body]
442 // v2 = a[i, i+1, i+2, i+3];
443 // v3 = vector(v1(3), v2(0, 1, 2))
444
445 auto *V1 = State.get(getOperand(0));
446 if (!V1->getType()->isVectorTy())
447 return V1;
448 Value *V2 = State.get(getOperand(1));
449 return Builder.CreateVectorSplice(V1, V2, -1, Name);
450 }
451 case VPInstruction::CalculateTripCountMinusVF: {
452 unsigned UF = getParent()->getPlan()->getUF();
453 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
454 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
455 Value *Sub = Builder.CreateSub(ScalarTC, Step);
456 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
457 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
458 return Builder.CreateSelect(Cmp, Sub, Zero);
459 }
460 case VPInstruction::ExplicitVectorLength: {
461 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
462 // be outside of the main loop.
463 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
464 // Compute EVL
465 assert(AVL->getType()->isIntegerTy() &&
466 "Requested vector length should be an integer.");
467
468 assert(State.VF.isScalable() && "Expected scalable vector factor.");
469 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
470
471 Value *EVL = State.Builder.CreateIntrinsic(
472 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
473 {AVL, VFArg, State.Builder.getTrue()});
474 return EVL;
475 }
476 case VPInstruction::CanonicalIVIncrementForPart: {
477 unsigned Part = getUnrollPart(*this);
478 auto *IV = State.get(getOperand(0), VPLane(0));
479 assert(Part != 0 && "Must have a positive part");
480 // The canonical IV is incremented by the vectorization factor (num of
481 // SIMD elements) times the unroll part.
482 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
483 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
484 hasNoSignedWrap());
485 }
486 case VPInstruction::BranchOnCond: {
487 Value *Cond = State.get(getOperand(0), VPLane(0));
488 // Replace the temporary unreachable terminator with a new conditional
489 // branch, hooking it up to backward destination for exiting blocks now and
490 // to forward destination(s) later when they are created.
491 BranchInst *CondBr =
492 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
493 CondBr->setSuccessor(0, nullptr);
494 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
495
496 if (!getParent()->isExiting())
497 return CondBr;
498
499 VPRegionBlock *ParentRegion = getParent()->getParent();
500 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
501 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
502 return CondBr;
503 }
504 case VPInstruction::BranchOnCount: {
505 // First create the compare.
506 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
507 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
508 Value *Cond = Builder.CreateICmpEQ(IV, TC);
509
510 // Now create the branch.
511 auto *Plan = getParent()->getPlan();
512 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
513 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
514
515 // Replace the temporary unreachable terminator with a new conditional
516 // branch, hooking it up to backward destination (the header) now and to the
517 // forward destination (the exit/middle block) later when it is created.
518 // Note that CreateCondBr expects a valid BB as first argument, so we need
519 // to set it to nullptr later.
520 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
521 State.CFG.VPBB2IRBB[Header]);
522 CondBr->setSuccessor(0, nullptr);
523 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
524 return CondBr;
525 }
526 case VPInstruction::ComputeReductionResult: {
527 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
528 // and will be removed by breaking up the recipe further.
529 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
530 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
531 // Get its reduction variable descriptor.
532 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
533
534 RecurKind RK = RdxDesc.getRecurrenceKind();
535
536 Type *PhiTy = OrigPhi->getType();
537 // The recipe's operands are the reduction phi, followed by one operand for
538 // each part of the reduction.
539 unsigned UF = getNumOperands() - 1;
540 VectorParts RdxParts(UF);
541 for (unsigned Part = 0; Part < UF; ++Part)
542 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
543
544 // If the vector reduction can be performed in a smaller type, we truncate
545 // then extend the loop exit value to enable InstCombine to evaluate the
546 // entire expression in the smaller type.
547 // TODO: Handle this in truncateToMinBW.
548 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
549 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
550 for (unsigned Part = 0; Part < UF; ++Part)
551 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
552 }
553 // Reduce all of the unrolled parts into a single vector.
554 Value *ReducedPartRdx = RdxParts[0];
555 unsigned Op = RdxDesc.getOpcode();
556 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
557 Op = Instruction::Or;
558
559 if (PhiR->isOrdered()) {
560 ReducedPartRdx = RdxParts[UF - 1];
561 } else {
562 // Floating-point operations should have some FMF to enable the reduction.
564 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
565 for (unsigned Part = 1; Part < UF; ++Part) {
566 Value *RdxPart = RdxParts[Part];
567 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
568 ReducedPartRdx = Builder.CreateBinOp(
569 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
571 ReducedPartRdx =
572 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
573 else
574 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
575 }
576 }
577
578 // Create the reduction after the loop. Note that inloop reductions create
579 // the target reduction in the loop using a Reduction recipe.
580 if ((State.VF.isVector() ||
583 !PhiR->isInLoop()) {
584 ReducedPartRdx =
585 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
586 // If the reduction can be performed in a smaller type, we need to extend
587 // the reduction to the wider type before we branch to the original loop.
588 if (PhiTy != RdxDesc.getRecurrenceType())
589 ReducedPartRdx = RdxDesc.isSigned()
590 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
591 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
592 }
593
594 return ReducedPartRdx;
595 }
596 case VPInstruction::ExtractFromEnd: {
597 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
598 unsigned Offset = CI->getZExtValue();
599 assert(Offset > 0 && "Offset from end must be positive");
600 Value *Res;
601 if (State.VF.isVector()) {
602 assert(Offset <= State.VF.getKnownMinValue() &&
603 "invalid offset to extract from");
604 // Extract lane VF - Offset from the operand.
605 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
606 } else {
607 assert(Offset <= 1 && "invalid offset to extract from");
608 Res = State.get(getOperand(0));
609 }
610 if (isa<ExtractElementInst>(Res))
611 Res->setName(Name);
612 return Res;
613 }
614 case VPInstruction::LogicalAnd: {
615 Value *A = State.get(getOperand(0));
616 Value *B = State.get(getOperand(1));
617 return Builder.CreateLogicalAnd(A, B, Name);
618 }
621 "can only generate first lane for PtrAdd");
622 Value *Ptr = State.get(getOperand(0), VPLane(0));
623 Value *Addend = State.get(getOperand(1), VPLane(0));
624 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
625 }
626 case VPInstruction::ResumePhi: {
627 Value *IncomingFromVPlanPred =
628 State.get(getOperand(0), /* IsScalar */ true);
629 Value *IncomingFromOtherPreds =
630 State.get(getOperand(1), /* IsScalar */ true);
631 auto *NewPhi =
632 Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
633 BasicBlock *VPlanPred =
634 State.CFG
635 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
636 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
637 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
638 if (OtherPred == VPlanPred)
639 continue;
640 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
641 }
642 return NewPhi;
643 }
644 case VPInstruction::AnyOf: {
645 Value *A = State.get(getOperand(0));
646 return Builder.CreateOrReduce(A);
647 }
648
649 default:
650 llvm_unreachable("Unsupported opcode for instruction");
651 }
652}
653
658}
659
662}
663
664#if !defined(NDEBUG)
665bool VPInstruction::isFPMathOp() const {
666 // Inspired by FPMathOperator::classof. Notable differences are that we don't
667 // support Call, PHI and Select opcodes here yet.
668 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
669 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
670 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
671 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
672}
673#endif
674
675void VPInstruction::execute(VPTransformState &State) {
676 assert(!State.Lane && "VPInstruction executing a Lane");
678 assert((hasFastMathFlags() == isFPMathOp() ||
679 getOpcode() == Instruction::Select) &&
680 "Recipe not a FPMathOp but has fast-math flags?");
681 if (hasFastMathFlags())
682 State.Builder.setFastMathFlags(getFastMathFlags());
683
684 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
685 (vputils::onlyFirstLaneUsed(this) ||
686 isVectorToScalar() || isSingleScalar());
687 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
688 if (GeneratesPerAllLanes) {
689 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
690 Lane != NumLanes; ++Lane) {
691 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
692 assert(GeneratedValue && "generatePerLane must produce a value");
693 State.set(this, GeneratedValue, VPLane(Lane));
694 }
695 return;
696 }
697
698 Value *GeneratedValue = generate(State);
699 if (!hasResult())
700 return;
701 assert(GeneratedValue && "generate must produce a value");
702 assert(
703 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
704 State.VF.isScalar()) &&
705 "scalar value but not only first lane defined");
706 State.set(this, GeneratedValue,
707 /*IsScalar*/ GeneratesPerFirstLaneOnly);
708}
709
710bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
711 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
712 if (Instruction::isBinaryOp(getOpcode()))
713 return vputils::onlyFirstLaneUsed(this);
714
715 switch (getOpcode()) {
716 default:
717 return false;
718 case Instruction::ICmp:
719 case Instruction::Select:
720 case Instruction::Or:
722 // TODO: Cover additional opcodes.
723 return vputils::onlyFirstLaneUsed(this);
731 return true;
732 };
733 llvm_unreachable("switch should return");
734}
735
736bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
737 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
738 if (Instruction::isBinaryOp(getOpcode()))
739 return vputils::onlyFirstPartUsed(this);
740
741 switch (getOpcode()) {
742 default:
743 return false;
744 case Instruction::ICmp:
745 case Instruction::Select:
746 return vputils::onlyFirstPartUsed(this);
750 return true;
751 };
752 llvm_unreachable("switch should return");
753}
754
755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
757 VPSlotTracker SlotTracker(getParent()->getPlan());
758 print(dbgs(), "", SlotTracker);
759}
760
761void VPInstruction::print(raw_ostream &O, const Twine &Indent,
762 VPSlotTracker &SlotTracker) const {
763 O << Indent << "EMIT ";
764
765 if (hasResult()) {
767 O << " = ";
768 }
769
770 switch (getOpcode()) {
772 O << "not";
773 break;
775 O << "combined load";
776 break;
778 O << "combined store";
779 break;
781 O << "active lane mask";
782 break;
784 O << "resume-phi";
785 break;
787 O << "EXPLICIT-VECTOR-LENGTH";
788 break;
790 O << "first-order splice";
791 break;
793 O << "branch-on-cond";
794 break;
796 O << "TC > VF ? TC - VF : 0";
797 break;
799 O << "VF * Part +";
800 break;
802 O << "branch-on-count";
803 break;
805 O << "extract-from-end";
806 break;
808 O << "compute-reduction-result";
809 break;
811 O << "logical-and";
812 break;
814 O << "ptradd";
815 break;
817 O << "any-of";
818 break;
819 default:
821 }
822
823 printFlags(O);
825
826 if (auto DL = getDebugLoc()) {
827 O << ", !dbg ";
828 DL.print(O);
829 }
830}
831#endif
832
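// Update the wrapped IR phi's incoming values from the recipe operands (one
// per predecessor) and advance the insert point past the wrapped instruction.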
833void VPIRInstruction::execute(VPTransformState &State) {
834 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
835 "Only PHINodes can have extra operands");
836 for (const auto &[Idx, Op] : enumerate(operands())) {
837 VPValue *ExitValue = Op;
838 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
842 auto *PredVPBB = Pred->getExitingBasicBlock();
843 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
844 // Set insertion point in PredBB in case an extract needs to be generated.
845 // TODO: Model extracts explicitly.
846 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
847 Value *V = State.get(ExitValue, VPLane(Lane));
848 auto *Phi = cast<PHINode>(&I);
849 // If there is no existing block for PredBB in the phi, add a new incoming
850 // value. Otherwise update the existing incoming value for PredBB.
851 if (Phi->getBasicBlockIndex(PredBB) == -1)
852 Phi->addIncoming(V, PredBB);
853 else
854 Phi->setIncomingValueForBlock(PredBB, V);
855 }
856
857 // Advance the insert point after the wrapped IR instruction. This allows
858 // interleaving VPIRInstructions and other recipes.
859 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
860}
861
863 VPCostContext &Ctx) const {
864 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
865 // hence it does not contribute to the cost-modeling for the VPlan.
866 return 0;
867}
868
869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
871 VPSlotTracker &SlotTracker) const {
872 O << Indent << "IR " << I;
873
874 if (getNumOperands() != 0) {
875 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
877 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
878 Op.value()->printAsOperand(O, SlotTracker);
879 O << " from ";
880 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
881 });
882 O << ")";
883 }
884}
885#endif
886
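// Generate a call to the selected vector function variant, passing scalar
// values for parameters the variant takes as scalars and vectors otherwise.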
887void VPWidenCallRecipe::execute(VPTransformState &State) {
888 assert(State.VF.isVector() && "not widening");
890
891 FunctionType *VFTy = Variant->getFunctionType();
892 // Add return type if intrinsic is overloaded on it.
894 for (const auto &I : enumerate(arg_operands())) {
895 Value *Arg;
896 // Some vectorized function variants may also take a scalar argument,
897 // e.g. linear parameters for pointers. This needs to be the scalar value
898 // from the start of the respective part when interleaving.
899 if (!VFTy->getParamType(I.index())->isVectorTy())
900 Arg = State.get(I.value(), VPLane(0));
901 else
902 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
903 Args.push_back(Arg);
904 }
905
906 assert(Variant != nullptr && "Can't create vector function.");
907
908 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
910 if (CI)
911 CI->getOperandBundlesAsDefs(OpBundles);
912
913 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
914 setFlags(V);
915
916 if (!V->getType()->isVoidTy())
917 State.set(this, V);
918 State.addMetadata(V, CI);
919}
920
922 VPCostContext &Ctx) const {
924 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
925 Variant->getFunctionType()->params(),
926 CostKind);
927}
928
929#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
931 VPSlotTracker &SlotTracker) const {
932 O << Indent << "WIDEN-CALL ";
933
934 Function *CalledFn = getCalledScalarFunction();
935 if (CalledFn->getReturnType()->isVoidTy())
936 O << "void ";
937 else {
939 O << " = ";
940 }
941
942 O << "call";
943 printFlags(O);
944 O << " @" << CalledFn->getName() << "(";
946 Op->printAsOperand(O, SlotTracker);
947 });
948 O << ")";
949
950 O << " (using library function";
951 if (Variant->hasName())
952 O << ": " << Variant->getName();
953 O << ")";
954}
955#endif
956
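// Generate a call to the vector form of the intrinsic, keeping designated
// scalar operands scalar and collecting overload types for the declaration.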
957void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
958 assert(State.VF.isVector() && "not widening");
960
961 SmallVector<Type *, 2> TysForDecl;
962 // Add return type if intrinsic is overloaded on it.
963 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
964 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
966 for (const auto &I : enumerate(operands())) {
967 // Some intrinsics have a scalar argument - don't replace it with a
968 // vector.
969 Value *Arg;
970 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
971 State.TTI))
972 Arg = State.get(I.value(), VPLane(0));
973 else
974 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
975 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
976 State.TTI))
977 TysForDecl.push_back(Arg->getType());
978 Args.push_back(Arg);
979 }
980
981 // Use vector version of the intrinsic.
982 Module *M = State.Builder.GetInsertBlock()->getModule();
983 Function *VectorF =
984 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
985 assert(VectorF &&
986 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
987
988 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
990 if (CI)
991 CI->getOperandBundlesAsDefs(OpBundles);
992
993 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
994
995 setFlags(V);
996
997 if (!V->getType()->isVoidTy())
998 State.set(this, V);
999 State.addMetadata(V, CI);
1000}
1001
1003 VPCostContext &Ctx) const {
1005
1006 // Some backends analyze intrinsic arguments to determine cost. Use the
1007 // underlying value for the operand if it has one. Otherwise try to use the
1008 // operand of the underlying call instruction, if there is one. Otherwise
1009 // clear Arguments.
1010 // TODO: Rework TTI interface to be independent of concrete IR values.
1012 for (const auto &[Idx, Op] : enumerate(operands())) {
1013 auto *V = Op->getUnderlyingValue();
1014 if (!V) {
1015 // Push all the VP Intrinsic's ops into the Arguments even if it is nullptr.
1016 // Some VP Intrinsic's cost will assert the number of parameters.
1017 // Mainly appears in the following two scenarios:
1018 // 1. EVL Op is nullptr
1019 // 2. The Argument of the VP Intrinsic is also a VP Intrinsic
1020 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1021 Arguments.push_back(V);
1022 continue;
1023 }
1024 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1025 Arguments.push_back(UI->getArgOperand(Idx));
1026 continue;
1027 }
1028 Arguments.clear();
1029 break;
1030 }
1031 Arguments.push_back(V);
1032 }
1033
1034 Type *RetTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1035 SmallVector<Type *> ParamTys;
1036 for (unsigned I = 0; I != getNumOperands(); ++I)
1037 ParamTys.push_back(
1039
1040 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1042 IntrinsicCostAttributes CostAttrs(
1043 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1044 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1045 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1046}
1047
1049 return Intrinsic::getBaseName(VectorIntrinsicID);
1050}
1051
1053 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1054 // Vector predication intrinsics only demand the first lane of the last
1055 // operand (the EVL operand).
1056 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1057 Op == getOperand(getNumOperands() - 1);
1058}
1059
1060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1062 VPSlotTracker &SlotTracker) const {
1063 O << Indent << "WIDEN-INTRINSIC ";
1064 if (ResultTy->isVoidTy()) {
1065 O << "void ";
1066 } else {
1068 O << " = ";
1069 }
1070
1071 O << "call";
1072 printFlags(O);
1073 O << getIntrinsicName() << "(";
1074
1076 Op->printAsOperand(O, SlotTracker);
1077 });
1078 O << ")";
1079}
1080#endif
1081
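// Emit a call to the experimental_vector_histogram_add intrinsic; an all-true
// mask is synthesized when the recipe has no mask, and subtracts are handled
// by negating the increment.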
1082void VPHistogramRecipe::execute(VPTransformState &State) {
1084 IRBuilderBase &Builder = State.Builder;
1085
1086 Value *Address = State.get(getOperand(0));
1087 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1088 VectorType *VTy = cast<VectorType>(Address->getType());
1089
1090 // The histogram intrinsic requires a mask even if the recipe doesn't;
1091 // if the mask operand was omitted then all lanes should be executed and
1092 // we just need to synthesize an all-true mask.
1093 Value *Mask = nullptr;
1094 if (VPValue *VPMask = getMask())
1095 Mask = State.get(VPMask);
1096 else
1097 Mask =
1098 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1099
1100 // If this is a subtract, we want to invert the increment amount. We may
1101 // add a separate intrinsic in future, but for now we'll try this.
1102 if (Opcode == Instruction::Sub)
1103 IncAmt = Builder.CreateNeg(IncAmt);
1104 else
1105 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1106
1107 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1108 {VTy, IncAmt->getType()},
1109 {Address, IncAmt, Mask});
1110}
1111
1113 VPCostContext &Ctx) const {
1114 // FIXME: Take the gather and scatter into account as well. For now we're
1115 // generating the same cost as the fallback path, but we'll likely
1116 // need to create a new TTI method for determining the cost, including
1117 // whether we can use base + vec-of-smaller-indices or just
1118 // vec-of-pointers.
1119 assert(VF.isVector() && "Invalid VF for histogram cost");
1120 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1121 VPValue *IncAmt = getOperand(1);
1122 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1123 VectorType *VTy = VectorType::get(IncTy, VF);
1124
1125 // Assume that a non-constant update value (or a constant != 1) requires
1126 // a multiply, and add that into the cost.
1127 InstructionCost MulCost =
1128 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1129 if (IncAmt->isLiveIn()) {
1130 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1131
1132 if (CI && CI->getZExtValue() == 1)
1133 MulCost = TTI::TCC_Free;
1134 }
1135
1136 // Find the cost of the histogram operation itself.
1137 Type *PtrTy = VectorType::get(AddressTy, VF);
1138 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1139 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1141 {PtrTy, IncTy, MaskTy});
1142
1143 // Add the costs together with the add/sub operation.
1144 return Ctx.TTI.getIntrinsicInstrCost(
1146 MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1147}
1148
1149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1151 VPSlotTracker &SlotTracker) const {
1152 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1154
1155 if (Opcode == Instruction::Sub)
1156 O << ", dec: ";
1157 else {
1158 assert(Opcode == Instruction::Add);
1159 O << ", inc: ";
1160 }
1162
1163 if (VPValue *Mask = getMask()) {
1164 O << ", mask: ";
1165 Mask->printAsOperand(O, SlotTracker);
1166 }
1167}
1168
1170 VPSlotTracker &SlotTracker) const {
1171 O << Indent << "WIDEN-SELECT ";
1173 O << " = select ";
1175 O << ", ";
1177 O << ", ";
1179 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1180}
1181#endif
1182
1185
1186 // The condition can be loop invariant but still defined inside the
1187 // loop. This means that we can't just use the original 'cond' value.
1188 // We have to take the 'vectorized' value and pick the first lane.
1189 // Instcombine will make this a no-op.
1190 auto *InvarCond =
1191 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1192
1193 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1194 Value *Op0 = State.get(getOperand(1));
1195 Value *Op1 = State.get(getOperand(2));
1196 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1197 State.set(this, Sel);
1198 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1199}
1200
1202 VPCostContext &Ctx) const {
1203 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1204 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1205 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1206 Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1208
1209 VPValue *Op0, *Op1;
1210 using namespace llvm::VPlanPatternMatch;
1211 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1212 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1213 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1214 // select x, y, false --> x & y
1215 // select x, true, y --> x | y
1216 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1217 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1218
1220 if (all_of(operands(),
1221 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1222 Operands.append(SI->op_begin(), SI->op_end());
1223 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1224 return Ctx.TTI.getArithmeticInstrCost(
1225 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1226 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1227 }
1228
1229 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1230 if (!ScalarCond)
1231 CondTy = VectorType::get(CondTy, VF);
1232
1234 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1235 Pred = Cmp->getPredicate();
1236 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1239}
1240
1241VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1242 const FastMathFlags &FMF) {
1243 AllowReassoc = FMF.allowReassoc();
1244 NoNaNs = FMF.noNaNs();
1245 NoInfs = FMF.noInfs();
1246 NoSignedZeros = FMF.noSignedZeros();
1247 AllowReciprocal = FMF.allowReciprocal();
1248 AllowContract = FMF.allowContract();
1249 ApproxFunc = FMF.approxFunc();
1250}
1251
1252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1254 switch (OpType) {
1255 case OperationType::Cmp:
1257 break;
1258 case OperationType::DisjointOp:
1260 O << " disjoint";
1261 break;
1262 case OperationType::PossiblyExactOp:
1263 if (ExactFlags.IsExact)
1264 O << " exact";
1265 break;
1266 case OperationType::OverflowingBinOp:
1267 if (WrapFlags.HasNUW)
1268 O << " nuw";
1269 if (WrapFlags.HasNSW)
1270 O << " nsw";
1271 break;
1272 case OperationType::FPMathOp:
1274 break;
1275 case OperationType::GEPOp:
1276 if (GEPFlags.isInBounds())
1277 O << " inbounds";
1279 O << " nusw";
1281 O << " nuw";
1282 break;
1283 case OperationType::NonNegOp:
1284 if (NonNegFlags.NonNeg)
1285 O << " nneg";
1286 break;
1287 case OperationType::Other:
1288 break;
1289 }
1290 if (getNumOperands() > 0)
1291 O << " ";
1292}
1293#endif
1294
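// Widen unary/binary operations, freezes and compares by emitting the
// corresponding vector instruction on the widened operands.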
1295void VPWidenRecipe::execute(VPTransformState &State) {
1297 auto &Builder = State.Builder;
1298 switch (Opcode) {
1299 case Instruction::Call:
1300 case Instruction::Br:
1301 case Instruction::PHI:
1302 case Instruction::GetElementPtr:
1303 case Instruction::Select:
1304 llvm_unreachable("This instruction is handled by a different recipe.");
1305 case Instruction::UDiv:
1306 case Instruction::SDiv:
1307 case Instruction::SRem:
1308 case Instruction::URem:
1309 case Instruction::Add:
1310 case Instruction::FAdd:
1311 case Instruction::Sub:
1312 case Instruction::FSub:
1313 case Instruction::FNeg:
1314 case Instruction::Mul:
1315 case Instruction::FMul:
1316 case Instruction::FDiv:
1317 case Instruction::FRem:
1318 case Instruction::Shl:
1319 case Instruction::LShr:
1320 case Instruction::AShr:
1321 case Instruction::And:
1322 case Instruction::Or:
1323 case Instruction::Xor: {
1324 // Just widen unops and binops.
1326 for (VPValue *VPOp : operands())
1327 Ops.push_back(State.get(VPOp));
1328
1329 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1330
1331 if (auto *VecOp = dyn_cast<Instruction>(V))
1332 setFlags(VecOp);
1333
1334 // Use this vector value for all users of the original instruction.
1335 State.set(this, V);
1336 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1337 break;
1338 }
1339 case Instruction::Freeze: {
1340 Value *Op = State.get(getOperand(0));
1341
1342 Value *Freeze = Builder.CreateFreeze(Op);
1343 State.set(this, Freeze);
1344 break;
1345 }
1346 case Instruction::ICmp:
1347 case Instruction::FCmp: {
1348 // Widen compares. Generate vector compares.
1349 bool FCmp = Opcode == Instruction::FCmp;
1350 Value *A = State.get(getOperand(0));
1351 Value *B = State.get(getOperand(1));
1352 Value *C = nullptr;
1353 if (FCmp) {
1354 // Propagate fast math flags.
1355 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1356 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
1357 Builder.setFastMathFlags(I->getFastMathFlags());
1358 C = Builder.CreateFCmp(getPredicate(), A, B);
1359 } else {
1360 C = Builder.CreateICmp(getPredicate(), A, B);
1361 }
1362 State.set(this, C);
1363 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1364 break;
1365 }
1366 default:
1367 // This instruction is not vectorized by simple widening.
1368 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1369 << Instruction::getOpcodeName(Opcode));
1370 llvm_unreachable("Unhandled instruction!");
1371 } // end of switch.
1372
1373#if !defined(NDEBUG)
1374 // Verify that VPlan type inference results agree with the type of the
1375 // generated values.
1377 State.get(this)->getType() &&
1378 "inferred type and type from generated instructions do not match");
1379#endif
1380}
1381
1383 VPCostContext &Ctx) const {
1385 switch (Opcode) {
1386 case Instruction::FNeg: {
1387 Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1388 return Ctx.TTI.getArithmeticInstrCost(
1389 Opcode, VectorTy, CostKind,
1392 }
1393
1394 case Instruction::UDiv:
1395 case Instruction::SDiv:
1396 case Instruction::SRem:
1397 case Instruction::URem:
1398 // More complex computation, let the legacy cost-model handle this for now.
1399 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1400 case Instruction::Add:
1401 case Instruction::FAdd:
1402 case Instruction::Sub:
1403 case Instruction::FSub:
1404 case Instruction::Mul:
1405 case Instruction::FMul:
1406 case Instruction::FDiv:
1407 case Instruction::FRem:
1408 case Instruction::Shl:
1409 case Instruction::LShr:
1410 case Instruction::AShr:
1411 case Instruction::And:
1412 case Instruction::Or:
1413 case Instruction::Xor: {
1414 VPValue *RHS = getOperand(1);
1415 // Certain instructions can be cheaper to vectorize if they have a constant
1416 // second vector operand. One example of this are shifts on x86.
1419 if (RHS->isLiveIn())
1420 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1421
1422 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1425 Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1426 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1427
1429 if (CtxI)
1430 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1431 return Ctx.TTI.getArithmeticInstrCost(
1432 Opcode, VectorTy, CostKind,
1434 RHSInfo, Operands, CtxI, &Ctx.TLI);
1435 }
1436 case Instruction::Freeze: {
1437 // This opcode is unknown. Assume that it is the same as 'mul'.
1438 Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1439 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1440 }
1441 case Instruction::ICmp:
1442 case Instruction::FCmp: {
1443 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1444 Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1445 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1446 CostKind,
1449 }
1450 default:
1451 llvm_unreachable("Unsupported opcode for instruction");
1452 }
1453}
1454
1455void VPWidenEVLRecipe::execute(VPTransformState &State) {
1456 unsigned Opcode = getOpcode();
1457 // TODO: Support other opcodes
1458 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1459 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1460
1462
1463 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1464 "VPWidenEVLRecipe should not be used for scalars");
1465
1466 VPValue *EVL = getEVL();
1467 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1468 IRBuilderBase &BuilderIR = State.Builder;
1469 VectorBuilder Builder(BuilderIR);
1470 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1471
1473 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1474 VPValue *VPOp = getOperand(I);
1475 Ops.push_back(State.get(VPOp));
1476 }
1477
1478 Builder.setMask(Mask).setEVL(EVLArg);
1479 Value *VPInst =
1480 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1481 // Currently vp-intrinsics only accept FMF flags.
1482 // TODO: Enable other flags when support is added.
1483 if (isa<FPMathOperator>(VPInst))
1484 setFlags(cast<Instruction>(VPInst));
1485
1486 State.set(this, VPInst);
1487 State.addMetadata(VPInst,
1488 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1489}
1490
1491#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1493 VPSlotTracker &SlotTracker) const {
1494 O << Indent << "WIDEN ";
1496 O << " = " << Instruction::getOpcodeName(Opcode);
1497 printFlags(O);
1499}
1500
1502 VPSlotTracker &SlotTracker) const {
1503 O << Indent << "WIDEN ";
1505 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1506 printFlags(O);
1508}
1509#endif
1510
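// Widen the cast so it produces a vector of the result type.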
1511void VPWidenCastRecipe::execute(VPTransformState &State) {
1513 auto &Builder = State.Builder;
1514 /// Vectorize casts.
1515 assert(State.VF.isVector() && "Not vectorizing?");
1516 Type *DestTy = VectorType::get(getResultType(), State.VF);
1517 VPValue *Op = getOperand(0);
1518 Value *A = State.get(Op);
1519 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1520 State.set(this, Cast);
1521 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1522 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1523 setFlags(CastOp);
1524}
1525
1527 VPCostContext &Ctx) const {
1528 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1529 // the legacy cost model, including truncates/extends when evaluating a
1530 // reduction in a smaller type.
1531 if (!getUnderlyingValue())
1532 return 0;
1533 // Computes the CastContextHint from a recipe that may access memory.
1534 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1535 if (VF.isScalar())
1537 if (isa<VPInterleaveRecipe>(R))
1539 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1540 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1542 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1543 if (WidenMemoryRecipe == nullptr)
1545 if (!WidenMemoryRecipe->isConsecutive())
1547 if (WidenMemoryRecipe->isReverse())
1549 if (WidenMemoryRecipe->isMasked())
1552 };
1553
1554 VPValue *Operand = getOperand(0);
1556 // For Trunc/FPTrunc, get the context from the only user.
1557 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1559 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1560 CCH = ComputeCCH(StoreRecipe);
1561 }
1562 // For Z/Sext, get the context from the operand.
1563 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1564 Opcode == Instruction::FPExt) {
1565 if (Operand->isLiveIn())
1567 else if (Operand->getDefiningRecipe())
1568 CCH = ComputeCCH(Operand->getDefiningRecipe());
1569 }
1570
1571 auto *SrcTy =
1572 cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1573 auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
1574 // Arm TTI will use the underlying instruction to determine the cost.
1575 return Ctx.TTI.getCastInstrCost(
1576 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1577 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1578}
1579
1580#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1582 VPSlotTracker &SlotTracker) const {
1583 O << Indent << "WIDEN-CAST ";
1585 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1586 printFlags(O);
1588 O << " to " << *getResultType();
1589}
1590#endif
1591
1593 VPCostContext &Ctx) const {
1594 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1595}
1596
1597/// This function adds
1598/// (0 * Step, 1 * Step, 2 * Step, ...)
1599/// to each vector element of Val.
1600/// \p Opcode is relevant for FP induction variable.
1601static Value *getStepVector(Value *Val, Value *Step,
1602 Instruction::BinaryOps BinOp, ElementCount VF,
1603 IRBuilderBase &Builder) {
1604 assert(VF.isVector() && "only vector VFs are supported");
1605
1606 // Create and check the types.
1607 auto *ValVTy = cast<VectorType>(Val->getType());
1608 ElementCount VLen = ValVTy->getElementCount();
1609
1610 Type *STy = Val->getType()->getScalarType();
1611 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1612 "Induction Step must be an integer or FP");
1613 assert(Step->getType() == STy && "Step has wrong type");
1614
1616
1617 // Create a vector of consecutive numbers from zero to VF.
1618 VectorType *InitVecValVTy = ValVTy;
1619 if (STy->isFloatingPointTy()) {
1620 Type *InitVecValSTy =
1622 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1623 }
1624 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1625
1626 if (STy->isIntegerTy()) {
1627 Step = Builder.CreateVectorSplat(VLen, Step);
1628 assert(Step->getType() == Val->getType() && "Invalid step vec");
1629 // FIXME: The newly created binary instructions should contain nsw/nuw
1630 // flags, which can be found from the original scalar operations.
1631 Step = Builder.CreateMul(InitVec, Step);
1632 return Builder.CreateAdd(Val, Step, "induction");
1633 }
1634
1635 // Floating point induction.
1636 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1637 "Binary Opcode should be specified for FP induction");
1638 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1639
1640 Step = Builder.CreateVectorSplat(VLen, Step);
1641 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1642 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1643}
1644
1645/// A helper function that returns an integer or floating-point constant with
1646/// value C.
1647static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
1648 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1649 : ConstantFP::get(Ty, C);
1650}
1651
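// Build the widened induction: splat the (possibly truncated) start value,
// add a step vector, and create the vector phi together with its
// per-iteration increment by VF * Step.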
1652void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
1653 assert(!State.Lane && "Int or FP induction being replicated.");
1654
1655 Value *Start = getStartValue()->getLiveInIRValue();
1657 TruncInst *Trunc = getTruncInst();
1658 IRBuilderBase &Builder = State.Builder;
1659 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1660 "Types must match");
1661 assert(State.VF.isVector() && "must have vector VF");
1662
1663 // The value from the original loop to which we are mapping the new induction
1664 // variable.
1665 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1666
1667 // Fast-math-flags propagate from the original induction instruction.
1668 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1669 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1670 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1671
1672 // Now do the actual transformations, and start with fetching the step value.
1673 Value *Step = State.get(getStepValue(), VPLane(0));
1674
1675 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1676 "Expected either an induction phi-node or a truncate of it!");
1677
1678 // Construct the initial value of the vector IV in the vector loop preheader
1679 auto CurrIP = Builder.saveIP();
1680 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1681 Builder.SetInsertPoint(VectorPH->getTerminator());
1682 if (isa<TruncInst>(EntryVal)) {
1683 assert(Start->getType()->isIntegerTy() &&
1684 "Truncation requires an integer type");
1685 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1686 Step = Builder.CreateTrunc(Step, TruncType);
1687 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1688 }
1689
1690 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1691 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1692 State.VF, State.Builder);
1693
1694 // We create vector phi nodes for both integer and floating-point induction
1695 // variables. Here, we determine the kind of arithmetic we will perform.
1698 if (Step->getType()->isIntegerTy()) {
1699 AddOp = Instruction::Add;
1700 MulOp = Instruction::Mul;
1701 } else {
1702 AddOp = ID.getInductionOpcode();
1703 MulOp = Instruction::FMul;
1704 }
1705
1706 Value *SplatVF;
1707 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1708 // The recipe has been unrolled. In that case, fetch the splat value for the
1709 // induction increment.
1710 SplatVF = State.get(SplatVFOperand);
1711 } else {
1712 // Multiply the vectorization factor by the step using integer or
1713 // floating-point arithmetic as appropriate.
1714 Type *StepType = Step->getType();
1715 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1716 if (Step->getType()->isFloatingPointTy())
1717 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1718 else
1719 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1720 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1721
1722 // Create a vector splat to use in the induction update.
1723 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1724 }
1725
1726 Builder.restoreIP(CurrIP);
1727
1728 // We may need to add the step a number of times, depending on the unroll
1729 // factor. The last of those goes into the PHI.
1730 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1731 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1732 VecInd->setDebugLoc(getDebugLoc());
1733 State.set(this, VecInd);
1734
1735 Instruction *LastInduction = cast<Instruction>(
1736 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1737 if (isa<TruncInst>(EntryVal))
1738 State.addMetadata(LastInduction, EntryVal);
1739 LastInduction->setDebugLoc(getDebugLoc());
1740
1741 VecInd->addIncoming(SteppedStart, VectorPH);
1742 // Add induction update using an incorrect block temporarily. The phi node
1743 // will be fixed after VPlan execution. Note that at this point the latch
1744 // block cannot be used, as it does not exist yet.
1745 // TODO: Model increment value in VPlan, by turning the recipe into a
1746 // multi-def and a subclass of VPHeaderPHIRecipe.
1747 VecInd->addIncoming(LastInduction, VectorPH);
1748}
1749
1750#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1752 VPSlotTracker &SlotTracker) const {
1753 O << Indent;
1755 O << " = WIDEN-INDUCTION ";
1757
1758 if (auto *TI = getTruncInst())
1759 O << " (truncated to " << *TI->getType() << ")";
1760}
1761#endif
1762
1763bool VPDerivedIVRecipe::isCanonical() const {
1764 // The step may be defined by a recipe in the preheader (e.g. if it requires
1765 // SCEV expansion), but for the canonical induction the step is required to be
1766 // 1, which is represented as live-in.
1768 return false;
1769 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1770 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1771 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1772 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1773 getScalarType() == CanIV->getScalarType();
1774}
1775
1776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1778 VPSlotTracker &SlotTracker) const {
1779 O << Indent;
1781 O << " = DERIVED-IV ";
1783 O << " + ";
1785 O << " * ";
1787}
1788#endif
1789
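// Materialize the scalar induction steps BaseIV + (StartIdx + Lane) * Step
// for the demanded lanes (and a full step vector for scalable VFs).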
1790void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
1791 // Fast-math-flags propagate from the original induction instruction.
1793 if (hasFastMathFlags())
1795
1796 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1797 /// variable on which to base the steps, \p Step is the size of the step.
1798
1799 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1800 Value *Step = State.get(getStepValue(), VPLane(0));
1801 IRBuilderBase &Builder = State.Builder;
1802
1803 // Ensure step has the same type as that of scalar IV.
1804 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1805 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1806
1807 // We build scalar steps for both integer and floating-point induction
1808 // variables. Here, we determine the kind of arithmetic we will perform.
1811 if (BaseIVTy->isIntegerTy()) {
1812 AddOp = Instruction::Add;
1813 MulOp = Instruction::Mul;
1814 } else {
1815 AddOp = InductionOpcode;
1816 MulOp = Instruction::FMul;
1817 }
1818
1819 // Determine the number of scalars we need to generate for each unroll
1820 // iteration.
1821 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1822 // Compute the scalar steps and save the results in State.
1823 Type *IntStepTy =
1824 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1825 Type *VecIVTy = nullptr;
1826 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1827 if (!FirstLaneOnly && State.VF.isScalable()) {
1828 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1829 UnitStepVec =
1830 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1831 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1832 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1833 }
1834
1835 unsigned StartLane = 0;
1836 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1837 if (State.Lane) {
1838 StartLane = State.Lane->getKnownLane();
1839 EndLane = StartLane + 1;
1840 }
1841 Value *StartIdx0 =
1842 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1843
1844 if (!FirstLaneOnly && State.VF.isScalable()) {
1845 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1846 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1847 if (BaseIVTy->isFloatingPointTy())
1848 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1849 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1850 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1851 State.set(this, Add);
1852 // It's useful to record the lane values too for the known minimum number
1853 // of elements so we do those below. This improves the code quality when
1854 // trying to extract the first element, for example.
1855 }
1856
1857 if (BaseIVTy->isFloatingPointTy())
1858 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1859
1860 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1861 Value *StartIdx = Builder.CreateBinOp(
1862 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1863 // The step returned by `createStepForVF` is a runtime-evaluated value
1864 // when VF is scalable. Otherwise, it should be folded into a Constant.
1865 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1866 "Expected StartIdx to be folded to a constant when VF is not "
1867 "scalable");
1868 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1869 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1870 State.set(this, Add, VPLane(Lane));
1871 }
1872}
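// For illustration: with an integer IV, VF=4 and unroll part 0, StartIdx0 is 0
// and the loop above emits, per lane L, IR of roughly this shape (value names
// are placeholders):
//   %idx.L  = add i64 0, L
//   %mul.L  = mul i64 %idx.L, %step
//   %lane.L = add i64 %base.iv, %mul.L
// That is, each lane's value is BaseIV + (StartIdx0 + Lane) * Step, where
// StartIdx0 = UnrollPart * VF.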
1873
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1876 VPSlotTracker &SlotTracker) const {
1877 O << Indent;
1879 O << " = SCALAR-STEPS ";
1881}
1882#endif
1883
1885 assert(State.VF.isVector() && "not widening");
1886 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1887 // Construct a vector GEP by widening the operands of the scalar GEP as
1888 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1889 // results in a vector of pointers when at least one operand of the GEP
1890 // is vector-typed. Thus, to keep the representation compact, we only use
1891 // vector-typed operands for loop-varying values.
1892
1893 if (areAllOperandsInvariant()) {
1894 // If we are vectorizing, but the GEP has only loop-invariant operands,
1895 // the GEP we build (by only using vector-typed operands for
1896 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1897 // produce a vector of pointers, we need to either arbitrarily pick an
1898 // operand to broadcast, or broadcast a clone of the original GEP.
1899 // Here, we broadcast a clone of the original.
1900 //
1901 // TODO: If at some point we decide to scalarize instructions having
1902 // loop-invariant operands, this special case will no longer be
1903 // required. We would add the scalarization decision to
1904 // collectLoopScalars() and teach getVectorValue() to broadcast
1905 // the lane-zero scalar value.
1907 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1908 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1909
1910 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1911 ArrayRef(Ops).drop_front(), "",
1913 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1914 State.set(this, Splat);
1915 State.addMetadata(Splat, GEP);
1916 } else {
1917 // If the GEP has at least one loop-varying operand, we are sure to
1918 // produce a vector of pointers unless VF is scalar.
1919 // The pointer operand of the new GEP. If it's loop-invariant, we
1920 // won't broadcast it.
1921 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1922 : State.get(getOperand(0));
1923
1924 // Collect all the indices for the new GEP. If any index is
1925 // loop-invariant, we won't broadcast it.
1927 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1928 VPValue *Operand = getOperand(I);
1929 if (isIndexLoopInvariant(I - 1))
1930 Indices.push_back(State.get(Operand, VPLane(0)));
1931 else
1932 Indices.push_back(State.get(Operand));
1933 }
1934
1935 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1936 // but it should be a vector, otherwise.
1937 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1938 Indices, "", getGEPNoWrapFlags());
1939 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1940 "NewGEP is not a pointer vector");
1941 State.set(this, NewGEP);
1942 State.addMetadata(NewGEP, GEP);
1943 }
1944}
1945
1946#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1948 VPSlotTracker &SlotTracker) const {
1949 O << Indent << "WIDEN-GEP ";
1950 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1951 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1952 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1953
1954 O << " ";
1956 O << " = getelementptr";
1957 printFlags(O);
1959}
1960#endif
1961
1962static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1963 unsigned CurrentPart, IRBuilderBase &Builder) {
1964 // Use i32 for the gep index type when the value is constant,
1965 // or query DataLayout for a more suitable index type otherwise.
1966 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1967 return IsScalable && (IsReverse || CurrentPart > 0)
1968 ? DL.getIndexType(Builder.getPtrTy(0))
1969 : Builder.getInt32Ty();
1970}
1971
1973 auto &Builder = State.Builder;
1975 unsigned CurrentPart = getUnrollPart(*this);
1976 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1977 CurrentPart, Builder);
1978
1979 // The wide store needs to start at the last vector element.
1980 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1981 if (IndexTy != RunTimeVF->getType())
1982 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1983 // NumElt = -CurrentPart * RunTimeVF
1984 Value *NumElt = Builder.CreateMul(
1985 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1986 // LastLane = 1 - RunTimeVF
1987 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1988 Value *Ptr = State.get(getOperand(0), VPLane(0));
1989 Value *ResultPtr =
1990 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
1991 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
1993
1994 State.set(this, ResultPtr, /*IsScalar*/ true);
1995}
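// For illustration: the two GEPs above offset the base pointer by
// (-CurrentPart * RunTimeVF) + (1 - RunTimeVF) elements of IndexedTy.
// Assuming VF=4, part 0 yields Ptr - 3 and part 1 yields Ptr - 7, i.e. the
// start of each reversed wide access.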
1996
1997#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1999 VPSlotTracker &SlotTracker) const {
2000 O << Indent;
2002 O << " = reverse-vector-pointer";
2003 printFlags(O);
2004 O << " ";
2006}
2007#endif
2008
2010 auto &Builder = State.Builder;
2012 unsigned CurrentPart = getUnrollPart(*this);
2013 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2014 CurrentPart, Builder);
2015 Value *Ptr = State.get(getOperand(0), VPLane(0));
2016
2017 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2018 Value *ResultPtr =
2019 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2020
2021 State.set(this, ResultPtr, /*IsScalar*/ true);
2022}
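// For illustration: here the offset is simply CurrentPart * RunTimeVF
// elements of IndexedTy, e.g. with VF=4 part 0 uses Ptr and part 2 uses
// Ptr + 8.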
2023
2024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2026 VPSlotTracker &SlotTracker) const {
2027 O << Indent;
2029 O << " = vector-pointer ";
2030
2032}
2033#endif
2034
2036 assert(isNormalized() && "Expected blend to be normalized!");
2038 // We know that all PHIs in non-header blocks are converted into
2039 // selects, so we don't have to worry about the insertion order and we
2040 // can just use the builder.
2041 // At this point we generate the predication tree. There may be
2042 // duplications since this is a simple recursive scan, but future
2043 // optimizations will clean it up.
2044
2045 unsigned NumIncoming = getNumIncomingValues();
2046
2047 // Generate a sequence of selects of the form:
2048 // SELECT(Mask3, In3,
2049 // SELECT(Mask2, In2,
2050 // SELECT(Mask1, In1,
2051 // In0)))
2052  // Note that Mask0 is never used: lanes for which no path reaches this phi
2053  // are essentially undef and are taken from In0.
2054 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2055 Value *Result = nullptr;
2056 for (unsigned In = 0; In < NumIncoming; ++In) {
2057 // We might have single edge PHIs (blocks) - use an identity
2058 // 'select' for the first PHI operand.
2059 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2060 if (In == 0)
2061 Result = In0; // Initialize with the first incoming value.
2062 else {
2063 // Select between the current value and the previous incoming edge
2064 // based on the incoming mask.
2065 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2066 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2067 }
2068 }
2069 State.set(this, Result, OnlyFirstLaneUsed);
2070}
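// For illustration: with three incoming values In0..In2 and masks M1, M2, the
// loop above produces a select chain of roughly this shape (names are
// placeholders):
//   %predphi   = select %M1, %In1, %In0
//   %predphi.1 = select %M2, %In2, %predphi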
2071
2073 VPCostContext &Ctx) const {
2075
2076 // Handle cases where only the first lane is used the same way as the legacy
2077 // cost model.
2079 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2080
2081 Type *ResultTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
2082 Type *CmpTy = ToVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2083 return (getNumIncomingValues() - 1) *
2084 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2086}
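// For illustration: a normalized blend with three incoming values is costed as
// (3 - 1) = 2 vector selects of the result type against an i1 vector,
// mirroring the select chain generated in execute() above.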
2087
2088#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2090 VPSlotTracker &SlotTracker) const {
2091 O << Indent << "BLEND ";
2093 O << " =";
2094 if (getNumIncomingValues() == 1) {
2095 // Not a User of any mask: not really blending, this is a
2096 // single-predecessor phi.
2097 O << " ";
2099 } else {
2100 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2101 O << " ";
2103 if (I == 0)
2104 continue;
2105 O << "/";
2107 }
2108 }
2109}
2110#endif
2111
2113 assert(!State.Lane && "Reduction being replicated.");
2114 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2115 RecurKind Kind = RdxDesc.getRecurrenceKind();
2116 // Propagate the fast-math flags carried by the underlying instruction.
2118 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2119 Value *NewVecOp = State.get(getVecOp());
2120 if (VPValue *Cond = getCondOp()) {
2121 Value *NewCond = State.get(Cond, State.VF.isScalar());
2122 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2123 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2124
2125 Value *Start;
2127 Start = RdxDesc.getRecurrenceStartValue();
2128 else
2129 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2130 RdxDesc.getFastMathFlags());
2131 if (State.VF.isVector())
2132 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2133
2134 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2135 NewVecOp = Select;
2136 }
2137 Value *NewRed;
2138 Value *NextInChain;
2139 if (IsOrdered) {
2140 if (State.VF.isVector())
2141 NewRed =
2142 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2143 else
2144 NewRed = State.Builder.CreateBinOp(
2145 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2146 PrevInChain = NewRed;
2147 NextInChain = NewRed;
2148 } else {
2149 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2150 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2152 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2153 NewRed, PrevInChain);
2154 else
2155 NextInChain = State.Builder.CreateBinOp(
2156 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2157 }
2158 State.set(this, NextInChain, /*IsScalar*/ true);
2159}
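// For illustration: an unordered, conditional integer add reduction produces
// IR of roughly this shape (assuming VF=4; the identity for add is 0):
//   %sel  = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer
//   %red  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %next = add i32 %red, %prev.chain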
2160
2162 assert(!State.Lane && "Reduction being replicated.");
2163
2164 auto &Builder = State.Builder;
2165 // Propagate the fast-math flags carried by the underlying instruction.
2166 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2168 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2169
2170 RecurKind Kind = RdxDesc.getRecurrenceKind();
2171 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2172 Value *VecOp = State.get(getVecOp());
2173 Value *EVL = State.get(getEVL(), VPLane(0));
2174
2175 VectorBuilder VBuilder(Builder);
2176 VBuilder.setEVL(EVL);
2177 Value *Mask;
2178 // TODO: move the all-true mask generation into VectorBuilder.
2179 if (VPValue *CondOp = getCondOp())
2180 Mask = State.get(CondOp);
2181 else
2182 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2183 VBuilder.setMask(Mask);
2184
2185 Value *NewRed;
2186 if (isOrdered()) {
2187 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2188 } else {
2189 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2191 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2192 else
2193 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2194 NewRed, Prev);
2195 }
2196 State.set(this, NewRed, /*IsScalar*/ true);
2197}
2198
2200 VPCostContext &Ctx) const {
2201 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2202 Type *ElementTy = Ctx.Types.inferScalarType(this);
2203 auto *VectorTy = cast<VectorType>(ToVectorTy(ElementTy, VF));
2205 unsigned Opcode = RdxDesc.getOpcode();
2206
2207 // TODO: Support any-of and in-loop reductions.
2208 assert(
2210 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2211 "Any-of reduction not implemented in VPlan-based cost model currently.");
2212 assert(
2213 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2214 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2215 "In-loop reduction not implemented in VPlan-based cost model currently.");
2216
2217 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2218 "Inferred type and recurrence type mismatch.");
2219
2220 // Cost = Reduction cost + BinOp cost
2222 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2225 return Cost + Ctx.TTI.getMinMaxReductionCost(
2226 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2227 }
2228
2229 return Cost + Ctx.TTI.getArithmeticReductionCost(
2230 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2231}
2232
2233#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2235 VPSlotTracker &SlotTracker) const {
2236 O << Indent << "REDUCE ";
2238 O << " = ";
2240 O << " +";
2241 if (isa<FPMathOperator>(getUnderlyingInstr()))
2243 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2245 if (isConditional()) {
2246 O << ", ";
2248 }
2249 O << ")";
2250 if (RdxDesc.IntermediateStore)
2251 O << " (with final reduction value stored in invariant address sank "
2252 "outside of loop)";
2253}
2254
2256 VPSlotTracker &SlotTracker) const {
2258 O << Indent << "REDUCE ";
2260 O << " = ";
2262 O << " +";
2263 if (isa<FPMathOperator>(getUnderlyingInstr()))
2265 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2267 O << ", ";
2269 if (isConditional()) {
2270 O << ", ";
2272 }
2273 O << ")";
2274 if (RdxDesc.IntermediateStore)
2275 O << " (with final reduction value stored in invariant address sank "
2276 "outside of loop)";
2277}
2278#endif
2279
2281 // Find if the recipe is used by a widened recipe via an intervening
2282 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2283 return any_of(users(), [](const VPUser *U) {
2284 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2285 return any_of(PredR->users(), [PredR](const VPUser *U) {
2286 return !U->usesScalars(PredR);
2287 });
2288 return false;
2289 });
2290}
2291
2293 VPCostContext &Ctx) const {
2294 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2295  // A VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2296  // transform; avoid computing its cost multiple times for now.
2297 Ctx.SkipCostComputation.insert(UI);
2298 return Ctx.getLegacyCost(UI, VF);
2299}
2300
2301#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2303 VPSlotTracker &SlotTracker) const {
2304 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2305
2306 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2308 O << " = ";
2309 }
2310 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2311 O << "call";
2312 printFlags(O);
2313 O << "@" << CB->getCalledFunction()->getName() << "(";
2315 O, [&O, &SlotTracker](VPValue *Op) {
2316 Op->printAsOperand(O, SlotTracker);
2317 });
2318 O << ")";
2319 } else {
2321 printFlags(O);
2323 }
2324
2325 if (shouldPack())
2326 O << " (S->V)";
2327}
2328#endif
2329
2330Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2332 "Codegen only implemented for first lane.");
2333 switch (Opcode) {
2334 case Instruction::SExt:
2335 case Instruction::ZExt:
2336 case Instruction::Trunc: {
2337 // Note: SExt/ZExt not used yet.
2338 Value *Op = State.get(getOperand(0), VPLane(0));
2339 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2340 }
2341 default:
2342 llvm_unreachable("opcode not implemented yet");
2343 }
2344}
2345
2346void VPScalarCastRecipe ::execute(VPTransformState &State) {
2347 State.set(this, generate(State), VPLane(0));
2348}
2349
2350#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2351void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2352 VPSlotTracker &SlotTracker) const {
2353 O << Indent << "SCALAR-CAST ";
2354 printAsOperand(O, SlotTracker);
2355 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2356 printOperands(O, SlotTracker);
2357 O << " to " << *ResultTy;
2358}
2359#endif
2360
2362 assert(State.Lane && "Branch on Mask works only on single instance.");
2363
2364 unsigned Lane = State.Lane->getKnownLane();
2365
2366 Value *ConditionBit = nullptr;
2367 VPValue *BlockInMask = getMask();
2368 if (BlockInMask) {
2369 ConditionBit = State.get(BlockInMask);
2370 if (ConditionBit->getType()->isVectorTy())
2371 ConditionBit = State.Builder.CreateExtractElement(
2372 ConditionBit, State.Builder.getInt32(Lane));
2373 } else // Block in mask is all-one.
2374 ConditionBit = State.Builder.getTrue();
2375
2376 // Replace the temporary unreachable terminator with a new conditional branch,
2377 // whose two destinations will be set later when they are created.
2378 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2379 assert(isa<UnreachableInst>(CurrentTerminator) &&
2380 "Expected to replace unreachable terminator with conditional branch.");
2381 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2382 CondBr->setSuccessor(0, nullptr);
2383 ReplaceInstWithInst(CurrentTerminator, CondBr);
2384}
2385
2387 VPCostContext &Ctx) const {
2388 // The legacy cost model doesn't assign costs to branches for individual
2389 // replicate regions. Match the current behavior in the VPlan cost model for
2390 // now.
2391 return 0;
2392}
2393
2395 assert(State.Lane && "Predicated instruction PHI works per instance.");
2396 Instruction *ScalarPredInst =
2397 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2398 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2399 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2400 assert(PredicatingBB && "Predicated block has no single predecessor.");
2401 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2402 "operand must be VPReplicateRecipe");
2403
2404 // By current pack/unpack logic we need to generate only a single phi node: if
2405 // a vector value for the predicated instruction exists at this point it means
2406 // the instruction has vector users only, and a phi for the vector value is
2407 // needed. In this case the recipe of the predicated instruction is marked to
2408 // also do that packing, thereby "hoisting" the insert-element sequence.
2409 // Otherwise, a phi node for the scalar value is needed.
2410 if (State.hasVectorValue(getOperand(0))) {
2411 Value *VectorValue = State.get(getOperand(0));
2412 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2413 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2414 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2415 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2416 if (State.hasVectorValue(this))
2417 State.reset(this, VPhi);
2418 else
2419 State.set(this, VPhi);
2420 // NOTE: Currently we need to update the value of the operand, so the next
2421 // predicated iteration inserts its generated value in the correct vector.
2422 State.reset(getOperand(0), VPhi);
2423 } else {
2424 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2425 return;
2426
2427 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2428 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2429 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2430 PredicatingBB);
2431 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2432 if (State.hasScalarValue(this, *State.Lane))
2433 State.reset(this, Phi, *State.Lane);
2434 else
2435 State.set(this, Phi, *State.Lane);
2436 // NOTE: Currently we need to update the value of the operand, so the next
2437 // predicated iteration inserts its generated value in the correct vector.
2438 State.reset(getOperand(0), Phi, *State.Lane);
2439 }
2440}
2441
2442#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2444 VPSlotTracker &SlotTracker) const {
2445 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2447 O << " = ";
2449}
2450#endif
2451
2453 VPCostContext &Ctx) const {
2455 const Align Alignment =
2457 unsigned AS =
2460
2461 if (!Consecutive) {
2462 // TODO: Using the original IR may not be accurate.
2463 // Currently, ARM will use the underlying IR to calculate gather/scatter
2464 // instruction cost.
2466 assert(!Reverse &&
2467 "Inconsecutive memory access should not have the order.");
2468 return Ctx.TTI.getAddressComputationCost(Ty) +
2470 IsMasked, Alignment, CostKind,
2471 &Ingredient);
2472 }
2473
2475 if (IsMasked) {
2476 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2477 AS, CostKind);
2478 } else {
2479 TTI::OperandValueInfo OpInfo =
2481 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2482 CostKind, OpInfo, &Ingredient);
2483 }
2484 if (!Reverse)
2485 return Cost;
2486
2488 cast<VectorType>(Ty), {}, CostKind, 0);
2489}
2490
2492 auto *LI = cast<LoadInst>(&Ingredient);
2493
2494 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2495 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2496 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2497 bool CreateGather = !isConsecutive();
2498
2499 auto &Builder = State.Builder;
2501 Value *Mask = nullptr;
2502 if (auto *VPMask = getMask()) {
2503    // Mask reversal is only needed when an explicit mask is present; a null
2504    // mask means all-one, and the reverse of an all-one mask is still all-one.
2505 Mask = State.get(VPMask);
2506 if (isReverse())
2507 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2508 }
2509
2510 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2511 Value *NewLI;
2512 if (CreateGather) {
2513 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2514 "wide.masked.gather");
2515 } else if (Mask) {
2516 NewLI =
2517 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2518 PoisonValue::get(DataTy), "wide.masked.load");
2519 } else {
2520 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2521 }
2522 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2523 State.addMetadata(NewLI, LI);
2524 if (Reverse)
2525 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2526 State.set(this, NewLI);
2527}
2528
2529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2531 VPSlotTracker &SlotTracker) const {
2532 O << Indent << "WIDEN ";
2534 O << " = load ";
2536}
2537#endif
2538
2539/// Use an all-true mask for the reverse rather than the actual mask, as it
2540/// avoids a dependence without affecting the result.
2542 Value *EVL, const Twine &Name) {
2543 VectorType *ValTy = cast<VectorType>(Operand->getType());
2544 Value *AllTrueMask =
2545 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2546 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2547 {Operand, AllTrueMask, EVL}, nullptr, Name);
2548}
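// For illustration: reversing a <vscale x 4 x i32> operand this way emits
// roughly (mask and value names are placeholders):
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//              <vscale x 4 x i32> %op, <vscale x 4 x i1> %alltrue, i32 %evl)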
2549
2551 auto *LI = cast<LoadInst>(&Ingredient);
2552
2553 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2554 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2555 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2556 bool CreateGather = !isConsecutive();
2557
2558 auto &Builder = State.Builder;
2560 CallInst *NewLI;
2561 Value *EVL = State.get(getEVL(), VPLane(0));
2562 Value *Addr = State.get(getAddr(), !CreateGather);
2563 Value *Mask = nullptr;
2564 if (VPValue *VPMask = getMask()) {
2565 Mask = State.get(VPMask);
2566 if (isReverse())
2567 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2568 } else {
2569 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2570 }
2571
2572 if (CreateGather) {
2573 NewLI =
2574 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2575 nullptr, "wide.masked.gather");
2576 } else {
2577 VectorBuilder VBuilder(Builder);
2578 VBuilder.setEVL(EVL).setMask(Mask);
2579 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2580 Instruction::Load, DataTy, Addr, "vp.op.load"));
2581 }
2582 NewLI->addParamAttr(
2583 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2584 State.addMetadata(NewLI, LI);
2585 Instruction *Res = NewLI;
2586 if (isReverse())
2587 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2588 State.set(this, Res);
2589}
2590
2592 VPCostContext &Ctx) const {
2593 if (!Consecutive || IsMasked)
2594 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2595
2596  // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2597  // here because the EVL recipes use EVL to replace the tail mask, whereas
2598  // the legacy model always accounts for the cost of the mask.
2599  // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2600  // no longer need to compare against the legacy cost model.
2602 const Align Alignment =
2604 unsigned AS =
2608 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2609 if (!Reverse)
2610 return Cost;
2611
2613 cast<VectorType>(Ty), {}, CostKind, 0);
2614}
2615
2616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2618 VPSlotTracker &SlotTracker) const {
2619 O << Indent << "WIDEN ";
2621 O << " = vp.load ";
2623}
2624#endif
2625
2627 auto *SI = cast<StoreInst>(&Ingredient);
2628
2629 VPValue *StoredVPValue = getStoredValue();
2630 bool CreateScatter = !isConsecutive();
2631 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2632
2633 auto &Builder = State.Builder;
2635
2636 Value *Mask = nullptr;
2637 if (auto *VPMask = getMask()) {
2638    // Mask reversal is only needed when an explicit mask is present; a null
2639    // mask means all-one, and the reverse of an all-one mask is still all-one.
2640 Mask = State.get(VPMask);
2641 if (isReverse())
2642 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2643 }
2644
2645 Value *StoredVal = State.get(StoredVPValue);
2646 if (isReverse()) {
2647 // If we store to reverse consecutive memory locations, then we need
2648 // to reverse the order of elements in the stored value.
2649 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2650 // We don't want to update the value in the map as it might be used in
2651 // another expression. So don't call resetVectorValue(StoredVal).
2652 }
2653 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2654 Instruction *NewSI = nullptr;
2655 if (CreateScatter)
2656 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2657 else if (Mask)
2658 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2659 else
2660 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2661 State.addMetadata(NewSI, SI);
2662}
2663
2664#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2666 VPSlotTracker &SlotTracker) const {
2667 O << Indent << "WIDEN store ";
2669}
2670#endif
2671
2673 auto *SI = cast<StoreInst>(&Ingredient);
2674
2675 VPValue *StoredValue = getStoredValue();
2676 bool CreateScatter = !isConsecutive();
2677 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2678
2679 auto &Builder = State.Builder;
2681
2682 CallInst *NewSI = nullptr;
2683 Value *StoredVal = State.get(StoredValue);
2684 Value *EVL = State.get(getEVL(), VPLane(0));
2685 if (isReverse())
2686 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2687 Value *Mask = nullptr;
2688 if (VPValue *VPMask = getMask()) {
2689 Mask = State.get(VPMask);
2690 if (isReverse())
2691 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2692 } else {
2693 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2694 }
2695 Value *Addr = State.get(getAddr(), !CreateScatter);
2696 if (CreateScatter) {
2697 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2698 Intrinsic::vp_scatter,
2699 {StoredVal, Addr, Mask, EVL});
2700 } else {
2701 VectorBuilder VBuilder(Builder);
2702 VBuilder.setEVL(EVL).setMask(Mask);
2703 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2704 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2705 {StoredVal, Addr}));
2706 }
2707 NewSI->addParamAttr(
2708 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2709 State.addMetadata(NewSI, SI);
2710}
2711
2713 VPCostContext &Ctx) const {
2714 if (!Consecutive || IsMasked)
2715 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2716
2717  // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2718  // here because the EVL recipes use EVL to replace the tail mask, whereas
2719  // the legacy model always accounts for the cost of the mask.
2720  // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2721  // no longer need to compare against the legacy cost model.
2723 const Align Alignment =
2725 unsigned AS =
2729 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2730 if (!Reverse)
2731 return Cost;
2732
2734 cast<VectorType>(Ty), {}, CostKind, 0);
2735}
2736
2737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2739 VPSlotTracker &SlotTracker) const {
2740 O << Indent << "WIDEN vp.store ";
2742}
2743#endif
2744
2746 VectorType *DstVTy, const DataLayout &DL) {
2747 // Verify that V is a vector type with same number of elements as DstVTy.
2748 auto VF = DstVTy->getElementCount();
2749 auto *SrcVecTy = cast<VectorType>(V->getType());
2750 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2751 Type *SrcElemTy = SrcVecTy->getElementType();
2752 Type *DstElemTy = DstVTy->getElementType();
2753 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2754 "Vector elements must have same size");
2755
2756 // Do a direct cast if element types are castable.
2757 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2758 return Builder.CreateBitOrPointerCast(V, DstVTy);
2759 }
2760 // V cannot be directly casted to desired vector type.
2761 // May happen when V is a floating point vector but DstVTy is a vector of
2762 // pointers or vice-versa. Handle this using a two-step bitcast using an
2763 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2764 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2765 "Only one type should be a pointer type");
2766 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2767 "Only one type should be a floating point type");
2768 Type *IntTy =
2769 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2770 auto *VecIntTy = VectorType::get(IntTy, VF);
2771 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2772 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2773}
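// For illustration: casting <4 x float> to <4 x ptr> in a datalayout with
// 32-bit pointers takes the two-step path above:
//   %as.int = bitcast <4 x float> %v to <4 x i32>
//   %as.ptr = inttoptr <4 x i32> %as.int to <4 x ptr>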
2774
2775/// Return a vector containing interleaved elements from multiple
2776/// smaller input vectors.
2778 const Twine &Name) {
2779 unsigned Factor = Vals.size();
2780 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2781
2782 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2783#ifndef NDEBUG
2784 for (Value *Val : Vals)
2785 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2786#endif
2787
2788 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2789 // must use intrinsics to interleave.
2790 if (VecTy->isScalableTy()) {
2792 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2793 Vals,
2794 /*FMFSource=*/nullptr, Name);
2795 }
2796
2797 // Fixed length. Start by concatenating all vectors into a wide vector.
2798 Value *WideVec = concatenateVectors(Builder, Vals);
2799
2800 // Interleave the elements into the wide vector.
2801 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2802 return Builder.CreateShuffleVector(
2803 WideVec, createInterleaveMask(NumElts, Factor), Name);
2804}
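// For illustration (fixed-length case): interleaving three <4 x i32> inputs
// concatenates them into a <12 x i32> wide vector and applies
// createInterleaveMask(4, 3), i.e. the shuffle mask
//   <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
// which matches the store-group example in the comment below.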
2805
2806// Try to vectorize the interleave group that \p Instr belongs to.
2807//
2808// E.g. Translate following interleaved load group (factor = 3):
2809// for (i = 0; i < N; i+=3) {
2810// R = Pic[i]; // Member of index 0
2811// G = Pic[i+1]; // Member of index 1
2812// B = Pic[i+2]; // Member of index 2
2813// ... // do something to R, G, B
2814// }
2815// To:
2816// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2817// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2818// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2819// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2820//
2821// Or translate following interleaved store group (factor = 3):
2822// for (i = 0; i < N; i+=3) {
2823// ... do something to R, G, B
2824// Pic[i] = R; // Member of index 0
2825// Pic[i+1] = G; // Member of index 1
2826// Pic[i+2] = B; // Member of index 2
2827// }
2828// To:
2829// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2830// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2831// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2832// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2833// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2835 assert(!State.Lane && "Interleave group being replicated.");
2836 const InterleaveGroup<Instruction> *Group = IG;
2837 Instruction *Instr = Group->getInsertPos();
2838
2839 // Prepare for the vector type of the interleaved load/store.
2840 Type *ScalarTy = getLoadStoreType(Instr);
2841 unsigned InterleaveFactor = Group->getFactor();
2842 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2843
2844 // TODO: extend the masked interleaved-group support to reversed access.
2845 VPValue *BlockInMask = getMask();
2846 assert((!BlockInMask || !Group->isReverse()) &&
2847 "Reversed masked interleave-group not supported.");
2848
2849 VPValue *Addr = getAddr();
2850 Value *ResAddr = State.get(Addr, VPLane(0));
2851 if (auto *I = dyn_cast<Instruction>(ResAddr))
2852 State.setDebugLocFrom(I->getDebugLoc());
2853
2854 // If the group is reverse, adjust the index to refer to the last vector lane
2855 // instead of the first. We adjust the index from the first vector lane,
2856 // rather than directly getting the pointer for lane VF - 1, because the
2857 // pointer operand of the interleaved access is supposed to be uniform.
2858 if (Group->isReverse()) {
2859 Value *RuntimeVF =
2860 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2861 Value *Index =
2862 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2863 Index = State.Builder.CreateMul(Index,
2864 State.Builder.getInt32(Group->getFactor()));
2865 Index = State.Builder.CreateNeg(Index);
2866
2867 bool InBounds = false;
2868 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2869 InBounds = Gep->isInBounds();
2870 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2871 }
2872
2873 State.setDebugLocFrom(Instr->getDebugLoc());
2874 Value *PoisonVec = PoisonValue::get(VecTy);
2875
2876 auto CreateGroupMask = [&BlockInMask, &State,
2877 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2878 if (State.VF.isScalable()) {
2879 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2880 assert(InterleaveFactor == 2 &&
2881 "Unsupported deinterleave factor for scalable vectors");
2882 auto *ResBlockInMask = State.get(BlockInMask);
2883 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2884 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2885 State.VF.getKnownMinValue() * 2, true);
2886 return State.Builder.CreateIntrinsic(
2887 MaskTy, Intrinsic::vector_interleave2, Ops,
2888 /*FMFSource=*/nullptr, "interleaved.mask");
2889 }
2890
2891 if (!BlockInMask)
2892 return MaskForGaps;
2893
2894 Value *ResBlockInMask = State.get(BlockInMask);
2895 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2896 ResBlockInMask,
2897 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2898 "interleaved.mask");
2899 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2900 ShuffledMask, MaskForGaps)
2901 : ShuffledMask;
2902 };
2903
2904 const DataLayout &DL = Instr->getDataLayout();
2905 // Vectorize the interleaved load group.
2906 if (isa<LoadInst>(Instr)) {
2907 Value *MaskForGaps = nullptr;
2908 if (NeedsMaskForGaps) {
2909 MaskForGaps = createBitMaskForGaps(State.Builder,
2910 State.VF.getKnownMinValue(), *Group);
2911 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2912 }
2913
2914 Instruction *NewLoad;
2915 if (BlockInMask || MaskForGaps) {
2916 Value *GroupMask = CreateGroupMask(MaskForGaps);
2917 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2918 Group->getAlign(), GroupMask,
2919 PoisonVec, "wide.masked.vec");
2920 } else
2921 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2922 Group->getAlign(), "wide.vec");
2923 Group->addMetadata(NewLoad);
2924
2926 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2927 if (VecTy->isScalableTy()) {
2928 assert(InterleaveFactor == 2 &&
2929 "Unsupported deinterleave factor for scalable vectors");
2930
2931 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2932 // so must use intrinsics to deinterleave.
2933 Value *DI = State.Builder.CreateIntrinsic(
2934 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2935 /*FMFSource=*/nullptr, "strided.vec");
2936 unsigned J = 0;
2937 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2938 Instruction *Member = Group->getMember(I);
2939
2940 if (!Member)
2941 continue;
2942
2943 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
2944 // If this member has different type, cast the result type.
2945 if (Member->getType() != ScalarTy) {
2946 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2947 StridedVec =
2948 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2949 }
2950
2951 if (Group->isReverse())
2952 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2953
2954 State.set(VPDefs[J], StridedVec);
2955 ++J;
2956 }
2957
2958 return;
2959 }
2960
2961 // For each member in the group, shuffle out the appropriate data from the
2962 // wide loads.
2963 unsigned J = 0;
2964 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2965 Instruction *Member = Group->getMember(I);
2966
2967 // Skip the gaps in the group.
2968 if (!Member)
2969 continue;
2970
2971 auto StrideMask =
2972 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
2973 Value *StridedVec =
2974 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
2975
2976 // If this member has different type, cast the result type.
2977 if (Member->getType() != ScalarTy) {
2978 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
2979 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2980 StridedVec =
2981 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2982 }
2983
2984 if (Group->isReverse())
2985 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2986
2987 State.set(VPDefs[J], StridedVec);
2988 ++J;
2989 }
2990 return;
2991 }
2992
2993 // The sub vector type for current instruction.
2994 auto *SubVT = VectorType::get(ScalarTy, State.VF);
2995
2996 // Vectorize the interleaved store group.
2997 Value *MaskForGaps =
2998 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
2999 assert((!MaskForGaps || !State.VF.isScalable()) &&
3000 "masking gaps for scalable vectors is not yet supported.");
3001 ArrayRef<VPValue *> StoredValues = getStoredValues();
3002 // Collect the stored vector from each member.
3003 SmallVector<Value *, 4> StoredVecs;
3004 unsigned StoredIdx = 0;
3005 for (unsigned i = 0; i < InterleaveFactor; i++) {
3006 assert((Group->getMember(i) || MaskForGaps) &&
3007 "Fail to get a member from an interleaved store group");
3008 Instruction *Member = Group->getMember(i);
3009
3010 // Skip the gaps in the group.
3011 if (!Member) {
3012 Value *Undef = PoisonValue::get(SubVT);
3013 StoredVecs.push_back(Undef);
3014 continue;
3015 }
3016
3017 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3018 ++StoredIdx;
3019
3020 if (Group->isReverse())
3021 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3022
3023 // If this member has different type, cast it to a unified type.
3024
3025 if (StoredVec->getType() != SubVT)
3026 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3027
3028 StoredVecs.push_back(StoredVec);
3029 }
3030
3031 // Interleave all the smaller vectors into one wider vector.
3032 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3033 Instruction *NewStoreInstr;
3034 if (BlockInMask || MaskForGaps) {
3035 Value *GroupMask = CreateGroupMask(MaskForGaps);
3036 NewStoreInstr = State.Builder.CreateMaskedStore(
3037 IVec, ResAddr, Group->getAlign(), GroupMask);
3038 } else
3039 NewStoreInstr =
3040 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3041
3042 Group->addMetadata(NewStoreInstr);
3043}
3044
3045#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3047 VPSlotTracker &SlotTracker) const {
3048 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3049 IG->getInsertPos()->printAsOperand(O, false);
3050 O << ", ";
3052 VPValue *Mask = getMask();
3053 if (Mask) {
3054 O << ", ";
3055 Mask->printAsOperand(O, SlotTracker);
3056 }
3057
3058 unsigned OpIdx = 0;
3059 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3060 if (!IG->getMember(i))
3061 continue;
3062 if (getNumStoreOperands() > 0) {
3063 O << "\n" << Indent << " store ";
3064 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3065 O << " to index " << i;
3066 } else {
3067 O << "\n" << Indent << " ";
3069 O << " = load from index " << i;
3070 }
3071 ++OpIdx;
3072 }
3073}
3074#endif
3075
3077 VPCostContext &Ctx) const {
3078 Instruction *InsertPos = getInsertPos();
3079 // Find the VPValue index of the interleave group. We need to skip gaps.
3080 unsigned InsertPosIdx = 0;
3081 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3082 if (auto *Member = IG->getMember(Idx)) {
3083 if (Member == InsertPos)
3084 break;
3085 InsertPosIdx++;
3086 }
3087 Type *ValTy = Ctx.Types.inferScalarType(
3088 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3089 : getStoredValues()[InsertPosIdx]);
3090 auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
3091 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3093
3094 unsigned InterleaveFactor = IG->getFactor();
3095 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3096
3097 // Holds the indices of existing members in the interleaved group.
3099 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3100 if (IG->getMember(IF))
3101 Indices.push_back(IF);
3102
3103 // Calculate the cost of the whole interleaved group.
3105 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3106 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3107
3108 if (!IG->isReverse())
3109 return Cost;
3110
3111 return Cost + IG->getNumMembers() *
3113 VectorTy, std::nullopt, CostKind, 0);
3114}
3115
3116#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3118 VPSlotTracker &SlotTracker) const {
3119 O << Indent << "EMIT ";
3121 O << " = CANONICAL-INDUCTION ";
3123}
3124#endif
3125
3128 VPValue *Step) const {
3129 // Must be an integer induction.
3131 return false;
3132 // Start must match the start value of this canonical induction.
3133 if (Start != getStartValue())
3134 return false;
3135
3136 // If the step is defined by a recipe, it is not a ConstantInt.
3137 if (Step->getDefiningRecipe())
3138 return false;
3139
3140 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
3141 return StepC && StepC->isOne();
3142}
3143
3145 return IsScalarAfterVectorization &&
3146 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3147}
3148
3150 assert(getInductionDescriptor().getKind() ==
3152 "Not a pointer induction according to InductionDescriptor!");
3153 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3154 "Unexpected type.");
3156 "Recipe should have been replaced");
3157
3158 unsigned CurrentPart = getUnrollPart(*this);
3159
3160 // Build a pointer phi
3161 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3162 Type *ScStValueType = ScalarStartValue->getType();
3163
3164 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3165 PHINode *NewPointerPhi = nullptr;
3166 if (CurrentPart == 0) {
3167 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3168 ->getPlan()
3169 ->getVectorLoopRegion()
3170 ->getEntryBasicBlock()
3171 ->front());
3172 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3173 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3174 CanonicalIV->getIterator());
3175 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3176 NewPointerPhi->setDebugLoc(getDebugLoc());
3177 } else {
3178 // The recipe has been unrolled. In that case, fetch the single pointer phi
3179 // shared among all unrolled parts of the recipe.
3180 auto *GEP =
3181 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3182 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3183 }
3184
3185 // A pointer induction, performed by using a gep
3186 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3187 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3188 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3189 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3190 // Add induction update using an incorrect block temporarily. The phi node
3191 // will be fixed after VPlan execution. Note that at this point the latch
3192 // block cannot be used, as it does not exist yet.
3193 // TODO: Model increment value in VPlan, by turning the recipe into a
3194 // multi-def and a subclass of VPHeaderPHIRecipe.
3195 if (CurrentPart == 0) {
3196 // The recipe represents the first part of the pointer induction. Create the
3197 // GEP to increment the phi across all unrolled parts.
3198 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3199 Value *NumUnrolledElems =
3200 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3201
3202 Value *InductionGEP = GetElementPtrInst::Create(
3203 State.Builder.getInt8Ty(), NewPointerPhi,
3204 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3205 InductionLoc);
3206
3207 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3208 }
3209
3210 // Create actual address geps that use the pointer phi as base and a
3211 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3212 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3213 Value *StartOffsetScalar = State.Builder.CreateMul(
3214 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3215 Value *StartOffset =
3216 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3217 // Create a vector of consecutive numbers from zero to VF.
3218 StartOffset = State.Builder.CreateAdd(
3219 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3220
3221 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3222 "scalar step must be the same across all parts");
3223 Value *GEP = State.Builder.CreateGEP(
3224 State.Builder.getInt8Ty(), NewPointerPhi,
3225 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3226 State.VF, ScalarStepValue)),
3227 "vector.gep");
3228 State.set(this, GEP);
3229}
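// For illustration: with the i8-typed GEPs above, lane L of unroll part P
// computes an address of roughly
//   ptr.phi + (P * RuntimeVF + L) * ScalarStep   (in bytes),
// while the shared ptr.ind increment advances ptr.phi by
// UF * RuntimeVF * ScalarStep bytes per vector iteration.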
3230
3231#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3233 VPSlotTracker &SlotTracker) const {
3234 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3235 "unexpected number of operands");
3236 O << Indent << "EMIT ";
3238 O << " = WIDEN-POINTER-INDUCTION ";
3240 O << ", ";
3242 if (getNumOperands() == 4) {
3243 O << ", ";
3245 O << ", ";
3247 }
3248}
3249#endif
3250
3252 assert(!State.Lane && "cannot be used in per-lane");
3253 if (State.ExpandedSCEVs.contains(Expr)) {
3254    // The SCEV Expr has already been expanded, so the result must already be
3255    // set. At the moment we have to execute the entry block twice (once before
3256    // skeleton creation to get expanded SCEVs used by the skeleton, and once
3257    // during regular VPlan execution).
3259 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3260 "Results must match");
3261 return;
3262 }
3263
3264 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3265 SCEVExpander Exp(SE, DL, "induction");
3266
3267 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3268 &*State.Builder.GetInsertPoint());
3269 State.ExpandedSCEVs[Expr] = Res;
3270 State.set(this, Res, VPLane(0));
3271}
3272
3273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3275 VPSlotTracker &SlotTracker) const {
3276 O << Indent << "EMIT ";
3278 O << " = EXPAND SCEV " << *Expr;
3279}
3280#endif
3281
3283 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3284 Type *STy = CanonicalIV->getType();
3285 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3286 ElementCount VF = State.VF;
3287 Value *VStart = VF.isScalar()
3288 ? CanonicalIV
3289 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3290 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3291 if (VF.isVector()) {
3292 VStep = Builder.CreateVectorSplat(VF, VStep);
3293 VStep =
3294 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3295 }
3296 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3297 State.set(this, CanonicalVectorIV);
3298}
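// For illustration: assuming VF=4 and unroll part 1, with scalar canonical IV
// %iv this emits roughly
//   %vec.iv = add <4 x i64> splat(%iv), <i64 4, i64 5, i64 6, i64 7>
// i.e. splat(IV) + (Part * VF + <0, 1, ..., VF-1>).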
3299
3300#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3302 VPSlotTracker &SlotTracker) const {
3303 O << Indent << "EMIT ";
3305 O << " = WIDEN-CANONICAL-INDUCTION ";
3307}
3308#endif
3309
3311 auto &Builder = State.Builder;
3312 // Create a vector from the initial value.
3313 auto *VectorInit = getStartValue()->getLiveInIRValue();
3314
3315 Type *VecTy = State.VF.isScalar()
3316 ? VectorInit->getType()
3317 : VectorType::get(VectorInit->getType(), State.VF);
3318
3319 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3320 if (State.VF.isVector()) {
3321 auto *IdxTy = Builder.getInt32Ty();
3322 auto *One = ConstantInt::get(IdxTy, 1);
3323 IRBuilder<>::InsertPointGuard Guard(Builder);
3324 Builder.SetInsertPoint(VectorPH->getTerminator());
3325 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3326 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3327 VectorInit = Builder.CreateInsertElement(
3328 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3329 }
3330
3331 // Create a phi node for the new recurrence.
3332 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3333 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3334 Phi->addIncoming(VectorInit, VectorPH);
3335 State.set(this, Phi);
3336}
3337
3340 VPCostContext &Ctx) const {
3342 if (VF.isScalar())
3343 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3344
3345 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3347
3349 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3350 Type *VectorTy =
3351 ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3352
3354 cast<VectorType>(VectorTy), Mask, CostKind,
3355 VF.getKnownMinValue() - 1);
3356}
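// For illustration: the splice mask built above for VF=4 is <3, 4, 5, 6>,
// selecting the last lane of the previous iteration's vector followed by the
// first VF-1 lanes of the current one, which is the shuffle a first-order
// recurrence needs per part.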
3357
3358#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3360 VPSlotTracker &SlotTracker) const {
3361 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3363 O << " = phi ";
3365}
3366#endif
3367
3369 auto &Builder = State.Builder;
3370
3371 // Reductions do not have to start at zero. They can start with
3372 // any loop invariant values.
3373 VPValue *StartVPV = getStartValue();
3374 Value *StartV = StartVPV->getLiveInIRValue();
3375
3376 // In order to support recurrences we need to be able to vectorize Phi nodes.
3377 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3378 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3379 // this value when we vectorize all of the instructions that use the PHI.
3380 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3381 Type *VecTy = ScalarPHI ? StartV->getType()
3382 : VectorType::get(StartV->getType(), State.VF);
3383
3384 BasicBlock *HeaderBB = State.CFG.PrevBB;
3385 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
3386 "recipe must be in the vector loop header");
3387 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3388 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3389 State.set(this, Phi, IsInLoop);
3390
3391 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3392
3393 Value *Iden = nullptr;
3394 RecurKind RK = RdxDesc.getRecurrenceKind();
3395 unsigned CurrentPart = getUnrollPart(*this);
3396
3399 // MinMax and AnyOf reductions have the start value as their identity.
3400 if (ScalarPHI) {
3401 Iden = StartV;
3402 } else {
3403 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3404 Builder.SetInsertPoint(VectorPH->getTerminator());
3405 StartV = Iden = State.get(StartVPV);
3406 }
3408 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3409 // phi or the resume value from the main vector loop when vectorizing the
3410 // epilogue loop. In the exit block, ComputeReductionResult will generate
3411 // checks to verify if the reduction result is the sentinel value. If the
3412 // result is the sentinel value, it will be corrected back to the start
3413 // value.
3414 // TODO: The sentinel value is not always necessary. When the start value is
3415 // a constant, and smaller than the start value of the induction variable,
3416 // the start value can be directly used to initialize the reduction phi.
3417 Iden = StartV;
3418 if (!ScalarPHI) {
3419 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3420 Builder.SetInsertPoint(VectorPH->getTerminator());
3421 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3422 }
3423 } else {
3424 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3425 RdxDesc.getFastMathFlags());
3426
3427 if (!ScalarPHI) {
3428 if (CurrentPart == 0) {
3429 // Create start and identity vector values for the reduction in the
3430 // preheader.
3431 // TODO: Introduce recipes in VPlan preheader to create initial values.
3432 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3433 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3434 Builder.SetInsertPoint(VectorPH->getTerminator());
3435 Constant *Zero = Builder.getInt32(0);
3436 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3437 } else {
3438 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3439 }
3440 }
3441 }
3442
3443 Phi = cast<PHINode>(State.get(this, IsInLoop));
3444 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3445 Phi->addIncoming(StartVal, VectorPH);
3446}
3447
3448#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3450 VPSlotTracker &SlotTracker) const {
3451 O << Indent << "WIDEN-REDUCTION-PHI ";
3452
3454 O << " = phi ";
3456}
3457#endif
3458
3461 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3462
3463 Value *Op0 = State.get(getOperand(0));
3464 Type *VecTy = Op0->getType();
3465 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3466 State.set(this, VecPhi);
3467}
3468
3469#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3471 VPSlotTracker &SlotTracker) const {
3472 O << Indent << "WIDEN-PHI ";
3473
3474 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3475 // Unless all incoming values are modeled in VPlan print the original PHI
3476 // directly.
3477 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3478 // values as VPValues.
3479 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3480 O << VPlanIngredient(OriginalPhi);
3481 return;
3482 }
3483
3485 O << " = phi ";
3487}
3488#endif
3489
3490// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3491// remove VPActiveLaneMaskPHIRecipe.
3493 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3494 Value *StartMask = State.get(getOperand(0));
3495 PHINode *Phi =
3496 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3497 Phi->addIncoming(StartMask, VectorPH);
3498 Phi->setDebugLoc(getDebugLoc());
3499 State.set(this, Phi);
3500}
3501
3502#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3504 VPSlotTracker &SlotTracker) const {
3505 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3506
3508 O << " = phi ";
3510}
3511#endif
3512
3513#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3515 VPSlotTracker &SlotTracker) const {
3516 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3517
3519 O << " = phi ";
3521}
3522#endif
3523
3525 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3526 Value *Start = State.get(getStartValue(), VPLane(0));
3527 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3528 Phi->addIncoming(Start, VectorPH);
3529 Phi->setDebugLoc(getDebugLoc());
3530 State.set(this, Phi, /*IsScalar=*/true);
3531}
3532
3533#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3534 void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3535  VPSlotTracker &SlotTracker) const {
3536 O << Indent << "SCALAR-PHI ";
3537  printAsOperand(O, SlotTracker);
3538  O << " = phi ";
3539  printOperands(O, SlotTracker);
3540}
3541#endif