VPlanRecipes.cpp
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "VPlanPatternMatch.h"
17#include "VPlanUtils.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Twine.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/Type.h"
28#include "llvm/IR/Value.h"
32#include "llvm/Support/Debug.h"
37#include <cassert>
38
39using namespace llvm;
40
41using VectorParts = SmallVector<Value *, 2>;
42
43namespace llvm {
44extern cl::opt<bool> EnableVPlanNativePath;
45}
46extern cl::opt<unsigned> ForceTargetInstructionCost;
47
48#define LV_NAME "loop-vectorize"
49#define DEBUG_TYPE LV_NAME
50
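// Conservative memory-effect classification: recipe kinds that are not
// handled explicitly below fall through to the default case and are treated
// as possibly writing to memory.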
51bool VPRecipeBase::mayWriteToMemory() const {
52 switch (getVPDefID()) {
53 case VPInstructionSC:
54 if (Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode()))
55 return false;
56 switch (cast<VPInstruction>(this)->getOpcode()) {
57 case Instruction::Or:
58 case Instruction::ICmp:
59 case Instruction::Select:
68 return false;
69 default:
70 return true;
71 }
72 case VPInterleaveSC:
73 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
74 case VPWidenStoreEVLSC:
75 case VPWidenStoreSC:
76 return true;
77 case VPReplicateSC:
78 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
79 ->mayWriteToMemory();
80 case VPWidenCallSC:
81 return !cast<VPWidenCallRecipe>(this)
82 ->getCalledScalarFunction()
83 ->onlyReadsMemory();
84 case VPWidenIntrinsicSC:
85 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
86 case VPBranchOnMaskSC:
87 case VPScalarIVStepsSC:
88 case VPPredInstPHISC:
89 return false;
90 case VPBlendSC:
91 case VPReductionEVLSC:
92 case VPReductionSC:
93 case VPVectorPointerSC:
94 case VPWidenCanonicalIVSC:
95 case VPWidenCastSC:
96 case VPWidenGEPSC:
97 case VPWidenIntOrFpInductionSC:
98 case VPWidenLoadEVLSC:
99 case VPWidenLoadSC:
100 case VPWidenPHISC:
101 case VPWidenSC:
102 case VPWidenEVLSC:
103 case VPWidenSelectSC: {
104 const Instruction *I =
105 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
106 (void)I;
107 assert((!I || !I->mayWriteToMemory()) &&
108 "underlying instruction may write to memory");
109 return false;
110 }
111 default:
112 return true;
113 }
114}
115
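// Same classification for reads: unlisted recipe kinds fall through to the
// default case and are conservatively treated as possibly reading from memory.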
116bool VPRecipeBase::mayReadFromMemory() const {
117 switch (getVPDefID()) {
118 case VPWidenLoadEVLSC:
119 case VPWidenLoadSC:
120 return true;
121 case VPReplicateSC:
122 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
123 ->mayReadFromMemory();
124 case VPWidenCallSC:
125 return !cast<VPWidenCallRecipe>(this)
126 ->getCalledScalarFunction()
127 ->onlyWritesMemory();
128 case VPWidenIntrinsicSC:
129 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
130 case VPBranchOnMaskSC:
131 case VPPredInstPHISC:
132 case VPScalarIVStepsSC:
133 case VPWidenStoreEVLSC:
134 case VPWidenStoreSC:
135 return false;
136 case VPBlendSC:
137 case VPReductionEVLSC:
138 case VPReductionSC:
139 case VPVectorPointerSC:
140 case VPWidenCanonicalIVSC:
141 case VPWidenCastSC:
142 case VPWidenGEPSC:
143 case VPWidenIntOrFpInductionSC:
144 case VPWidenPHISC:
145 case VPWidenSC:
146 case VPWidenEVLSC:
147 case VPWidenSelectSC: {
148 const Instruction *I =
149 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
150 (void)I;
151 assert((!I || !I->mayReadFromMemory()) &&
152 "underlying instruction may read from memory");
153 return false;
154 }
155 default:
156 return true;
157 }
158}
159
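// Side effects cover more than memory writes: calls that may throw or may
// not return are also treated as having side effects below.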
160bool VPRecipeBase::mayHaveSideEffects() const {
161 switch (getVPDefID()) {
162 case VPDerivedIVSC:
163 case VPPredInstPHISC:
164 case VPScalarCastSC:
165 case VPReverseVectorPointerSC:
166 return false;
167 case VPInstructionSC:
168 return mayWriteToMemory();
169 case VPWidenCallSC: {
170 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
171 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
172 }
173 case VPWidenIntrinsicSC:
174 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
175 case VPBlendSC:
176 case VPReductionEVLSC:
177 case VPReductionSC:
178 case VPScalarIVStepsSC:
179 case VPVectorPointerSC:
180 case VPWidenCanonicalIVSC:
181 case VPWidenCastSC:
182 case VPWidenGEPSC:
183 case VPWidenIntOrFpInductionSC:
184 case VPWidenPHISC:
185 case VPWidenPointerInductionSC:
186 case VPWidenSC:
187 case VPWidenEVLSC:
188 case VPWidenSelectSC: {
189 const Instruction *I =
190 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
191 (void)I;
192 assert((!I || !I->mayHaveSideEffects()) &&
193 "underlying instruction has side-effects");
194 return false;
195 }
196 case VPInterleaveSC:
197 return mayWriteToMemory();
198 case VPWidenLoadEVLSC:
199 case VPWidenLoadSC:
200 case VPWidenStoreEVLSC:
201 case VPWidenStoreSC:
202 assert(
203 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
204 mayWriteToMemory() &&
205 "mayHaveSideEffects result for ingredient differs from this "
206 "implementation");
207 return mayWriteToMemory();
208 case VPReplicateSC: {
209 auto *R = cast<VPReplicateRecipe>(this);
210 return R->getUnderlyingInstr()->mayHaveSideEffects();
211 }
212 default:
213 return true;
214 }
215}
216
217void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
218 assert(!Parent && "Recipe already in some VPBasicBlock");
219 assert(InsertPos->getParent() &&
220 "Insertion position not in any VPBasicBlock");
221 InsertPos->getParent()->insert(this, InsertPos->getIterator());
222}
223
224void VPRecipeBase::insertBefore(VPBasicBlock &BB,
225 iplist<VPRecipeBase>::iterator I) {
226 assert(!Parent && "Recipe already in some VPBasicBlock");
227 assert(I == BB.end() || I->getParent() == &BB);
228 BB.insert(this, I);
229}
230
231void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
232 assert(!Parent && "Recipe already in some VPBasicBlock");
233 assert(InsertPos->getParent() &&
234 "Insertion position not in any VPBasicBlock");
235 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
236}
237
238void VPRecipeBase::removeFromParent() {
239 assert(getParent() && "Recipe not in any VPBasicBlock");
240 getParent()->getRecipeList().remove(getIterator());
241 Parent = nullptr;
242}
243
244iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
245 assert(getParent() && "Recipe not in any VPBasicBlock");
246 return getParent()->getRecipeList().erase(getIterator());
247}
248
249void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
250 removeFromParent();
251 insertAfter(InsertPos);
252}
253
254void VPRecipeBase::moveBefore(VPBasicBlock &BB,
255 iplist<VPRecipeBase>::iterator I) {
256 removeFromParent();
257 insertBefore(BB, I);
258}
259
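// cost() is the shared entry point: it may skip the computation for certain
// underlying instructions, applies ForceTargetInstructionCost when set, and
// otherwise delegates the estimate to computeCost().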
260InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
261 // Get the underlying instruction for the recipe, if there is one. It is used
262 // to
263 // * decide if cost computation should be skipped for this recipe,
264 // * apply forced target instruction cost.
265 Instruction *UI = nullptr;
266 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
267 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
268 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
269 UI = IG->getInsertPos();
270 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
271 UI = &WidenMem->getIngredient();
272
273 InstructionCost RecipeCost;
274 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
275 RecipeCost = 0;
276 } else {
277 RecipeCost = computeCost(VF, Ctx);
278 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
279 RecipeCost.isValid())
280 RecipeCost = InstructionCost(ForceTargetInstructionCost);
281 }
282
283 LLVM_DEBUG({
284 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
285 dump();
286 });
287 return RecipeCost;
288}
289
290InstructionCost VPRecipeBase::computeCost(ElementCount VF,
291 VPCostContext &Ctx) const {
292 llvm_unreachable("subclasses should implement computeCost");
293}
294
295FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
296 assert(OpType == OperationType::FPMathOp &&
297 "recipe doesn't have fast math flags");
298 FastMathFlags Res;
299 Res.setAllowReassoc(FMFs.AllowReassoc);
300 Res.setNoNaNs(FMFs.NoNaNs);
301 Res.setNoInfs(FMFs.NoInfs);
302 Res.setNoSignedZeros(FMFs.NoSignedZeros);
303 Res.setAllowReciprocal(FMFs.AllowReciprocal);
304 Res.setAllowContract(FMFs.AllowContract);
305 Res.setApproxFunc(FMFs.ApproxFunc);
306 return Res;
307}
308
309#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
311#endif
312
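// VPUnrollPartAccessor helpers: after unrolling, some recipes carry an extra
// trailing operand that records which unroll part they belong to. These
// return that operand, or part 0 when it is absent.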
313template <unsigned PartOpIdx>
314VPValue *
315VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
316 if (U.getNumOperands() == PartOpIdx + 1)
317 return U.getOperand(PartOpIdx);
318 return nullptr;
319}
320
321template <unsigned PartOpIdx>
322unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
323 if (auto *UnrollPartOp = getUnrollPartOperand(U))
324 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
325 return 0;
326}
327
328VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
329 VPValue *A, VPValue *B, DebugLoc DL,
330 const Twine &Name)
331 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
332 Pred, DL),
333 Opcode(Opcode), Name(Name.str()) {
334 assert(Opcode == Instruction::ICmp &&
335 "only ICmp predicates supported at the moment");
336}
337
338VPInstruction::VPInstruction(unsigned Opcode,
339 std::initializer_list<VPValue *> Operands,
340 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
341 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
342 Opcode(Opcode), Name(Name.str()) {
343 // Make sure the VPInstruction is a floating-point operation.
344 assert(isFPMathOp() && "this op can't take fast-math flags");
345}
346
347bool VPInstruction::doesGeneratePerAllLanes() const {
348 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
349}
350
351bool VPInstruction::canGenerateScalarForFirstLane() const {
353 return true;
355 return true;
356 switch (Opcode) {
357 case Instruction::ICmp:
358 case Instruction::Select:
366 return true;
367 default:
368 return false;
369 }
370}
371
372Value *VPInstruction::generatePerLane(VPTransformState &State,
373 const VPLane &Lane) {
374 IRBuilderBase &Builder = State.Builder;
375
377 "only PtrAdd opcodes are supported for now");
378 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
379 State.get(getOperand(1), Lane), Name);
380}
381
382Value *VPInstruction::generate(VPTransformState &State) {
383 IRBuilderBase &Builder = State.Builder;
384
385 if (Instruction::isBinaryOp(getOpcode())) {
386 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
387 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
388 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
389 auto *Res =
390 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
391 if (auto *I = dyn_cast<Instruction>(Res))
392 setFlags(I);
393 return Res;
394 }
395
396 switch (getOpcode()) {
397 case VPInstruction::Not: {
398 Value *A = State.get(getOperand(0));
399 return Builder.CreateNot(A, Name);
400 }
401 case Instruction::ICmp: {
402 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
403 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
404 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
405 return Builder.CreateCmp(getPredicate(), A, B, Name);
406 }
407 case Instruction::Select: {
408 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
409 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
410 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
411 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
412 return Builder.CreateSelect(Cond, Op1, Op2, Name);
413 }
414 case VPInstruction::ActiveLaneMask: {
415 // Get first lane of vector induction variable.
416 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
417 // Get the original loop tripcount.
418 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
419
420 // If this part of the active lane mask is scalar, generate the CMP directly
421 // to avoid unnecessary extracts.
422 if (State.VF.isScalar())
423 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
424 Name);
425
426 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
427 auto *PredTy = VectorType::get(Int1Ty, State.VF);
428 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
429 {PredTy, ScalarTC->getType()},
430 {VIVElem0, ScalarTC}, nullptr, Name);
431 }
432 case VPInstruction::FirstOrderRecurrenceSplice: {
433 // Generate code to combine the previous and current values in vector v3.
434 //
435 // vector.ph:
436 // v_init = vector(..., ..., ..., a[-1])
437 // br vector.body
438 //
439 // vector.body
440 // i = phi [0, vector.ph], [i+4, vector.body]
441 // v1 = phi [v_init, vector.ph], [v2, vector.body]
442 // v2 = a[i, i+1, i+2, i+3];
443 // v3 = vector(v1(3), v2(0, 1, 2))
444
445 auto *V1 = State.get(getOperand(0));
446 if (!V1->getType()->isVectorTy())
447 return V1;
448 Value *V2 = State.get(getOperand(1));
449 return Builder.CreateVectorSplice(V1, V2, -1, Name);
450 }
451 case VPInstruction::CalculateTripCountMinusVF: {
452 unsigned UF = getParent()->getPlan()->getUF();
453 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
454 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
455 Value *Sub = Builder.CreateSub(ScalarTC, Step);
456 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
457 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
458 return Builder.CreateSelect(Cmp, Sub, Zero);
459 }
460 case VPInstruction::ExplicitVectorLength: {
461 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
462 // be outside of the main loop.
463 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
464 // Compute EVL
465 assert(AVL->getType()->isIntegerTy() &&
466 "Requested vector length should be an integer.");
467
468 assert(State.VF.isScalable() && "Expected scalable vector factor.");
469 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
470
471 Value *EVL = State.Builder.CreateIntrinsic(
472 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
473 {AVL, VFArg, State.Builder.getTrue()});
474 return EVL;
475 }
476 case VPInstruction::CanonicalIVIncrementForPart: {
477 unsigned Part = getUnrollPart(*this);
478 auto *IV = State.get(getOperand(0), VPLane(0));
479 assert(Part != 0 && "Must have a positive part");
480 // The canonical IV is incremented by the vectorization factor (num of
481 // SIMD elements) times the unroll part.
482 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
483 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
484 hasNoSignedWrap());
485 }
486 case VPInstruction::BranchOnCond: {
487 Value *Cond = State.get(getOperand(0), VPLane(0));
488 // Replace the temporary unreachable terminator with a new conditional
489 // branch, hooking it up to backward destination for exiting blocks now and
490 // to forward destination(s) later when they are created.
491 BranchInst *CondBr =
492 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
493 CondBr->setSuccessor(0, nullptr);
494 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
495
496 if (!getParent()->isExiting())
497 return CondBr;
498
499 VPRegionBlock *ParentRegion = getParent()->getParent();
500 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
501 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
502 return CondBr;
503 }
504 case VPInstruction::BranchOnCount: {
505 // First create the compare.
506 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
507 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
508 Value *Cond = Builder.CreateICmpEQ(IV, TC);
509
510 // Now create the branch.
511 auto *Plan = getParent()->getPlan();
512 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
513 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
514
515 // Replace the temporary unreachable terminator with a new conditional
516 // branch, hooking it up to backward destination (the header) now and to the
517 // forward destination (the exit/middle block) later when it is created.
518 // Note that CreateCondBr expects a valid BB as first argument, so we need
519 // to set it to nullptr later.
520 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
521 State.CFG.VPBB2IRBB[Header]);
522 CondBr->setSuccessor(0, nullptr);
523 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
524 return CondBr;
525 }
526 case VPInstruction::ComputeReductionResult: {
527 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
528 // and will be removed by breaking up the recipe further.
529 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
530 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
531 // Get its reduction variable descriptor.
532 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
533
534 RecurKind RK = RdxDesc.getRecurrenceKind();
535
536 Type *PhiTy = OrigPhi->getType();
537 // The recipe's operands are the reduction phi, followed by one operand for
538 // each part of the reduction.
539 unsigned UF = getNumOperands() - 1;
540 VectorParts RdxParts(UF);
541 for (unsigned Part = 0; Part < UF; ++Part)
542 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
543
544 // If the vector reduction can be performed in a smaller type, we truncate
545 // then extend the loop exit value to enable InstCombine to evaluate the
546 // entire expression in the smaller type.
547 // TODO: Handle this in truncateToMinBW.
548 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
549 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
550 for (unsigned Part = 0; Part < UF; ++Part)
551 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
552 }
553 // Reduce all of the unrolled parts into a single vector.
554 Value *ReducedPartRdx = RdxParts[0];
555 unsigned Op = RdxDesc.getOpcode();
556 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
557 Op = Instruction::Or;
558
559 if (PhiR->isOrdered()) {
560 ReducedPartRdx = RdxParts[UF - 1];
561 } else {
562 // Floating-point operations should have some FMF to enable the reduction.
563 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
564 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
565 for (unsigned Part = 1; Part < UF; ++Part) {
566 Value *RdxPart = RdxParts[Part];
567 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
568 ReducedPartRdx = Builder.CreateBinOp(
569 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
570 else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
571 ReducedPartRdx =
572 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
573 else
574 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
575 }
576 }
577
578 // Create the reduction after the loop. Note that inloop reductions create
579 // the target reduction in the loop using a Reduction recipe.
580 if ((State.VF.isVector() ||
581 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
582 RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
583 !PhiR->isInLoop()) {
584 ReducedPartRdx =
585 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
586 // If the reduction can be performed in a smaller type, we need to extend
587 // the reduction to the wider type before we branch to the original loop.
588 if (PhiTy != RdxDesc.getRecurrenceType())
589 ReducedPartRdx = RdxDesc.isSigned()
590 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
591 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
592 }
593
594 return ReducedPartRdx;
595 }
596 case VPInstruction::ExtractFromEnd: {
597 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
598 unsigned Offset = CI->getZExtValue();
599 assert(Offset > 0 && "Offset from end must be positive");
600 Value *Res;
601 if (State.VF.isVector()) {
602 assert(Offset <= State.VF.getKnownMinValue() &&
603 "invalid offset to extract from");
604 // Extract lane VF - Offset from the operand.
605 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
606 } else {
607 assert(Offset <= 1 && "invalid offset to extract from");
608 Res = State.get(getOperand(0));
609 }
610 if (isa<ExtractElementInst>(Res))
611 Res->setName(Name);
612 return Res;
613 }
614 case VPInstruction::LogicalAnd: {
615 Value *A = State.get(getOperand(0));
616 Value *B = State.get(getOperand(1));
617 return Builder.CreateLogicalAnd(A, B, Name);
618 }
621 "can only generate first lane for PtrAdd");
622 Value *Ptr = State.get(getOperand(0), VPLane(0));
623 Value *Addend = State.get(getOperand(1), VPLane(0));
624 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
625 }
626 case VPInstruction::ResumePhi: {
627 Value *IncomingFromVPlanPred =
628 State.get(getOperand(0), /* IsScalar */ true);
629 Value *IncomingFromOtherPreds =
630 State.get(getOperand(1), /* IsScalar */ true);
631 auto *NewPhi =
632 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
633 BasicBlock *VPlanPred =
634 State.CFG
635 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
636 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
637 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
638 if (OtherPred == VPlanPred)
639 continue;
640 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
641 }
642 return NewPhi;
643 }
644 case VPInstruction::AnyOf: {
645 Value *A = State.get(getOperand(0));
646 return Builder.CreateOrReduce(A);
647 }
648
649 default:
650 llvm_unreachable("Unsupported opcode for instruction");
651 }
652}
653
658}
659
662}
663
664#if !defined(NDEBUG)
665bool VPInstruction::isFPMathOp() const {
666 // Inspired by FPMathOperator::classof. Notable differences are that we don't
667 // support Call, PHI and Select opcodes here yet.
668 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
669 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
670 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
671 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
672}
673#endif
674
675void VPInstruction::execute(VPTransformState &State) {
676 assert(!State.Lane && "VPInstruction executing a Lane");
677 IRBuilderBase &Builder = State.Builder;
678 assert((hasFastMathFlags() == isFPMathOp() ||
679 getOpcode() == Instruction::Select) &&
680 "Recipe not a FPMathOp but has fast-math flags?");
681 if (hasFastMathFlags())
682 Builder.setFastMathFlags(getFastMathFlags());
683 State.setDebugLocFrom(getDebugLoc());
684 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
685 (vputils::onlyFirstLaneUsed(this) ||
686 isVectorToScalar() || isSingleScalar());
687 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
688 if (GeneratesPerAllLanes) {
689 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
690 Lane != NumLanes; ++Lane) {
691 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
692 assert(GeneratedValue && "generatePerLane must produce a value");
693 State.set(this, GeneratedValue, VPLane(Lane));
694 }
695 return;
696 }
697
698 Value *GeneratedValue = generate(State);
699 if (!hasResult())
700 return;
701 assert(GeneratedValue && "generate must produce a value");
702 assert(
703 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
704 State.VF.isScalar()) &&
705 "scalar value but not only first lane defined");
706 State.set(this, GeneratedValue,
707 /*IsScalar*/ GeneratesPerFirstLaneOnly);
708}
709
710bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
711 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
712 if (Instruction::isBinaryOp(getOpcode()))
713 return vputils::onlyFirstLaneUsed(this);
714
715 switch (getOpcode()) {
716 default:
717 return false;
718 case Instruction::ICmp:
719 case Instruction::Select:
720 case Instruction::Or:
722 // TODO: Cover additional opcodes.
723 return vputils::onlyFirstLaneUsed(this);
731 return true;
732 };
733 llvm_unreachable("switch should return");
734}
735
736bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
737 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
738 if (Instruction::isBinaryOp(getOpcode()))
739 return vputils::onlyFirstPartUsed(this);
740
741 switch (getOpcode()) {
742 default:
743 return false;
744 case Instruction::ICmp:
745 case Instruction::Select:
746 return vputils::onlyFirstPartUsed(this);
750 return true;
751 };
752 llvm_unreachable("switch should return");
753}
754
755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
756void VPInstruction::dump() const {
757 VPSlotTracker SlotTracker(getParent()->getPlan());
758 print(dbgs(), "", SlotTracker);
759}
760
761void VPInstruction::print(raw_ostream &O, const Twine &Indent,
762 VPSlotTracker &SlotTracker) const {
763 O << Indent << "EMIT ";
764
765 if (hasResult()) {
766 printAsOperand(O, SlotTracker);
767 O << " = ";
768 }
769
770 switch (getOpcode()) {
771 case VPInstruction::Not:
772 O << "not";
773 break;
774 case VPInstruction::SLPLoad:
775 O << "combined load";
776 break;
777 case VPInstruction::SLPStore:
778 O << "combined store";
779 break;
780 case VPInstruction::ActiveLaneMask:
781 O << "active lane mask";
782 break;
783 case VPInstruction::ResumePhi:
784 O << "resume-phi";
785 break;
786 case VPInstruction::ExplicitVectorLength:
787 O << "EXPLICIT-VECTOR-LENGTH";
788 break;
789 case VPInstruction::FirstOrderRecurrenceSplice:
790 O << "first-order splice";
791 break;
792 case VPInstruction::BranchOnCond:
793 O << "branch-on-cond";
794 break;
795 case VPInstruction::CalculateTripCountMinusVF:
796 O << "TC > VF ? TC - VF : 0";
797 break;
798 case VPInstruction::CanonicalIVIncrementForPart:
799 O << "VF * Part +";
800 break;
801 case VPInstruction::BranchOnCount:
802 O << "branch-on-count";
803 break;
804 case VPInstruction::ExtractFromEnd:
805 O << "extract-from-end";
806 break;
807 case VPInstruction::ComputeReductionResult:
808 O << "compute-reduction-result";
809 break;
810 case VPInstruction::LogicalAnd:
811 O << "logical-and";
812 break;
813 case VPInstruction::PtrAdd:
814 O << "ptradd";
815 break;
816 case VPInstruction::AnyOf:
817 O << "any-of";
818 break;
819 default:
820 O << Instruction::getOpcodeName(getOpcode());
821 }
822
823 printFlags(O);
824 printOperands(O, SlotTracker);
825
826 if (auto DL = getDebugLoc()) {
827 O << ", !dbg ";
828 DL.print(O);
829 }
830}
831#endif
832
833void VPIRInstruction::execute(VPTransformState &State) {
834 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
835 "Only PHINodes can have extra operands");
836 for (const auto &[Idx, Op] : enumerate(operands())) {
837 VPValue *ExitValue = Op;
838 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
839 ? VPLane::getFirstLane()
840 : VPLane::getLastLaneForVF(State.VF);
841 VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
842 auto *PredVPBB = Pred->getExitingBasicBlock();
843 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
844 // Set insertion point in PredBB in case an extract needs to be generated.
845 // TODO: Model extracts explicitly.
846 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
847 Value *V = State.get(ExitValue, VPLane(Lane));
848 auto *Phi = cast<PHINode>(&I);
849 // If there is no existing block for PredBB in the phi, add a new incoming
850 // value. Otherwise update the existing incoming value for PredBB.
851 if (Phi->getBasicBlockIndex(PredBB) == -1)
852 Phi->addIncoming(V, PredBB);
853 else
854 Phi->setIncomingValueForBlock(PredBB, V);
855 }
856
857 // Advance the insert point after the wrapped IR instruction. This allows
858 // interleaving VPIRInstructions and other recipes.
859 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
860}
861
862InstructionCost VPIRInstruction::computeCost(ElementCount VF,
863 VPCostContext &Ctx) const {
864 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
865 // hence it does not contribute to the cost-modeling for the VPlan.
866 return 0;
867}
868
869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
870void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
871 VPSlotTracker &SlotTracker) const {
872 O << Indent << "IR " << I;
873
874 if (getNumOperands() != 0) {
875 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
877 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
878 Op.value()->printAsOperand(O, SlotTracker);
879 O << " from ";
880 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
881 });
882 O << ")";
883 }
884}
885#endif
886
887void VPWidenCallRecipe::execute(VPTransformState &State) {
888 assert(State.VF.isVector() && "not widening");
889 State.setDebugLocFrom(getDebugLoc());
890
891 FunctionType *VFTy = Variant->getFunctionType();
892 // Add return type if intrinsic is overloaded on it.
893 SmallVector<Value *, 4> Args;
894 for (const auto &I : enumerate(arg_operands())) {
895 Value *Arg;
896 // Some vectorized function variants may also take a scalar argument,
897 // e.g. linear parameters for pointers. This needs to be the scalar value
898 // from the start of the respective part when interleaving.
899 if (!VFTy->getParamType(I.index())->isVectorTy())
900 Arg = State.get(I.value(), VPLane(0));
901 else
902 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
903 Args.push_back(Arg);
904 }
905
906 assert(Variant != nullptr && "Can't create vector function.");
907
908 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
909 SmallVector<OperandBundleDef, 1> OpBundles;
910 if (CI)
911 CI->getOperandBundlesAsDefs(OpBundles);
912
913 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
914 setFlags(V);
915
916 if (!V->getType()->isVoidTy())
917 State.set(this, V);
918 State.addMetadata(V, CI);
919}
920
921InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
922 VPCostContext &Ctx) const {
923 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
924 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
925 Variant->getFunctionType()->params(),
926 CostKind);
927}
928
929#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
930void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
931 VPSlotTracker &SlotTracker) const {
932 O << Indent << "WIDEN-CALL ";
933
934 Function *CalledFn = getCalledScalarFunction();
935 if (CalledFn->getReturnType()->isVoidTy())
936 O << "void ";
937 else {
938 printAsOperand(O, SlotTracker);
939 O << " = ";
940 }
941
942 O << "call";
943 printFlags(O);
944 O << " @" << CalledFn->getName() << "(";
946 Op->printAsOperand(O, SlotTracker);
947 });
948 O << ")";
949
950 O << " (using library function";
951 if (Variant->hasName())
952 O << ": " << Variant->getName();
953 O << ")";
954}
955#endif
956
957void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
958 assert(State.VF.isVector() && "not widening");
959 State.setDebugLocFrom(getDebugLoc());
960
961 SmallVector<Type *, 2> TysForDecl;
962 // Add return type if intrinsic is overloaded on it.
963 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
964 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
965 SmallVector<Value *, 4> Args;
966 for (const auto &I : enumerate(operands())) {
967 // Some intrinsics have a scalar argument - don't replace it with a
968 // vector.
969 Value *Arg;
970 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
971 State.TTI))
972 Arg = State.get(I.value(), VPLane(0));
973 else
974 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
975 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
976 State.TTI))
977 TysForDecl.push_back(Arg->getType());
978 Args.push_back(Arg);
979 }
980
981 // Use vector version of the intrinsic.
982 Module *M = State.Builder.GetInsertBlock()->getModule();
983 Function *VectorF =
984 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
985 assert(VectorF &&
986 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
987
988 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
989 SmallVector<OperandBundleDef, 1> OpBundles;
990 if (CI)
991 CI->getOperandBundlesAsDefs(OpBundles);
992
993 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
994
995 setFlags(V);
996
997 if (!V->getType()->isVoidTy())
998 State.set(this, V);
999 State.addMetadata(V, CI);
1000}
1001
1002InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
1003 VPCostContext &Ctx) const {
1004 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1005
1006 // Some backends analyze intrinsic arguments to determine cost. Use the
1007 // underlying value for the operand if it has one. Otherwise try to use the
1008 // operand of the underlying call instruction, if there is one. Otherwise
1009 // clear Arguments.
1010 // TODO: Rework TTI interface to be independent of concrete IR values.
1011 SmallVector<const Value *> Arguments;
1012 for (const auto &[Idx, Op] : enumerate(operands())) {
1013 auto *V = Op->getUnderlyingValue();
1014 if (!V) {
1015 // Push all the VP intrinsic's ops into Arguments even if it is nullptr.
1016 // Some VP intrinsics' cost computations assert on the number of
1017 // parameters. This mainly appears in the following two scenarios:
1018 // 1. The EVL operand is nullptr.
1019 // 2. An argument of the VP intrinsic is itself a VP intrinsic.
1020 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1021 Arguments.push_back(V);
1022 continue;
1023 }
1024 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1025 Arguments.push_back(UI->getArgOperand(Idx));
1026 continue;
1027 }
1028 Arguments.clear();
1029 break;
1030 }
1031 Arguments.push_back(V);
1032 }
1033
1034 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1035 SmallVector<Type *> ParamTys;
1036 for (unsigned I = 0; I != getNumOperands(); ++I)
1037 ParamTys.push_back(
1038 toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
1039
1040 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1041 FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
1042 IntrinsicCostAttributes CostAttrs(
1043 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1044 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1045 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1046}
1047
1048StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
1049 return Intrinsic::getBaseName(VectorIntrinsicID);
1050}
1051
1052bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1053 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1054 // Vector predication intrinsics only demand the first lane of the last
1055 // operand (the EVL operand).
1056 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1057 Op == getOperand(getNumOperands() - 1);
1058}
1059
1060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1061void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
1062 VPSlotTracker &SlotTracker) const {
1063 O << Indent << "WIDEN-INTRINSIC ";
1064 if (ResultTy->isVoidTy()) {
1065 O << "void ";
1066 } else {
1067 printAsOperand(O, SlotTracker);
1068 O << " = ";
1069 }
1070
1071 O << "call";
1072 printFlags(O);
1073 O << getIntrinsicName() << "(";
1074
1075 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1076 Op->printAsOperand(O, SlotTracker);
1077 });
1078 O << ")";
1079}
1080#endif
1081
1082void VPHistogramRecipe::execute(VPTransformState &State) {
1083 State.setDebugLocFrom(getDebugLoc());
1084 IRBuilderBase &Builder = State.Builder;
1085
1086 Value *Address = State.get(getOperand(0));
1087 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1088 VectorType *VTy = cast<VectorType>(Address->getType());
1089
1090 // The histogram intrinsic requires a mask even if the recipe doesn't;
1091 // if the mask operand was omitted then all lanes should be executed and
1092 // we just need to synthesize an all-true mask.
1093 Value *Mask = nullptr;
1094 if (VPValue *VPMask = getMask())
1095 Mask = State.get(VPMask);
1096 else
1097 Mask =
1098 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1099
1100 // If this is a subtract, we want to invert the increment amount. We may
1101 // add a separate intrinsic in future, but for now we'll try this.
1102 if (Opcode == Instruction::Sub)
1103 IncAmt = Builder.CreateNeg(IncAmt);
1104 else
1105 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1106
1107 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1108 {VTy, IncAmt->getType()},
1109 {Address, IncAmt, Mask});
1110}
1111
1112InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
1113 VPCostContext &Ctx) const {
1114 // FIXME: Take the gather and scatter into account as well. For now we're
1115 // generating the same cost as the fallback path, but we'll likely
1116 // need to create a new TTI method for determining the cost, including
1117 // whether we can use base + vec-of-smaller-indices or just
1118 // vec-of-pointers.
1119 assert(VF.isVector() && "Invalid VF for histogram cost");
1120 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1121 VPValue *IncAmt = getOperand(1);
1122 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1123 VectorType *VTy = VectorType::get(IncTy, VF);
1124
1125 // Assume that a non-constant update value (or a constant != 1) requires
1126 // a multiply, and add that into the cost.
1127 InstructionCost MulCost =
1128 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1129 if (IncAmt->isLiveIn()) {
1130 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1131
1132 if (CI && CI->getZExtValue() == 1)
1133 MulCost = TTI::TCC_Free;
1134 }
1135
1136 // Find the cost of the histogram operation itself.
1137 Type *PtrTy = VectorType::get(AddressTy, VF);
1138 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1139 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1140 Type::getVoidTy(Ctx.LLVMCtx),
1141 {PtrTy, IncTy, MaskTy});
1142
1143 // Add the costs together with the add/sub operation.
1144 return Ctx.TTI.getIntrinsicInstrCost(
1145 ICA, TTI::TCK_RecipThroughput) +
1146 MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1147}
1148
1149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1150void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
1151 VPSlotTracker &SlotTracker) const {
1152 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1153 getOperand(0)->printAsOperand(O, SlotTracker);
1154
1155 if (Opcode == Instruction::Sub)
1156 O << ", dec: ";
1157 else {
1158 assert(Opcode == Instruction::Add);
1159 O << ", inc: ";
1160 }
1161 getOperand(1)->printAsOperand(O, SlotTracker);
1162
1163 if (VPValue *Mask = getMask()) {
1164 O << ", mask: ";
1165 Mask->printAsOperand(O, SlotTracker);
1166 }
1167}
1168
1169void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1170 VPSlotTracker &SlotTracker) const {
1171 O << Indent << "WIDEN-SELECT ";
1172 printAsOperand(O, SlotTracker);
1173 O << " = select ";
1174 getOperand(0)->printAsOperand(O, SlotTracker);
1175 O << ", ";
1176 getOperand(1)->printAsOperand(O, SlotTracker);
1177 O << ", ";
1178 getOperand(2)->printAsOperand(O, SlotTracker);
1179 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1180}
1181#endif
1182
1183void VPWidenSelectRecipe::execute(VPTransformState &State) {
1184 State.setDebugLocFrom(getDebugLoc());
1185
1186 // The condition can be loop invariant but still defined inside the
1187 // loop. This means that we can't just use the original 'cond' value.
1188 // We have to take the 'vectorized' value and pick the first lane.
1189 // Instcombine will make this a no-op.
1190 auto *InvarCond =
1191 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1192
1193 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1194 Value *Op0 = State.get(getOperand(1));
1195 Value *Op1 = State.get(getOperand(2));
1196 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1197 State.set(this, Sel);
1198 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1199}
1200
1201InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
1202 VPCostContext &Ctx) const {
1203 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1204 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1205 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1206 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1207 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1208
1209 VPValue *Op0, *Op1;
1210 using namespace llvm::VPlanPatternMatch;
1211 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1212 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1213 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1214 // select x, y, false --> x & y
1215 // select x, true, y --> x | y
1216 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1217 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1218
1219 SmallVector<const Value *> Operands;
1220 if (all_of(operands(),
1221 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1222 Operands.append(SI->op_begin(), SI->op_end());
1223 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1224 return Ctx.TTI.getArithmeticInstrCost(
1225 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1226 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1227 }
1228
1229 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1230 if (!ScalarCond)
1231 CondTy = VectorType::get(CondTy, VF);
1232
1233 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1234 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1235 Pred = Cmp->getPredicate();
1236 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1237 CostKind, {TTI::OK_AnyValue, TTI::OP_None},
1238 {TTI::OK_AnyValue, TTI::OP_None}, SI);
1239}
1240
1241VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1242 const FastMathFlags &FMF) {
1243 AllowReassoc = FMF.allowReassoc();
1244 NoNaNs = FMF.noNaNs();
1245 NoInfs = FMF.noInfs();
1246 NoSignedZeros = FMF.noSignedZeros();
1247 AllowReciprocal = FMF.allowReciprocal();
1248 AllowContract = FMF.allowContract();
1249 ApproxFunc = FMF.approxFunc();
1250}
1251
1252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1253void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
1254 switch (OpType) {
1255 case OperationType::Cmp:
1256 O << " " << CmpInst::getPredicateName(getPredicate());
1257 break;
1258 case OperationType::DisjointOp:
1259 if (DisjointFlags.IsDisjoint)
1260 O << " disjoint";
1261 break;
1262 case OperationType::PossiblyExactOp:
1263 if (ExactFlags.IsExact)
1264 O << " exact";
1265 break;
1266 case OperationType::OverflowingBinOp:
1267 if (WrapFlags.HasNUW)
1268 O << " nuw";
1269 if (WrapFlags.HasNSW)
1270 O << " nsw";
1271 break;
1272 case OperationType::FPMathOp:
1273 getFastMathFlags().print(O);
1274 break;
1275 case OperationType::GEPOp:
1276 if (GEPFlags.isInBounds())
1277 O << " inbounds";
1279 O << " nusw";
1281 O << " nuw";
1282 break;
1283 case OperationType::NonNegOp:
1284 if (NonNegFlags.NonNeg)
1285 O << " nneg";
1286 break;
1287 case OperationType::Other:
1288 break;
1289 }
1290 if (getNumOperands() > 0)
1291 O << " ";
1292}
1293#endif
1294
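// Widen a scalar instruction into a single vector instruction: operands are
// fetched as vectors and the original opcode is re-emitted on the widened
// values, with IR flags and metadata propagated.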
1295void VPWidenRecipe::execute(VPTransformState &State) {
1296 State.setDebugLocFrom(getDebugLoc());
1297 auto &Builder = State.Builder;
1298 switch (Opcode) {
1299 case Instruction::Call:
1300 case Instruction::Br:
1301 case Instruction::PHI:
1302 case Instruction::GetElementPtr:
1303 case Instruction::Select:
1304 llvm_unreachable("This instruction is handled by a different recipe.");
1305 case Instruction::UDiv:
1306 case Instruction::SDiv:
1307 case Instruction::SRem:
1308 case Instruction::URem:
1309 case Instruction::Add:
1310 case Instruction::FAdd:
1311 case Instruction::Sub:
1312 case Instruction::FSub:
1313 case Instruction::FNeg:
1314 case Instruction::Mul:
1315 case Instruction::FMul:
1316 case Instruction::FDiv:
1317 case Instruction::FRem:
1318 case Instruction::Shl:
1319 case Instruction::LShr:
1320 case Instruction::AShr:
1321 case Instruction::And:
1322 case Instruction::Or:
1323 case Instruction::Xor: {
1324 // Just widen unops and binops.
1325 SmallVector<Value *, 2> Ops;
1326 for (VPValue *VPOp : operands())
1327 Ops.push_back(State.get(VPOp));
1328
1329 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1330
1331 if (auto *VecOp = dyn_cast<Instruction>(V))
1332 setFlags(VecOp);
1333
1334 // Use this vector value for all users of the original instruction.
1335 State.set(this, V);
1336 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1337 break;
1338 }
1339 case Instruction::Freeze: {
1340 Value *Op = State.get(getOperand(0));
1341
1342 Value *Freeze = Builder.CreateFreeze(Op);
1343 State.set(this, Freeze);
1344 break;
1345 }
1346 case Instruction::ICmp:
1347 case Instruction::FCmp: {
1348 // Widen compares. Generate vector compares.
1349 bool FCmp = Opcode == Instruction::FCmp;
1350 Value *A = State.get(getOperand(0));
1351 Value *B = State.get(getOperand(1));
1352 Value *C = nullptr;
1353 if (FCmp) {
1354 // Propagate fast math flags.
1355 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1356 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
1357 Builder.setFastMathFlags(I->getFastMathFlags());
1358 C = Builder.CreateFCmp(getPredicate(), A, B);
1359 } else {
1360 C = Builder.CreateICmp(getPredicate(), A, B);
1361 }
1362 State.set(this, C);
1363 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1364 break;
1365 }
1366 default:
1367 // This instruction is not vectorized by simple widening.
1368 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1369 << Instruction::getOpcodeName(Opcode));
1370 llvm_unreachable("Unhandled instruction!");
1371 } // end of switch.
1372
1373#if !defined(NDEBUG)
1374 // Verify that VPlan type inference results agree with the type of the
1375 // generated values.
1376 assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
1377 State.get(this)->getType() &&
1378 "inferred type and type from generated instructions do not match");
1379#endif
1380}
1381
1382InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
1383 VPCostContext &Ctx) const {
1384 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1385 switch (Opcode) {
1386 case Instruction::FNeg: {
1387 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1388 return Ctx.TTI.getArithmeticInstrCost(
1389 Opcode, VectorTy, CostKind,
1390 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1391 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
1392 }
1393
1394 case Instruction::UDiv:
1395 case Instruction::SDiv:
1396 case Instruction::SRem:
1397 case Instruction::URem:
1398 // More complex computation, let the legacy cost-model handle this for now.
1399 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1400 case Instruction::Add:
1401 case Instruction::FAdd:
1402 case Instruction::Sub:
1403 case Instruction::FSub:
1404 case Instruction::Mul:
1405 case Instruction::FMul:
1406 case Instruction::FDiv:
1407 case Instruction::FRem:
1408 case Instruction::Shl:
1409 case Instruction::LShr:
1410 case Instruction::AShr:
1411 case Instruction::And:
1412 case Instruction::Or:
1413 case Instruction::Xor: {
1414 VPValue *RHS = getOperand(1);
1415 // Certain instructions can be cheaper to vectorize if they have a constant
1416 // second vector operand. One example of this are shifts on x86.
1417 TargetTransformInfo::OperandValueInfo RHSInfo = {
1418 TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
1419 if (RHS->isLiveIn())
1420 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1421
1422 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1423 getOperand(1)->isDefinedOutsideLoopRegions())
1424 RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
1425 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1426 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1427
1428 SmallVector<const Value *, 4> Operands;
1429 if (CtxI)
1430 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1431 return Ctx.TTI.getArithmeticInstrCost(
1432 Opcode, VectorTy, CostKind,
1433 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1434 RHSInfo, Operands, CtxI, &Ctx.TLI);
1435 }
1436 case Instruction::Freeze: {
1437 // This opcode is unknown. Assume that it is the same as 'mul'.
1438 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1439 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1440 }
1441 case Instruction::ICmp:
1442 case Instruction::FCmp: {
1443 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1444 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1445 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1446 CostKind,
1447 {TTI::OK_AnyValue, TTI::OP_None},
1448 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1449 }
1450 default:
1451 llvm_unreachable("Unsupported opcode for instruction");
1452 }
1453}
1454
1455void VPWidenEVLRecipe::execute(VPTransformState &State) {
1456 unsigned Opcode = getOpcode();
1457 // TODO: Support other opcodes
1458 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1459 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1460
1461 State.setDebugLocFrom(getDebugLoc());
1462
1463 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1464 "VPWidenEVLRecipe should not be used for scalars");
1465
1466 VPValue *EVL = getEVL();
1467 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1468 IRBuilderBase &BuilderIR = State.Builder;
1469 VectorBuilder Builder(BuilderIR);
1470 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1471
1472 SmallVector<Value *, 4> Ops;
1473 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1474 VPValue *VPOp = getOperand(I);
1475 Ops.push_back(State.get(VPOp));
1476 }
1477
1478 Builder.setMask(Mask).setEVL(EVLArg);
1479 Value *VPInst =
1480 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1481 // Currently vp-intrinsics only accept FMF flags.
1482 // TODO: Enable other flags when support is added.
1483 if (isa<FPMathOperator>(VPInst))
1484 setFlags(cast<Instruction>(VPInst));
1485
1486 State.set(this, VPInst);
1487 State.addMetadata(VPInst,
1488 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1489}
1490
1491#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1492void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1493 VPSlotTracker &SlotTracker) const {
1494 O << Indent << "WIDEN ";
1495 printAsOperand(O, SlotTracker);
1496 O << " = " << Instruction::getOpcodeName(Opcode);
1497 printFlags(O);
1498 printOperands(O, SlotTracker);
1499}
1500
1501void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1502 VPSlotTracker &SlotTracker) const {
1503 O << Indent << "WIDEN ";
1504 printAsOperand(O, SlotTracker);
1505 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1506 printFlags(O);
1507 printOperands(O, SlotTracker);
1508}
1509#endif
1510
1511void VPWidenCastRecipe::execute(VPTransformState &State) {
1512 State.setDebugLocFrom(getDebugLoc());
1513 auto &Builder = State.Builder;
1514 /// Vectorize casts.
1515 assert(State.VF.isVector() && "Not vectorizing?");
1516 Type *DestTy = VectorType::get(getResultType(), State.VF);
1517 VPValue *Op = getOperand(0);
1518 Value *A = State.get(Op);
1519 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1520 State.set(this, Cast);
1521 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1522 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1523 setFlags(CastOp);
1524}
1525
1526InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
1527 VPCostContext &Ctx) const {
1528 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1529 // the legacy cost model, including truncates/extends when evaluating a
1530 // reduction in a smaller type.
1531 if (!getUnderlyingValue())
1532 return 0;
1533 // Computes the CastContextHint from a recipes that may access memory.
1534 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1535 if (VF.isScalar())
1536 return TTI::CastContextHint::Normal;
1537 if (isa<VPInterleaveRecipe>(R))
1538 return TTI::CastContextHint::Interleave;
1539 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1540 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1541 : TTI::CastContextHint::Normal;
1542 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1543 if (WidenMemoryRecipe == nullptr)
1544 return TTI::CastContextHint::None;
1545 if (!WidenMemoryRecipe->isConsecutive())
1546 return TTI::CastContextHint::GatherScatter;
1547 if (WidenMemoryRecipe->isReverse())
1548 return TTI::CastContextHint::Reversed;
1549 if (WidenMemoryRecipe->isMasked())
1550 return TTI::CastContextHint::Masked;
1551 return TTI::CastContextHint::Normal;
1552 };
1553
1554 VPValue *Operand = getOperand(0);
1555 TTI::CastContextHint CCH = TTI::CastContextHint::None;
1556 // For Trunc/FPTrunc, get the context from the only user.
1557 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1558 !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
1559 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1560 CCH = ComputeCCH(StoreRecipe);
1561 }
1562 // For Z/Sext, get the context from the operand.
1563 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1564 Opcode == Instruction::FPExt) {
1565 if (Operand->isLiveIn())
1566 CCH = TTI::CastContextHint::Normal;
1567 else if (Operand->getDefiningRecipe())
1568 CCH = ComputeCCH(Operand->getDefiningRecipe());
1569 }
1570
1571 auto *SrcTy =
1572 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1573 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1574 // Arm TTI will use the underlying instruction to determine the cost.
1575 return Ctx.TTI.getCastInstrCost(
1576 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1577 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1578}
1579
1580#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1581void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
1582 VPSlotTracker &SlotTracker) const {
1583 O << Indent << "WIDEN-CAST ";
1584 printAsOperand(O, SlotTracker);
1585 O << " = " << Instruction::getOpcodeName(Opcode);
1586 printFlags(O);
1587 printOperands(O, SlotTracker);
1588 O << " to " << *getResultType();
1589}
1590#endif
1591
1592InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
1593 VPCostContext &Ctx) const {
1594 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1595}
1596
1597/// This function adds
1598/// (0 * Step, 1 * Step, 2 * Step, ...)
1599/// to each vector element of Val.
1600/// \p Opcode is relevant for FP induction variable.
1601static Value *getStepVector(Value *Val, Value *Step,
1602 Instruction::BinaryOps BinOp, ElementCount VF,
1603 IRBuilderBase &Builder) {
1604 assert(VF.isVector() && "only vector VFs are supported");
1605
1606 // Create and check the types.
1607 auto *ValVTy = cast<VectorType>(Val->getType());
1608 ElementCount VLen = ValVTy->getElementCount();
1609
1610 Type *STy = Val->getType()->getScalarType();
1611 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1612 "Induction Step must be an integer or FP");
1613 assert(Step->getType() == STy && "Step has wrong type");
1614
1616
1617 // Create a vector of consecutive numbers from zero to VF.
1618 VectorType *InitVecValVTy = ValVTy;
1619 if (STy->isFloatingPointTy()) {
1620 Type *InitVecValSTy =
1621 IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
1622 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1623 }
1624 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1625
1626 if (STy->isIntegerTy()) {
1627 Step = Builder.CreateVectorSplat(VLen, Step);
1628 assert(Step->getType() == Val->getType() && "Invalid step vec");
1629 // FIXME: The newly created binary instructions should contain nsw/nuw
1630 // flags, which can be found from the original scalar operations.
1631 Step = Builder.CreateMul(InitVec, Step);
1632 return Builder.CreateAdd(Val, Step, "induction");
1633 }
1634
1635 // Floating point induction.
1636 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1637 "Binary Opcode should be specified for FP induction");
1638 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1639
1640 Step = Builder.CreateVectorSplat(VLen, Step);
1641 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1642 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1643}
1644
1645/// A helper function that returns an integer or floating-point constant with
1646/// value C.
1647static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
1648 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1649 : ConstantFP::get(Ty, C);
1650}
1651
1652void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
1653 assert(!State.Lane && "Int or FP induction being replicated.");
1654
1655 Value *Start = getStartValue()->getLiveInIRValue();
1656 const InductionDescriptor &ID = getInductionDescriptor();
1657 TruncInst *Trunc = getTruncInst();
1658 IRBuilderBase &Builder = State.Builder;
1659 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1660 "Types must match");
1661 assert(State.VF.isVector() && "must have vector VF");
1662
1663 // The value from the original loop to which we are mapping the new induction
1664 // variable.
1665 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1666
1667 // Fast-math-flags propagate from the original induction instruction.
1668 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1669 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1670 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1671
1672 // Now do the actual transformations, and start with fetching the step value.
1673 Value *Step = State.get(getStepValue(), VPLane(0));
1674
1675 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1676 "Expected either an induction phi-node or a truncate of it!");
1677
1678 // Construct the initial value of the vector IV in the vector loop preheader
1679 auto CurrIP = Builder.saveIP();
1680 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1681 Builder.SetInsertPoint(VectorPH->getTerminator());
1682 if (isa<TruncInst>(EntryVal)) {
1683 assert(Start->getType()->isIntegerTy() &&
1684 "Truncation requires an integer type");
1685 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1686 Step = Builder.CreateTrunc(Step, TruncType);
1687 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1688 }
1689
1690 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1691 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1692 State.VF, State.Builder);
1693
1694 // We create vector phi nodes for both integer and floating-point induction
1695 // variables. Here, we determine the kind of arithmetic we will perform.
1696 Instruction::BinaryOps AddOp;
1697 Instruction::BinaryOps MulOp;
1698 if (Step->getType()->isIntegerTy()) {
1699 AddOp = Instruction::Add;
1700 MulOp = Instruction::Mul;
1701 } else {
1702 AddOp = ID.getInductionOpcode();
1703 MulOp = Instruction::FMul;
1704 }
1705
1706 Value *SplatVF;
1707 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1708 // The recipe has been unrolled. In that case, fetch the splat value for the
1709 // induction increment.
1710 SplatVF = State.get(SplatVFOperand);
1711 } else {
1712 // Multiply the vectorization factor by the step using integer or
1713 // floating-point arithmetic as appropriate.
1714 Type *StepType = Step->getType();
1715 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1716 if (Step->getType()->isFloatingPointTy())
1717 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1718 else
1719 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1720 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1721
1722 // Create a vector splat to use in the induction update.
1723 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1724 }
1725
1726 Builder.restoreIP(CurrIP);
1727
1728 // We may need to add the step a number of times, depending on the unroll
1729 // factor. The last of those goes into the PHI.
1730 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1731 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1732 VecInd->setDebugLoc(getDebugLoc());
1733 State.set(this, VecInd);
1734
1735 Instruction *LastInduction = cast<Instruction>(
1736 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1737 if (isa<TruncInst>(EntryVal))
1738 State.addMetadata(LastInduction, EntryVal);
1739 LastInduction->setDebugLoc(getDebugLoc());
1740
1741 VecInd->addIncoming(SteppedStart, VectorPH);
1742 // Add induction update using an incorrect block temporarily. The phi node
1743 // will be fixed after VPlan execution. Note that at this point the latch
1744 // block cannot be used, as it does not exist yet.
1745 // TODO: Model increment value in VPlan, by turning the recipe into a
1746 // multi-def and a subclass of VPHeaderPHIRecipe.
1747 VecInd->addIncoming(LastInduction, VectorPH);
1748}
1749
1750#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1751void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1752 VPSlotTracker &SlotTracker) const {
1753 O << Indent;
1754 printAsOperand(O, SlotTracker);
1755 O << " = WIDEN-INDUCTION ";
1756 printOperands(O, SlotTracker);
1757
1758 if (auto *TI = getTruncInst())
1759 O << " (truncated to " << *TI->getType() << ")";
1760}
1761#endif
1762
1763bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1764 // The step may be defined by a recipe in the preheader (e.g. if it requires
1765 // SCEV expansion), but for the canonical induction the step is required to be
1766 // 1, which is represented as live-in.
1767 if (!getStepValue()->isLiveIn())
1768 return false;
1769 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1770 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1771 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1772 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1773 getScalarType() == CanIV->getScalarType();
1774}
1775
1776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1777void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
1778 VPSlotTracker &SlotTracker) const {
1779 O << Indent;
1780 printAsOperand(O, SlotTracker);
1781 O << " = DERIVED-IV ";
1782 getStartValue()->printAsOperand(O, SlotTracker);
1783 O << " + ";
1784 getOperand(1)->printAsOperand(O, SlotTracker);
1785 O << " * ";
1786 getStepValue()->printAsOperand(O, SlotTracker);
1787}
1788#endif
1789
1790void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
1791 // Fast-math-flags propagate from the original induction instruction.
1792 IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
1793 if (hasFastMathFlags())
1794 State.Builder.setFastMathFlags(getFastMathFlags());
1795
1796 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1797 /// variable on which to base the steps, \p Step is the size of the step.
1798
1799 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1800 Value *Step = State.get(getStepValue(), VPLane(0));
1801 IRBuilderBase &Builder = State.Builder;
1802
1803 // Ensure step has the same type as that of scalar IV.
1804 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1805 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1806
1807 // We build scalar steps for both integer and floating-point induction
1808 // variables. Here, we determine the kind of arithmetic we will perform.
1809 Instruction::BinaryOps AddOp;
1810 Instruction::BinaryOps MulOp;
1811 if (BaseIVTy->isIntegerTy()) {
1812 AddOp = Instruction::Add;
1813 MulOp = Instruction::Mul;
1814 } else {
1815 AddOp = InductionOpcode;
1816 MulOp = Instruction::FMul;
1817 }
1818
1819 // Determine the number of scalars we need to generate for each unroll
1820 // iteration.
1821 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1822 // Compute the scalar steps and save the results in State.
1823 Type *IntStepTy =
1824 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1825 Type *VecIVTy = nullptr;
1826 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1827 if (!FirstLaneOnly && State.VF.isScalable()) {
1828 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1829 UnitStepVec =
1830 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1831 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1832 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1833 }
1834
1835 unsigned StartLane = 0;
1836 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1837 if (State.Lane) {
1838 StartLane = State.Lane->getKnownLane();
1839 EndLane = StartLane + 1;
1840 }
1841 Value *StartIdx0 =
1842 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1843
1844 if (!FirstLaneOnly && State.VF.isScalable()) {
1845 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1846 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1847 if (BaseIVTy->isFloatingPointTy())
1848 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1849 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1850 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1851 State.set(this, Add);
1852 // It's useful to record the lane values too for the known minimum number
1853 // of elements so we do those below. This improves the code quality when
1854 // trying to extract the first element, for example.
1855 }
1856
1857 if (BaseIVTy->isFloatingPointTy())
1858 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1859
1860 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1861 Value *StartIdx = Builder.CreateBinOp(
1862 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1863 // The step returned by `createStepForVF` is a runtime-evaluated value
1864 // when VF is scalable. Otherwise, it should be folded into a Constant.
1865 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1866 "Expected StartIdx to be folded to a constant when VF is not "
1867 "scalable");
1868 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1869 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1870 State.set(this, Add, VPLane(Lane));
1871 }
1872}
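// Worked example (assumed values, not taken from the surrounding code): for an
// i64 induction with Step = 3, a fixed VF = 4 and unroll part 1, StartIdx0 is
// folded to 4 and the lane loop emits roughly:
//   %idx0 = add i64 4, 0          ; StartIdx0 + Lane
//   %mul0 = mul i64 %idx0, 3      ; ... * Step
//   %add0 = add i64 %base.iv, %mul0
// and likewise for lanes 1..3 (indices 5, 6 and 7).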
1873
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1876 VPSlotTracker &SlotTracker) const {
1877 O << Indent;
1879 O << " = SCALAR-STEPS ";
1881}
1882#endif
1883
1885 assert(State.VF.isVector() && "not widening");
1886 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1887 // Construct a vector GEP by widening the operands of the scalar GEP as
1888 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1889 // results in a vector of pointers when at least one operand of the GEP
1890 // is vector-typed. Thus, to keep the representation compact, we only use
1891 // vector-typed operands for loop-varying values.
1892
1893 if (areAllOperandsInvariant()) {
1894 // If we are vectorizing, but the GEP has only loop-invariant operands,
1895 // the GEP we build (by only using vector-typed operands for
1896 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1897 // produce a vector of pointers, we need to either arbitrarily pick an
1898 // operand to broadcast, or broadcast a clone of the original GEP.
1899 // Here, we broadcast a clone of the original.
1900 //
1901 // TODO: If at some point we decide to scalarize instructions having
1902 // loop-invariant operands, this special case will no longer be
1903 // required. We would add the scalarization decision to
1904 // collectLoopScalars() and teach getVectorValue() to broadcast
1905 // the lane-zero scalar value.
1907 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1908 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1909
1910 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1911 ArrayRef(Ops).drop_front(), "",
1913 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1914 State.set(this, Splat);
1915 State.addMetadata(Splat, GEP);
1916 } else {
1917 // If the GEP has at least one loop-varying operand, we are sure to
1918 // produce a vector of pointers unless VF is scalar.
1919 // The pointer operand of the new GEP. If it's loop-invariant, we
1920 // won't broadcast it.
1921 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1922 : State.get(getOperand(0));
1923
1924 // Collect all the indices for the new GEP. If any index is
1925 // loop-invariant, we won't broadcast it.
1927 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1928 VPValue *Operand = getOperand(I);
1929 if (isIndexLoopInvariant(I - 1))
1930 Indices.push_back(State.get(Operand, VPLane(0)));
1931 else
1932 Indices.push_back(State.get(Operand));
1933 }
1934
1935 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1936 // but it should be a vector, otherwise.
1937 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1938 Indices, "", getGEPNoWrapFlags());
1939 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1940 "NewGEP is not a pointer vector");
1941 State.set(this, NewGEP);
1942 State.addMetadata(NewGEP, GEP);
1943 }
1944}
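// Illustrative IR (operand names are assumptions, VF = 4): an all-invariant GEP
// is built once from scalar operands and then broadcast,
//   %g     = getelementptr i32, ptr %base, i64 %inv.idx
//   %ins   = insertelement <4 x ptr> poison, ptr %g, i64 0
//   %splat = shufflevector <4 x ptr> %ins, <4 x ptr> poison, <4 x i32> zeroinitializer
// whereas a GEP with a loop-varying index becomes a single vector GEP:
//   %g.vec = getelementptr i32, ptr %base, <4 x i64> %vec.idx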
1945
1946#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1948 VPSlotTracker &SlotTracker) const {
1949 O << Indent << "WIDEN-GEP ";
1950 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1951 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1952 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1953
1954 O << " ";
1956 O << " = getelementptr";
1957 printFlags(O);
1959}
1960#endif
1961
1962static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1963 unsigned CurrentPart, IRBuilderBase &Builder) {
1964 // Use i32 for the gep index type when the value is constant,
1965 // or query DataLayout for a more suitable index type otherwise.
1966 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1967 return IsScalable && (IsReverse || CurrentPart > 0)
1968 ? DL.getIndexType(Builder.getPtrTy(0))
1969 : Builder.getInt32Ty();
1970}
1971
1973 auto &Builder = State.Builder;
1975 unsigned CurrentPart = getUnrollPart(*this);
1976 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1977 CurrentPart, Builder);
1978
1979 // The wide store needs to start at the last vector element.
1980 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1981 if (IndexTy != RunTimeVF->getType())
1982 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1983 // NumElt = -CurrentPart * RunTimeVF
1984 Value *NumElt = Builder.CreateMul(
1985 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1986 // LastLane = 1 - RunTimeVF
1987 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1988 Value *Ptr = State.get(getOperand(0), VPLane(0));
1989 Value *ResultPtr =
1990 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
1991 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
1993
1994 State.set(this, ResultPtr, /*IsScalar*/ true);
1995}
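// Worked example (assumed values): with a fixed VF = 4 and CurrentPart = 1,
// NumElt = -1 * 4 = -4 and LastLane = 1 - 4 = -3, so the result is
// &Ptr[-7]: the lowest address touched by part 1, from which a full vector is
// accessed and then reversed.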
1996
1997#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1999 VPSlotTracker &SlotTracker) const {
2000 O << Indent;
2002 O << " = reverse-vector-pointer";
2003 printFlags(O);
2005}
2006#endif
2007
2009 auto &Builder = State.Builder;
2011 unsigned CurrentPart = getUnrollPart(*this);
2012 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2013 CurrentPart, Builder);
2014 Value *Ptr = State.get(getOperand(0), VPLane(0));
2015
2016 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2017 Value *ResultPtr =
2018 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2019
2020 State.set(this, ResultPtr, /*IsScalar*/ true);
2021}
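// Worked example (assumed values): for a scalable access with VF = vscale x 4
// and CurrentPart = 2, the increment is 2 * vscale * 4 elements, e.g.
//   %inc       = mul i64 %vscale.x4, 2
//   %ptr.part2 = getelementptr i32, ptr %base, i64 %inc
// i.e. the start address of part 2 of the widened access.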
2022
2023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2025 VPSlotTracker &SlotTracker) const {
2026 O << Indent;
2028 O << " = vector-pointer ";
2029
2031}
2032#endif
2033
2035 assert(isNormalized() && "Expected blend to be normalized!");
2037 // We know that all PHIs in non-header blocks are converted into
2038 // selects, so we don't have to worry about the insertion order and we
2039 // can just use the builder.
2040 // At this point we generate the predication tree. There may be
2041 // duplications since this is a simple recursive scan, but future
2042 // optimizations will clean it up.
2043
2044 unsigned NumIncoming = getNumIncomingValues();
2045
2046 // Generate a sequence of selects of the form:
2047 // SELECT(Mask3, In3,
2048 // SELECT(Mask2, In2,
2049 // SELECT(Mask1, In1,
2050 // In0)))
2051 // Note that Mask0 is never used: lanes for which no path reaches this phi
2052 // are essentially undef and take their value from In0.
2053 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2054 Value *Result = nullptr;
2055 for (unsigned In = 0; In < NumIncoming; ++In) {
2056 // We might have single-edge PHIs (blocks); the first incoming value needs
2057 // no select and simply seeds the result.
2058 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2059 if (In == 0)
2060 Result = In0; // Initialize with the first incoming value.
2061 else {
2062 // Select between the current value and the previous incoming edge
2063 // based on the incoming mask.
2064 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2065 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2066 }
2067 }
2068 State.set(this, Result, OnlyFirstLaneUsed);
2069}
2070
2072 VPCostContext &Ctx) const {
2074
2075 // Handle cases where only the first lane is used the same way as the legacy
2076 // cost model.
2077 if (vputils::onlyFirstLaneUsed(this))
2078 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2079
2080 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2081 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2082 return (getNumIncomingValues() - 1) *
2083 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2085}
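// For example (hypothetical count): a normalized blend with three incoming
// values lowers to two chained selects, so the returned cost is
// 2 * getCmpSelInstrCost(Select, <VF x ResultTy>, <VF x i1>).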
2086
2087#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2089 VPSlotTracker &SlotTracker) const {
2090 O << Indent << "BLEND ";
2092 O << " =";
2093 if (getNumIncomingValues() == 1) {
2094 // Not a User of any mask: not really blending, this is a
2095 // single-predecessor phi.
2096 O << " ";
2098 } else {
2099 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2100 O << " ";
2102 if (I == 0)
2103 continue;
2104 O << "/";
2106 }
2107 }
2108}
2109#endif
2110
2112 assert(!State.Lane && "Reduction being replicated.");
2113 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2114 RecurKind Kind = RdxDesc.getRecurrenceKind();
2115 // Propagate the fast-math flags carried by the underlying instruction.
2117 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2119 Value *NewVecOp = State.get(getVecOp());
2120 if (VPValue *Cond = getCondOp()) {
2121 Value *NewCond = State.get(Cond, State.VF.isScalar());
2122 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2123 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2124
2125 Value *Start;
2126 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
2127 Start = RdxDesc.getRecurrenceStartValue();
2128 else
2129 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2130 RdxDesc.getFastMathFlags());
2131 if (State.VF.isVector())
2132 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2133
2134 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2135 NewVecOp = Select;
2136 }
2137 Value *NewRed;
2138 Value *NextInChain;
2139 if (IsOrdered) {
2140 if (State.VF.isVector())
2141 NewRed =
2142 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2143 else
2144 NewRed = State.Builder.CreateBinOp(
2145 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2146 PrevInChain = NewRed;
2147 NextInChain = NewRed;
2148 } else {
2149 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2150 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2151 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2152 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2153 NewRed, PrevInChain);
2154 else
2155 NextInChain = State.Builder.CreateBinOp(
2156 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2157 }
2158 State.set(this, NextInChain, /*IsScalar*/ true);
2159}
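// Illustrative IR (assumed types; unordered, conditional integer add reduction
// with VF = 4): the mask selects the identity (0 for add) for inactive lanes
// before the in-loop reduction and the chain update:
//   %sel  = select <4 x i1> %cond, <4 x i32> %vec.op, <4 x i32> zeroinitializer
//   %rdx  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %next = add i32 %rdx, %prev.chain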
2160
2162 assert(!State.Lane && "Reduction being replicated.");
2163
2164 auto &Builder = State.Builder;
2165 // Propagate the fast-math flags carried by the underlying instruction.
2166 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2168 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2169
2170 RecurKind Kind = RdxDesc.getRecurrenceKind();
2171 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2172 Value *VecOp = State.get(getVecOp());
2173 Value *EVL = State.get(getEVL(), VPLane(0));
2174
2175 VectorBuilder VBuilder(Builder);
2176 VBuilder.setEVL(EVL);
2177 Value *Mask;
2178 // TODO: move the all-true mask generation into VectorBuilder.
2179 if (VPValue *CondOp = getCondOp())
2180 Mask = State.get(CondOp);
2181 else
2182 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2183 VBuilder.setMask(Mask);
2184
2185 Value *NewRed;
2186 if (isOrdered()) {
2187 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2188 } else {
2189 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2190 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2191 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2192 else
2193 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2194 NewRed, Prev);
2195 }
2196 State.set(this, NewRed, /*IsScalar*/ true);
2197}
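// Illustrative IR (assumed types): an ordered fadd reduction is lowered through
// the VectorBuilder to a VP reduction intrinsic that takes the previous chain
// value as its start operand, bounded by the mask and explicit vector length:
//   %red = call float @llvm.vp.reduce.fadd.v4f32(float %prev, <4 x float> %vec,
//                                                <4 x i1> %mask, i32 %evl)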
2198
2200 VPCostContext &Ctx) const {
2201 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2202 Type *ElementTy = Ctx.Types.inferScalarType(this);
2203 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2205 unsigned Opcode = RdxDesc.getOpcode();
2206
2207 // TODO: Support any-of and in-loop reductions.
2208 assert(
2210 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2211 "Any-of reduction not implemented in VPlan-based cost model currently.");
2212 assert(
2213 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2214 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2215 "In-loop reduction not implemented in VPlan-based cost model currently.");
2216
2217 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2218 "Inferred type and recurrence type mismatch.");
2219
2220 // Cost = Reduction cost + BinOp cost
2222 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2225 return Cost + Ctx.TTI.getMinMaxReductionCost(
2226 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2227 }
2228
2229 return Cost + Ctx.TTI.getArithmeticReductionCost(
2230 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2231}
2232
2233#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2235 VPSlotTracker &SlotTracker) const {
2236 O << Indent << "REDUCE ";
2238 O << " = ";
2240 O << " +";
2241 if (isa<FPMathOperator>(getUnderlyingInstr()))
2243 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2245 if (isConditional()) {
2246 O << ", ";
2248 }
2249 O << ")";
2250 if (RdxDesc.IntermediateStore)
2251 O << " (with final reduction value stored in invariant address sank "
2252 "outside of loop)";
2253}
2254
2256 VPSlotTracker &SlotTracker) const {
2258 O << Indent << "REDUCE ";
2260 O << " = ";
2262 O << " +";
2263 if (isa<FPMathOperator>(getUnderlyingInstr()))
2265 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2267 O << ", ";
2269 if (isConditional()) {
2270 O << ", ";
2272 }
2273 O << ")";
2274 if (RdxDesc.IntermediateStore)
2275 O << " (with final reduction value stored in invariant address sank "
2276 "outside of loop)";
2277}
2278#endif
2279
2281 // Find if the recipe is used by a widened recipe via an intervening
2282 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2283 return any_of(users(), [](const VPUser *U) {
2284 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2285 return any_of(PredR->users(), [PredR](const VPUser *U) {
2286 return !U->usesScalars(PredR);
2287 });
2288 return false;
2289 });
2290}
2291
2293 VPCostContext &Ctx) const {
2294 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2295 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2296 // transform, avoid computing their cost multiple times for now.
2297 Ctx.SkipCostComputation.insert(UI);
2298 return Ctx.getLegacyCost(UI, VF);
2299}
2300
2301#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2303 VPSlotTracker &SlotTracker) const {
2304 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2305
2306 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2308 O << " = ";
2309 }
2310 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2311 O << "call";
2312 printFlags(O);
2313 O << "@" << CB->getCalledFunction()->getName() << "(";
2315 O, [&O, &SlotTracker](VPValue *Op) {
2316 Op->printAsOperand(O, SlotTracker);
2317 });
2318 O << ")";
2319 } else {
2321 printFlags(O);
2323 }
2324
2325 if (shouldPack())
2326 O << " (S->V)";
2327}
2328#endif
2329
2330Value *VPScalarCastRecipe::generate(VPTransformState &State) {
2332 "Codegen only implemented for first lane.");
2333 switch (Opcode) {
2334 case Instruction::SExt:
2335 case Instruction::ZExt:
2336 case Instruction::Trunc: {
2337 // Note: SExt/ZExt not used yet.
2338 Value *Op = State.get(getOperand(0), VPLane(0));
2339 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2340 }
2341 default:
2342 llvm_unreachable("opcode not implemented yet");
2343 }
2344}
2345
2346void VPScalarCastRecipe::execute(VPTransformState &State) {
2347 State.set(this, generate(State), VPLane(0));
2348}
2349
2350#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2351void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
2352 VPSlotTracker &SlotTracker) const {
2353 O << Indent << "SCALAR-CAST ";
2354 printAsOperand(O, SlotTracker);
2355 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2356 printOperands(O, SlotTracker);
2357 O << " to " << *ResultTy;
2358}
2359#endif
2360
2362 assert(State.Lane && "Branch on Mask works only on single instance.");
2363
2364 unsigned Lane = State.Lane->getKnownLane();
2365
2366 Value *ConditionBit = nullptr;
2367 VPValue *BlockInMask = getMask();
2368 if (BlockInMask) {
2369 ConditionBit = State.get(BlockInMask);
2370 if (ConditionBit->getType()->isVectorTy())
2371 ConditionBit = State.Builder.CreateExtractElement(
2372 ConditionBit, State.Builder.getInt32(Lane));
2373 } else // Block in mask is all-one.
2374 ConditionBit = State.Builder.getTrue();
2375
2376 // Replace the temporary unreachable terminator with a new conditional branch,
2377 // whose two destinations will be set later when they are created.
2378 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2379 assert(isa<UnreachableInst>(CurrentTerminator) &&
2380 "Expected to replace unreachable terminator with conditional branch.");
2381 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2382 CondBr->setSuccessor(0, nullptr);
2383 ReplaceInstWithInst(CurrentTerminator, CondBr);
2384}
2385
2387 VPCostContext &Ctx) const {
2388 // The legacy cost model doesn't assign costs to branches for individual
2389 // replicate regions. Match the current behavior in the VPlan cost model for
2390 // now.
2391 return 0;
2392}
2393
2396 assert(State.Lane && "Predicated instruction PHI works per instance.");
2397 Instruction *ScalarPredInst =
2398 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2399 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2400 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2401 assert(PredicatingBB && "Predicated block has no single predecessor.");
2402 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2403 "operand must be VPReplicateRecipe");
2404
2405 // By current pack/unpack logic we need to generate only a single phi node: if
2406 // a vector value for the predicated instruction exists at this point it means
2407 // the instruction has vector users only, and a phi for the vector value is
2408 // needed. In this case the recipe of the predicated instruction is marked to
2409 // also do that packing, thereby "hoisting" the insert-element sequence.
2410 // Otherwise, a phi node for the scalar value is needed.
2411 if (State.hasVectorValue(getOperand(0))) {
2412 Value *VectorValue = State.get(getOperand(0));
2413 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2414 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2415 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2416 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2417 if (State.hasVectorValue(this))
2418 State.reset(this, VPhi);
2419 else
2420 State.set(this, VPhi);
2421 // NOTE: Currently we need to update the value of the operand, so the next
2422 // predicated iteration inserts its generated value in the correct vector.
2423 State.reset(getOperand(0), VPhi);
2424 } else {
2425 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2426 return;
2427
2428 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2429 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2430 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2431 PredicatingBB);
2432 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2433 if (State.hasScalarValue(this, *State.Lane))
2434 State.reset(this, Phi, *State.Lane);
2435 else
2436 State.set(this, Phi, *State.Lane);
2437 // NOTE: Currently we need to update the value of the operand, so the next
2438 // predicated iteration inserts its generated value in the correct vector.
2439 State.reset(getOperand(0), Phi, *State.Lane);
2440 }
2441}
2442
2443#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2445 VPSlotTracker &SlotTracker) const {
2446 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2448 O << " = ";
2450}
2451#endif
2452
2454 VPCostContext &Ctx) const {
2456 const Align Alignment =
2458 unsigned AS =
2461
2462 if (!Consecutive) {
2463 // TODO: Using the original IR may not be accurate.
2464 // Currently, ARM will use the underlying IR to calculate gather/scatter
2465 // instruction cost.
2467 assert(!Reverse &&
2468 "Inconsecutive memory access should not have the order.");
2469 return Ctx.TTI.getAddressComputationCost(Ty) +
2471 IsMasked, Alignment, CostKind,
2472 &Ingredient);
2473 }
2474
2476 if (IsMasked) {
2477 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2478 AS, CostKind);
2479 } else {
2480 TTI::OperandValueInfo OpInfo =
2482 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2483 CostKind, OpInfo, &Ingredient);
2484 }
2485 if (!Reverse)
2486 return Cost;
2487
2489 cast<VectorType>(Ty), {}, CostKind, 0);
2490}
2491
2493 auto *LI = cast<LoadInst>(&Ingredient);
2494
2495 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2496 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2497 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2498 bool CreateGather = !isConsecutive();
2499
2500 auto &Builder = State.Builder;
2502 Value *Mask = nullptr;
2503 if (auto *VPMask = getMask()) {
2504 // Mask reversal is only needed when an explicit mask is present; a null
2505 // mask stands for all-ones, and the reverse of all-ones is still all-ones.
2506 Mask = State.get(VPMask);
2507 if (isReverse())
2508 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2509 }
2510
2511 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2512 Value *NewLI;
2513 if (CreateGather) {
2514 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2515 "wide.masked.gather");
2516 } else if (Mask) {
2517 NewLI =
2518 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2519 PoisonValue::get(DataTy), "wide.masked.load");
2520 } else {
2521 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2522 }
2523 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2524 State.addMetadata(NewLI, LI);
2525 if (Reverse)
2526 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2527 State.set(this, NewLI);
2528}
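// Illustrative IR (assumed operands, VF = 4): a consecutive masked access turns
// into a masked load, and a reversed one is followed by a vector reverse of the
// loaded value:
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4,
//                                                  <4 x i1> %mask, <4 x i32> poison)
//   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// Non-consecutive accesses use @llvm.masked.gather instead.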
2529
2530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2532 VPSlotTracker &SlotTracker) const {
2533 O << Indent << "WIDEN ";
2535 O << " = load ";
2537}
2538#endif
2539
2540/// Use all-true mask for reverse rather than actual mask, as it avoids a
2541/// dependence w/o affecting the result.
2543 Value *EVL, const Twine &Name) {
2544 VectorType *ValTy = cast<VectorType>(Operand->getType());
2545 Value *AllTrueMask =
2546 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2547 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2548 {Operand, AllTrueMask, EVL}, nullptr, Name);
2549}
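// For example (assumed types), reversing a value under EVL produces:
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//              <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)
// where the all-true mask keeps the reverse free of an extra mask dependence.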
2550
2552 auto *LI = cast<LoadInst>(&Ingredient);
2553
2554 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2555 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2556 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2557 bool CreateGather = !isConsecutive();
2558
2559 auto &Builder = State.Builder;
2561 CallInst *NewLI;
2562 Value *EVL = State.get(getEVL(), VPLane(0));
2563 Value *Addr = State.get(getAddr(), !CreateGather);
2564 Value *Mask = nullptr;
2565 if (VPValue *VPMask = getMask()) {
2566 Mask = State.get(VPMask);
2567 if (isReverse())
2568 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2569 } else {
2570 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2571 }
2572
2573 if (CreateGather) {
2574 NewLI =
2575 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2576 nullptr, "wide.masked.gather");
2577 } else {
2578 VectorBuilder VBuilder(Builder);
2579 VBuilder.setEVL(EVL).setMask(Mask);
2580 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2581 Instruction::Load, DataTy, Addr, "vp.op.load"));
2582 }
2583 NewLI->addParamAttr(
2584 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2585 State.addMetadata(NewLI, LI);
2586 Instruction *Res = NewLI;
2587 if (isReverse())
2588 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2589 State.set(this, Res);
2590}
2591
2593 VPCostContext &Ctx) const {
2594 if (!Consecutive || IsMasked)
2595 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2596
2597 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2598 // here because the EVL recipes use EVL in place of the tail mask, while the
2599 // legacy model always accounts for the cost of the mask.
2600 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2601 // no longer need to compare against the legacy cost model.
2603 const Align Alignment =
2605 unsigned AS =
2609 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2610 if (!Reverse)
2611 return Cost;
2612
2614 cast<VectorType>(Ty), {}, CostKind, 0);
2615}
2616
2617#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2619 VPSlotTracker &SlotTracker) const {
2620 O << Indent << "WIDEN ";
2622 O << " = vp.load ";
2624}
2625#endif
2626
2628 auto *SI = cast<StoreInst>(&Ingredient);
2629
2630 VPValue *StoredVPValue = getStoredValue();
2631 bool CreateScatter = !isConsecutive();
2632 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2633
2634 auto &Builder = State.Builder;
2636
2637 Value *Mask = nullptr;
2638 if (auto *VPMask = getMask()) {
2639 // Mask reversal is only needed when an explicit mask is present; a null
2640 // mask stands for all-ones, and the reverse of all-ones is still all-ones.
2641 Mask = State.get(VPMask);
2642 if (isReverse())
2643 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2644 }
2645
2646 Value *StoredVal = State.get(StoredVPValue);
2647 if (isReverse()) {
2648 // If we store to reverse consecutive memory locations, then we need
2649 // to reverse the order of elements in the stored value.
2650 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2651 // We don't want to update the value in the map as it might be used in
2652 // another expression. So don't call resetVectorValue(StoredVal).
2653 }
2654 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2655 Instruction *NewSI = nullptr;
2656 if (CreateScatter)
2657 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2658 else if (Mask)
2659 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2660 else
2661 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2662 State.addMetadata(NewSI, SI);
2663}
2664
2665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2667 VPSlotTracker &SlotTracker) const {
2668 O << Indent << "WIDEN store ";
2670}
2671#endif
2672
2674 auto *SI = cast<StoreInst>(&Ingredient);
2675
2676 VPValue *StoredValue = getStoredValue();
2677 bool CreateScatter = !isConsecutive();
2678 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2679
2680 auto &Builder = State.Builder;
2682
2683 CallInst *NewSI = nullptr;
2684 Value *StoredVal = State.get(StoredValue);
2685 Value *EVL = State.get(getEVL(), VPLane(0));
2686 if (isReverse())
2687 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2688 Value *Mask = nullptr;
2689 if (VPValue *VPMask = getMask()) {
2690 Mask = State.get(VPMask);
2691 if (isReverse())
2692 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2693 } else {
2694 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2695 }
2696 Value *Addr = State.get(getAddr(), !CreateScatter);
2697 if (CreateScatter) {
2698 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2699 Intrinsic::vp_scatter,
2700 {StoredVal, Addr, Mask, EVL});
2701 } else {
2702 VectorBuilder VBuilder(Builder);
2703 VBuilder.setEVL(EVL).setMask(Mask);
2704 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2705 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2706 {StoredVal, Addr}));
2707 }
2708 NewSI->addParamAttr(
2709 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2710 State.addMetadata(NewSI, SI);
2711}
2712
2714 VPCostContext &Ctx) const {
2715 if (!Consecutive || IsMasked)
2716 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2717
2718 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2719 // here because the EVL recipes use EVL in place of the tail mask, while the
2720 // legacy model always accounts for the cost of the mask.
2721 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2722 // no longer need to compare against the legacy cost model.
2724 const Align Alignment =
2726 unsigned AS =
2730 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2731 if (!Reverse)
2732 return Cost;
2733
2735 cast<VectorType>(Ty), {}, CostKind, 0);
2736}
2737
2738#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2740 VPSlotTracker &SlotTracker) const {
2741 O << Indent << "WIDEN vp.store ";
2743}
2744#endif
2745
2747 VectorType *DstVTy, const DataLayout &DL) {
2748 // Verify that V is a vector type with same number of elements as DstVTy.
2749 auto VF = DstVTy->getElementCount();
2750 auto *SrcVecTy = cast<VectorType>(V->getType());
2751 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2752 Type *SrcElemTy = SrcVecTy->getElementType();
2753 Type *DstElemTy = DstVTy->getElementType();
2754 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2755 "Vector elements must have same size");
2756
2757 // Do a direct cast if element types are castable.
2758 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2759 return Builder.CreateBitOrPointerCast(V, DstVTy);
2760 }
2761 // V cannot be directly casted to desired vector type.
2762 // May happen when V is a floating point vector but DstVTy is a vector of
2763 // pointers or vice-versa. Handle this using a two-step bitcast using an
2764 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2765 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2766 "Only one type should be a pointer type");
2767 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2768 "Only one type should be a floating point type");
2769 Type *IntTy =
2770 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2771 auto *VecIntTy = VectorType::get(IntTy, VF);
2772 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2773 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2774}
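// Worked example (assumed types): casting <4 x ptr> to <4 x double> on a target
// with 64-bit pointers goes through the intermediate integer vector:
//   %ints = ptrtoint <4 x ptr> %v to <4 x i64>
//   %flts = bitcast <4 x i64> %ints to <4 x double>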
2775
2776/// Return a vector containing interleaved elements from multiple
2777/// smaller input vectors.
2779 const Twine &Name) {
2780 unsigned Factor = Vals.size();
2781 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2782
2783 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2784#ifndef NDEBUG
2785 for (Value *Val : Vals)
2786 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2787#endif
2788
2789 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2790 // must use intrinsics to interleave.
2791 if (VecTy->isScalableTy()) {
2792 assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
2793 "scalable vectors, must be power of 2");
2794 SmallVector<Value *> InterleavingValues(Vals);
2795 // When interleaving, the number of values halves each round until only the
2796 // single final interleaved value remains.
2797 auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
2798 for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
2799 InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
2800 for (unsigned I = 0; I < Midpoint; ++I)
2801 InterleavingValues[I] = Builder.CreateIntrinsic(
2802 InterleaveTy, Intrinsic::vector_interleave2,
2803 {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2804 /*FMFSource=*/nullptr, Name);
2805 }
2806 return InterleavingValues[0];
2807 }
2808
2809 // Fixed length. Start by concatenating all vectors into a wide vector.
2810 Value *WideVec = concatenateVectors(Builder, Vals);
2811
2812 // Interleave the elements into the wide vector.
2813 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2814 return Builder.CreateShuffleVector(
2815 WideVec, createInterleaveMask(NumElts, Factor), Name);
2816}
2817
2818// Try to vectorize the interleave group that \p Instr belongs to.
2819//
2820// E.g. Translate following interleaved load group (factor = 3):
2821// for (i = 0; i < N; i+=3) {
2822// R = Pic[i]; // Member of index 0
2823// G = Pic[i+1]; // Member of index 1
2824// B = Pic[i+2]; // Member of index 2
2825// ... // do something to R, G, B
2826// }
2827// To:
2828// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2829// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2830// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2831// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2832//
2833// Or translate following interleaved store group (factor = 3):
2834// for (i = 0; i < N; i+=3) {
2835// ... do something to R, G, B
2836// Pic[i] = R; // Member of index 0
2837// Pic[i+1] = G; // Member of index 1
2838// Pic[i+2] = B; // Member of index 2
2839// }
2840// To:
2841// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2842// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2843// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2844// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2845// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2847 assert(!State.Lane && "Interleave group being replicated.");
2848 const InterleaveGroup<Instruction> *Group = IG;
2849 Instruction *Instr = Group->getInsertPos();
2850
2851 // Prepare for the vector type of the interleaved load/store.
2852 Type *ScalarTy = getLoadStoreType(Instr);
2853 unsigned InterleaveFactor = Group->getFactor();
2854 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2855
2856 // TODO: extend the masked interleaved-group support to reversed access.
2857 VPValue *BlockInMask = getMask();
2858 assert((!BlockInMask || !Group->isReverse()) &&
2859 "Reversed masked interleave-group not supported.");
2860
2861 VPValue *Addr = getAddr();
2862 Value *ResAddr = State.get(Addr, VPLane(0));
2863 if (auto *I = dyn_cast<Instruction>(ResAddr))
2864 State.setDebugLocFrom(I->getDebugLoc());
2865
2866 // If the group is reverse, adjust the index to refer to the last vector lane
2867 // instead of the first. We adjust the index from the first vector lane,
2868 // rather than directly getting the pointer for lane VF - 1, because the
2869 // pointer operand of the interleaved access is supposed to be uniform.
2870 if (Group->isReverse()) {
2871 Value *RuntimeVF =
2872 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2873 Value *Index =
2874 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2875 Index = State.Builder.CreateMul(Index,
2876 State.Builder.getInt32(Group->getFactor()));
2877 Index = State.Builder.CreateNeg(Index);
2878
2879 bool InBounds = false;
2880 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2881 InBounds = Gep->isInBounds();
2882 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2883 }
2884
2885 State.setDebugLocFrom(Instr->getDebugLoc());
2886 Value *PoisonVec = PoisonValue::get(VecTy);
2887
2888 auto CreateGroupMask = [&BlockInMask, &State,
2889 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2890 if (State.VF.isScalable()) {
2891 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2892 assert(isPowerOf2_32(InterleaveFactor) &&
2893 "Unsupported deinterleave factor for scalable vectors");
2894 auto *ResBlockInMask = State.get(BlockInMask);
2895 SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
2896 return interleaveVectors(State.Builder, Ops, "interleaved.mask");
2897 }
2898
2899 if (!BlockInMask)
2900 return MaskForGaps;
2901
2902 Value *ResBlockInMask = State.get(BlockInMask);
2903 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2904 ResBlockInMask,
2905 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2906 "interleaved.mask");
2907 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2908 ShuffledMask, MaskForGaps)
2909 : ShuffledMask;
2910 };
2911
2912 const DataLayout &DL = Instr->getDataLayout();
2913 // Vectorize the interleaved load group.
2914 if (isa<LoadInst>(Instr)) {
2915 Value *MaskForGaps = nullptr;
2916 if (NeedsMaskForGaps) {
2917 MaskForGaps = createBitMaskForGaps(State.Builder,
2918 State.VF.getKnownMinValue(), *Group);
2919 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2920 }
2921
2922 Instruction *NewLoad;
2923 if (BlockInMask || MaskForGaps) {
2924 Value *GroupMask = CreateGroupMask(MaskForGaps);
2925 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2926 Group->getAlign(), GroupMask,
2927 PoisonVec, "wide.masked.vec");
2928 } else
2929 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2930 Group->getAlign(), "wide.vec");
2931 Group->addMetadata(NewLoad);
2932
2934 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2935 if (VecTy->isScalableTy()) {
2936 assert(isPowerOf2_32(InterleaveFactor) &&
2937 "Unsupported deinterleave factor for scalable vectors");
2938
2939 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2940 // so must use intrinsics to deinterleave.
2941 SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
2942 DeinterleavedValues[0] = NewLoad;
2943 // For InterleaveFactor > 2 we have to deinterleave recursively, because
2944 // the currently available deinterleave intrinsic only supports a factor
2945 // of 2, so a single round only splits the wide load in half.
2946 //
2947 // Each round of deinterleaving doubles the number of values until we
2948 // have "InterleaveFactor" of them.
2949 for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
2950 NumVectors *= 2) {
2951 // Deinterleave the elements within the vector
2952 SmallVector<Value *> TempDeinterleavedValues(NumVectors);
2953 for (unsigned I = 0; I < NumVectors; ++I) {
2954 auto *DiTy = DeinterleavedValues[I]->getType();
2955 TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
2956 Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
2957 /*FMFSource=*/nullptr, "strided.vec");
2958 }
2959 // Extract the deinterleaved values:
2960 for (unsigned I = 0; I < 2; ++I)
2961 for (unsigned J = 0; J < NumVectors; ++J)
2962 DeinterleavedValues[NumVectors * I + J] =
2963 State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
2964 }
2965
2966#ifndef NDEBUG
2967 for (Value *Val : DeinterleavedValues)
2968 assert(Val && "NULL Deinterleaved Value");
2969#endif
2970 for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
2971 Instruction *Member = Group->getMember(I);
2972 Value *StridedVec = DeinterleavedValues[I];
2973 if (!Member) {
2974 // This member is a gap in the group, so the deinterleaved value is unused.
2975 cast<Instruction>(StridedVec)->eraseFromParent();
2976 continue;
2977 }
2978 // If this member has different type, cast the result type.
2979 if (Member->getType() != ScalarTy) {
2980 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2981 StridedVec =
2982 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2983 }
2984
2985 if (Group->isReverse())
2986 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2987
2988 State.set(VPDefs[J], StridedVec);
2989 ++J;
2990 }
2991
2992 return;
2993 }
2994
2995 // For each member in the group, shuffle out the appropriate data from the
2996 // wide loads.
2997 unsigned J = 0;
2998 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2999 Instruction *Member = Group->getMember(I);
3000
3001 // Skip the gaps in the group.
3002 if (!Member)
3003 continue;
3004
3005 auto StrideMask =
3006 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3007 Value *StridedVec =
3008 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3009
3010 // If this member has different type, cast the result type.
3011 if (Member->getType() != ScalarTy) {
3012 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3013 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3014 StridedVec =
3015 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3016 }
3017
3018 if (Group->isReverse())
3019 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3020
3021 State.set(VPDefs[J], StridedVec);
3022 ++J;
3023 }
3024 return;
3025 }
3026
3027 // The sub vector type for current instruction.
3028 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3029
3030 // Vectorize the interleaved store group.
3031 Value *MaskForGaps =
3032 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3033 assert((!MaskForGaps || !State.VF.isScalable()) &&
3034 "masking gaps for scalable vectors is not yet supported.");
3035 ArrayRef<VPValue *> StoredValues = getStoredValues();
3036 // Collect the stored vector from each member.
3037 SmallVector<Value *, 4> StoredVecs;
3038 unsigned StoredIdx = 0;
3039 for (unsigned i = 0; i < InterleaveFactor; i++) {
3040 assert((Group->getMember(i) || MaskForGaps) &&
3041 "Fail to get a member from an interleaved store group");
3042 Instruction *Member = Group->getMember(i);
3043
3044 // Skip the gaps in the group.
3045 if (!Member) {
3046 Value *Undef = PoisonValue::get(SubVT);
3047 StoredVecs.push_back(Undef);
3048 continue;
3049 }
3050
3051 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3052 ++StoredIdx;
3053
3054 if (Group->isReverse())
3055 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3056
3057 // If this member has different type, cast it to a unified type.
3058
3059 if (StoredVec->getType() != SubVT)
3060 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3061
3062 StoredVecs.push_back(StoredVec);
3063 }
3064
3065 // Interleave all the smaller vectors into one wider vector.
3066 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3067 Instruction *NewStoreInstr;
3068 if (BlockInMask || MaskForGaps) {
3069 Value *GroupMask = CreateGroupMask(MaskForGaps);
3070 NewStoreInstr = State.Builder.CreateMaskedStore(
3071 IVec, ResAddr, Group->getAlign(), GroupMask);
3072 } else
3073 NewStoreInstr =
3074 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3075
3076 Group->addMetadata(NewStoreInstr);
3077}
3078
3079#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3081 VPSlotTracker &SlotTracker) const {
3082 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3083 IG->getInsertPos()->printAsOperand(O, false);
3084 O << ", ";
3086 VPValue *Mask = getMask();
3087 if (Mask) {
3088 O << ", ";
3089 Mask->printAsOperand(O, SlotTracker);
3090 }
3091
3092 unsigned OpIdx = 0;
3093 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3094 if (!IG->getMember(i))
3095 continue;
3096 if (getNumStoreOperands() > 0) {
3097 O << "\n" << Indent << " store ";
3098 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3099 O << " to index " << i;
3100 } else {
3101 O << "\n" << Indent << " ";
3103 O << " = load from index " << i;
3104 }
3105 ++OpIdx;
3106 }
3107}
3108#endif
3109
3111 VPCostContext &Ctx) const {
3112 Instruction *InsertPos = getInsertPos();
3113 // Find the VPValue index of the interleave group. We need to skip gaps.
3114 unsigned InsertPosIdx = 0;
3115 for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx)
3116 if (auto *Member = IG->getMember(Idx)) {
3117 if (Member == InsertPos)
3118 break;
3119 InsertPosIdx++;
3120 }
3121 Type *ValTy = Ctx.Types.inferScalarType(
3122 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3123 : getStoredValues()[InsertPosIdx]);
3124 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3125 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3127
3128 unsigned InterleaveFactor = IG->getFactor();
3129 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3130
3131 // Holds the indices of existing members in the interleaved group.
3133 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3134 if (IG->getMember(IF))
3135 Indices.push_back(IF);
3136
3137 // Calculate the cost of the whole interleaved group.
3139 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3140 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3141
3142 if (!IG->isReverse())
3143 return Cost;
3144
3145 return Cost + IG->getNumMembers() *
3147 VectorTy, std::nullopt, CostKind, 0);
3148}
3149
3150#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3152 VPSlotTracker &SlotTracker) const {
3153 O << Indent << "EMIT ";
3155 O << " = CANONICAL-INDUCTION ";
3157}
3158#endif
3159
3161 return IsScalarAfterVectorization &&
3162 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3163}
3164
3166 assert(getInductionDescriptor().getKind() ==
3168 "Not a pointer induction according to InductionDescriptor!");
3169 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3170 "Unexpected type.");
3172 "Recipe should have been replaced");
3173
3174 unsigned CurrentPart = getUnrollPart(*this);
3175
3176 // Build a pointer phi
3177 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3178 Type *ScStValueType = ScalarStartValue->getType();
3179
3180 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3181 PHINode *NewPointerPhi = nullptr;
3182 if (CurrentPart == 0) {
3183 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3184 ->getPlan()
3185 ->getVectorLoopRegion()
3186 ->getEntryBasicBlock()
3187 ->front());
3188 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3189 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3190 CanonicalIV->getIterator());
3191 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3192 NewPointerPhi->setDebugLoc(getDebugLoc());
3193 } else {
3194 // The recipe has been unrolled. In that case, fetch the single pointer phi
3195 // shared among all unrolled parts of the recipe.
3196 auto *GEP =
3197 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3198 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3199 }
3200
3201 // A pointer induction, performed by using a gep
3202 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3203 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3204 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3205 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3206 // Add induction update using an incorrect block temporarily. The phi node
3207 // will be fixed after VPlan execution. Note that at this point the latch
3208 // block cannot be used, as it does not exist yet.
3209 // TODO: Model increment value in VPlan, by turning the recipe into a
3210 // multi-def and a subclass of VPHeaderPHIRecipe.
3211 if (CurrentPart == 0) {
3212 // The recipe represents the first part of the pointer induction. Create the
3213 // GEP to increment the phi across all unrolled parts.
3214 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3215 Value *NumUnrolledElems =
3216 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3217
3218 Value *InductionGEP = GetElementPtrInst::Create(
3219 State.Builder.getInt8Ty(), NewPointerPhi,
3220 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3221 InductionLoc);
3222
3223 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3224 }
3225
3226 // Create actual address geps that use the pointer phi as base and a
3227 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3228 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3229 Value *StartOffsetScalar = State.Builder.CreateMul(
3230 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3231 Value *StartOffset =
3232 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3233 // Create a vector of consecutive numbers from zero to VF.
3234 StartOffset = State.Builder.CreateAdd(
3235 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3236
3237 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3238 "scalar step must be the same across all parts");
3239 Value *GEP = State.Builder.CreateGEP(
3240 State.Builder.getInt8Ty(), NewPointerPhi,
3241 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3242 State.VF, ScalarStepValue)),
3243 "vector.gep");
3244 State.set(this, GEP);
3245}
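// Illustrative IR (assumed values, fixed VF = 4): each part adds its lane
// offsets to the shared pointer phi via a vector GEP over i8:
//   %off        = add <4 x i64> %splat.part.start, <i64 0, i64 1, i64 2, i64 3>
//   %scaled     = mul <4 x i64> %off, %splat.step
//   %vector.gep = getelementptr i8, ptr %pointer.phi, <4 x i64> %scaled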
3246
3247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3249 VPSlotTracker &SlotTracker) const {
3250 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3251 "unexpected number of operands");
3252 O << Indent << "EMIT ";
3254 O << " = WIDEN-POINTER-INDUCTION ";
3256 O << ", ";
3258 if (getNumOperands() == 4) {
3259 O << ", ";
3261 O << ", ";
3263 }
3264}
3265#endif
3266
3268 assert(!State.Lane && "cannot be used in per-lane");
3269 if (State.ExpandedSCEVs.contains(Expr)) {
3270 // SCEV Expr has already been expanded, result must already be set. At the
3271 // moment we have to execute the entry block twice (once before skeleton
3272 // creation to get expanded SCEVs used by the skeleton and once during
3273 // regular VPlan execution).
3275 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3276 "Results must match");
3277 return;
3278 }
3279
3280 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3281 SCEVExpander Exp(SE, DL, "induction");
3282
3283 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3284 &*State.Builder.GetInsertPoint());
3285 State.ExpandedSCEVs[Expr] = Res;
3286 State.set(this, Res, VPLane(0));
3287}
3288
3289#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3291 VPSlotTracker &SlotTracker) const {
3292 O << Indent << "EMIT ";
3294 O << " = EXPAND SCEV " << *Expr;
3295}
3296#endif
3297
3299 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3300 Type *STy = CanonicalIV->getType();
3301 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3302 ElementCount VF = State.VF;
3303 Value *VStart = VF.isScalar()
3304 ? CanonicalIV
3305 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3306 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3307 if (VF.isVector()) {
3308 VStep = Builder.CreateVectorSplat(VF, VStep);
3309 VStep =
3310 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3311 }
3312 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3313 State.set(this, CanonicalVectorIV);
3314}
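// Worked example (assumed values): for an i64 canonical IV with a fixed VF = 4
// and unroll part 1, VStep becomes splat(4) + <0,1,2,3>, so the recipe emits
//   %vec.iv = add <4 x i64> %broadcast.iv, <i64 4, i64 5, i64 6, i64 7>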
3315
3316#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3318 VPSlotTracker &SlotTracker) const {
3319 O << Indent << "EMIT ";
3321 O << " = WIDEN-CANONICAL-INDUCTION ";
3323}
3324#endif
3325
3327 auto &Builder = State.Builder;
3328 // Create a vector from the initial value.
3329 auto *VectorInit = getStartValue()->getLiveInIRValue();
3330
3331 Type *VecTy = State.VF.isScalar()
3332 ? VectorInit->getType()
3333 : VectorType::get(VectorInit->getType(), State.VF);
3334
3335 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3336 if (State.VF.isVector()) {
3337 auto *IdxTy = Builder.getInt32Ty();
3338 auto *One = ConstantInt::get(IdxTy, 1);
3339 IRBuilder<>::InsertPointGuard Guard(Builder);
3340 Builder.SetInsertPoint(VectorPH->getTerminator());
3341 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3342 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3343 VectorInit = Builder.CreateInsertElement(
3344 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3345 }
3346
3347 // Create a phi node for the new recurrence.
3348 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3349 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3350 Phi->addIncoming(VectorInit, VectorPH);
3351 State.set(this, Phi);
3352}
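// Worked example (assumed values): with VF = 4 the initial scalar value is
// placed in the last lane of an otherwise poison vector,
//   %vector.recur.init = insertelement <4 x i32> poison, i32 %pre.loop.val, i32 3
// so that the splice in the loop body reads it as the value carried over from
// the "previous" iteration.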
3353
3356 VPCostContext &Ctx) const {
3358 if (VF.isScalar())
3359 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3360
3361 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3362 return InstructionCost::getInvalid();
3363
3364 SmallVector<int> Mask(VF.getKnownMinValue());
3365 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3366 Type *VectorTy =
3367 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3368
3370 cast<VectorType>(VectorTy), Mask, CostKind,
3371 VF.getKnownMinValue() - 1);
3372}
3373
3374#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3376 VPSlotTracker &SlotTracker) const {
3377 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3379 O << " = phi ";
3381}
3382#endif
3383
3385 auto &Builder = State.Builder;
3386
3387 // Reductions do not have to start at zero. They can start with
3388 // any loop invariant values.
3389 VPValue *StartVPV = getStartValue();
3390 Value *StartV = StartVPV->getLiveInIRValue();
3391
3392 // In order to support recurrences we need to be able to vectorize Phi nodes.
3393 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3394 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3395 // this value when we vectorize all of the instructions that use the PHI.
3396 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3397 Type *VecTy = ScalarPHI ? StartV->getType()
3398 : VectorType::get(StartV->getType(), State.VF);
3399
3400 BasicBlock *HeaderBB = State.CFG.PrevBB;
3401 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
3402 "recipe must be in the vector loop header");
3403 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3404 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3405 State.set(this, Phi, IsInLoop);
3406
3407 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3408
3409 Value *Iden = nullptr;
3410 RecurKind RK = RdxDesc.getRecurrenceKind();
3411 unsigned CurrentPart = getUnrollPart(*this);
3412
3413 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
3414 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
3415 // MinMax and AnyOf reductions have the start value as their identity.
3416 if (ScalarPHI) {
3417 Iden = StartV;
3418 } else {
3419 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3420 Builder.SetInsertPoint(VectorPH->getTerminator());
3421 StartV = Iden = State.get(StartVPV);
3422 }
3423 } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3424 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3425 // phi or the resume value from the main vector loop when vectorizing the
3426 // epilogue loop. In the exit block, ComputeReductionResult will generate
3427 // checks to verify if the reduction result is the sentinel value. If the
3428 // result is the sentinel value, it will be corrected back to the start
3429 // value.
3430 // TODO: The sentinel value is not always necessary. When the start value is
3431 // a constant, and smaller than the start value of the induction variable,
3432 // the start value can be directly used to initialize the reduction phi.
3433 Iden = StartV;
3434 if (!ScalarPHI) {
3435 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3436 Builder.SetInsertPoint(VectorPH->getTerminator());
3437 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3438 }
3439 } else {
3440 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3441 RdxDesc.getFastMathFlags());
3442
3443 if (!ScalarPHI) {
3444 if (CurrentPart == 0) {
3445 // Create start and identity vector values for the reduction in the
3446 // preheader.
3447 // TODO: Introduce recipes in VPlan preheader to create initial values.
3448 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3449 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3450 Builder.SetInsertPoint(VectorPH->getTerminator());
3451 Constant *Zero = Builder.getInt32(0);
3452 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3453 } else {
3454 Iden = Builder.CreateVectorSplat(State.VF, Iden);
3455 }
3456 }
3457 }
3458
3459 Phi = cast<PHINode>(State.get(this, IsInLoop));
3460 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3461 Phi->addIncoming(StartVal, VectorPH);
3462}
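// For illustration (assumed recurrence kinds): an integer add reduction uses
// the identity 0, so part 0 starts as <StartV, 0, 0, 0> and later parts as
// zeroinitializer; min/max and any-of reductions splat the start value itself,
// and FindLastIV phis start from the sentinel value.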
3463
3464#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3466 VPSlotTracker &SlotTracker) const {
3467 O << Indent << "WIDEN-REDUCTION-PHI ";
3468
3470 O << " = phi ";
3472}
3473#endif
3474
3477 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3478
3479 Value *Op0 = State.get(getOperand(0));
3480 Type *VecTy = Op0->getType();
3481 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3482 State.set(this, VecPhi);
3483}
3484
3485#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3487 VPSlotTracker &SlotTracker) const {
3488 O << Indent << "WIDEN-PHI ";
3489
3490 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3491 // Unless all incoming values are modeled in VPlan print the original PHI
3492 // directly.
3493 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3494 // values as VPValues.
3495 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3496 O << VPlanIngredient(OriginalPhi);
3497 return;
3498 }
3499
3500 printAsOperand(O, SlotTracker);
3501 O << " = phi ";
3502 printOperands(O, SlotTracker);
3503}
3504#endif
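// Illustrative debug output, with placeholder names: when all incoming values
// are modeled as VPValues this prints e.g.
//
//   WIDEN-PHI ir<%p> = phi vp<%a>, vp<%b>
//
// otherwise it falls back to printing the original IR phi via VPlanIngredient.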
3505
3506// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3507// remove VPActiveLaneMaskPHIRecipe.
3508void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
3509 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3510 Value *StartMask = State.get(getOperand(0));
3511 PHINode *Phi =
3512 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3513 Phi->addIncoming(StartMask, VectorPH);
3514 Phi->setDebugLoc(getDebugLoc());
3515 State.set(this, Phi);
3516}
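// Illustrative result, with placeholder names and a fixed-width <4 x i1> mask:
// the phi created above starts with only its preheader incoming value, e.g.
//
//   %active.lane.mask = phi <4 x i1> [ %active.lane.mask.entry, %vector.ph ]
//
// The backedge value is added later, once the mask for the next iteration has
// been computed.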
3517
3518#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3519void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3520 VPSlotTracker &SlotTracker) const {
3521 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3522
3523 printAsOperand(O, SlotTracker);
3524 O << " = phi ";
3525 printOperands(O, SlotTracker);
3526}
3527#endif
3528
3529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3530void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3531 VPSlotTracker &SlotTracker) const {
3532 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3533
3534 printAsOperand(O, SlotTracker);
3535 O << " = phi ";
3536 printOperands(O, SlotTracker);
3537}
3538#endif
3539
3540void VPScalarPHIRecipe::execute(VPTransformState &State) {
3541 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3542 Value *Start = State.get(getStartValue(), VPLane(0));
3543 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3544 Phi->addIncoming(Start, VectorPH);
3545 Phi->setDebugLoc(getDebugLoc());
3546 State.set(this, Phi, /*IsScalar=*/true);
3547}
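// Illustrative result, with placeholder names: for a start value of 0 this
// creates a scalar header phi such as
//
//   %scalar.phi = phi i64 [ 0, %vector.ph ]
//
// with the loop-carried incoming value added once the rest of the vector loop
// body has been generated.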
3548
3549#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3550void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3551 VPSlotTracker &SlotTracker) const {
3552 O << Indent << "SCALAR-PHI ";
3553 printAsOperand(O, SlotTracker);
3554 O << " = phi ";
3555 printOperands(O, SlotTracker);
3556}
3557#endif