1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "VPlanPatternMatch.h"
17#include "VPlanUtils.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Twine.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/Type.h"
28#include "llvm/IR/Value.h"
32#include "llvm/Support/Debug.h"
37#include <cassert>
38
39using namespace llvm;
40
41using VectorParts = SmallVector<Value *, 2>;
42
43namespace llvm {
44extern cl::opt<bool> EnableVPlanNativePath;
45}
46extern cl::opt<unsigned> ForceTargetInstructionCost;
47
48#define LV_NAME "loop-vectorize"
49#define DEBUG_TYPE LV_NAME
50
51bool VPRecipeBase::mayWriteToMemory() const {
52 switch (getVPDefID()) {
53 case VPInstructionSC:
54 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
55 case VPInterleaveSC:
56 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
57 case VPWidenStoreEVLSC:
58 case VPWidenStoreSC:
59 return true;
60 case VPReplicateSC:
61 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
62 ->mayWriteToMemory();
63 case VPWidenCallSC:
64 return !cast<VPWidenCallRecipe>(this)
65 ->getCalledScalarFunction()
66 ->onlyReadsMemory();
67 case VPWidenIntrinsicSC:
68 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
69 case VPBranchOnMaskSC:
70 case VPScalarIVStepsSC:
71 case VPPredInstPHISC:
72 return false;
73 case VPBlendSC:
74 case VPReductionEVLSC:
75 case VPReductionSC:
76 case VPVectorPointerSC:
77 case VPWidenCanonicalIVSC:
78 case VPWidenCastSC:
79 case VPWidenGEPSC:
80 case VPWidenIntOrFpInductionSC:
81 case VPWidenLoadEVLSC:
82 case VPWidenLoadSC:
83 case VPWidenPHISC:
84 case VPWidenSC:
85 case VPWidenEVLSC:
86 case VPWidenSelectSC: {
87 const Instruction *I =
88 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
89 (void)I;
90 assert((!I || !I->mayWriteToMemory()) &&
91 "underlying instruction may write to memory");
92 return false;
93 }
94 default:
95 return true;
96 }
97}
98
99bool VPRecipeBase::mayReadFromMemory() const {
100 switch (getVPDefID()) {
101 case VPInstructionSC:
102 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
103 case VPWidenLoadEVLSC:
104 case VPWidenLoadSC:
105 return true;
106 case VPReplicateSC:
107 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
108 ->mayReadFromMemory();
109 case VPWidenCallSC:
110 return !cast<VPWidenCallRecipe>(this)
111 ->getCalledScalarFunction()
112 ->onlyWritesMemory();
113 case VPWidenIntrinsicSC:
114 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
115 case VPBranchOnMaskSC:
116 case VPPredInstPHISC:
117 case VPScalarIVStepsSC:
118 case VPWidenStoreEVLSC:
119 case VPWidenStoreSC:
120 return false;
121 case VPBlendSC:
122 case VPReductionEVLSC:
123 case VPReductionSC:
124 case VPVectorPointerSC:
125 case VPWidenCanonicalIVSC:
126 case VPWidenCastSC:
127 case VPWidenGEPSC:
128 case VPWidenIntOrFpInductionSC:
129 case VPWidenPHISC:
130 case VPWidenSC:
131 case VPWidenEVLSC:
132 case VPWidenSelectSC: {
133 const Instruction *I =
134 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
135 (void)I;
136 assert((!I || !I->mayReadFromMemory()) &&
137 "underlying instruction may read from memory");
138 return false;
139 }
140 default:
141 return true;
142 }
143}
144
145bool VPRecipeBase::mayHaveSideEffects() const {
146 switch (getVPDefID()) {
147 case VPDerivedIVSC:
148 case VPPredInstPHISC:
149 case VPScalarCastSC:
150 case VPReverseVectorPointerSC:
151 return false;
152 case VPInstructionSC:
153 return mayWriteToMemory();
154 case VPWidenCallSC: {
155 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
156 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
157 }
158 case VPWidenIntrinsicSC:
159 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
160 case VPBlendSC:
161 case VPReductionEVLSC:
162 case VPReductionSC:
163 case VPScalarIVStepsSC:
164 case VPVectorPointerSC:
165 case VPWidenCanonicalIVSC:
166 case VPWidenCastSC:
167 case VPWidenGEPSC:
168 case VPWidenIntOrFpInductionSC:
169 case VPWidenPHISC:
170 case VPWidenPointerInductionSC:
171 case VPWidenSC:
172 case VPWidenEVLSC:
173 case VPWidenSelectSC: {
174 const Instruction *I =
175 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
176 (void)I;
177 assert((!I || !I->mayHaveSideEffects()) &&
178 "underlying instruction has side-effects");
179 return false;
180 }
181 case VPInterleaveSC:
182 return mayWriteToMemory();
183 case VPWidenLoadEVLSC:
184 case VPWidenLoadSC:
185 case VPWidenStoreEVLSC:
186 case VPWidenStoreSC:
187 assert(
188 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
189 mayWriteToMemory() &&
190 "mayHaveSideEffects result for ingredient differs from this "
191 "implementation");
192 return mayWriteToMemory();
193 case VPReplicateSC: {
194 auto *R = cast<VPReplicateRecipe>(this);
195 return R->getUnderlyingInstr()->mayHaveSideEffects();
196 }
197 default:
198 return true;
199 }
200}
201
202void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
203 assert(!Parent && "Recipe already in some VPBasicBlock");
204 assert(InsertPos->getParent() &&
205 "Insertion position not in any VPBasicBlock");
206 InsertPos->getParent()->insert(this, InsertPos->getIterator());
207}
208
209void VPRecipeBase::insertBefore(VPBasicBlock &BB,
210 iplist<VPRecipeBase>::iterator I) {
211 assert(!Parent && "Recipe already in some VPBasicBlock");
212 assert(I == BB.end() || I->getParent() == &BB);
213 BB.insert(this, I);
214}
215
216void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
217 assert(!Parent && "Recipe already in some VPBasicBlock");
218 assert(InsertPos->getParent() &&
219 "Insertion position not in any VPBasicBlock");
220 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
221}
222
223void VPRecipeBase::removeFromParent() {
224 assert(getParent() && "Recipe not in any VPBasicBlock");
225 getParent()->getRecipeList().remove(getIterator());
226 Parent = nullptr;
227}
228
229iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
230 assert(getParent() && "Recipe not in any VPBasicBlock");
231 return getParent()->getRecipeList().erase(getIterator());
232}
233
234void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
235 removeFromParent();
236 insertAfter(InsertPos);
237}
238
239void VPRecipeBase::moveBefore(VPBasicBlock &BB,
240 iplist<VPRecipeBase>::iterator I) {
241 removeFromParent();
242 insertBefore(BB, I);
243}
244
245InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
246 // Get the underlying instruction for the recipe, if there is one. It is used
247 // to
248 // * decide if cost computation should be skipped for this recipe,
249 // * apply forced target instruction cost.
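 // (The forced cost here comes from the -force-target-instruction-cost option,
 // surfaced through the ForceTargetInstructionCost cl::opt declared above.)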
250 Instruction *UI = nullptr;
251 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
252 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
253 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
254 UI = IG->getInsertPos();
255 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
256 UI = &WidenMem->getIngredient();
257
258 InstructionCost RecipeCost;
259 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
260 RecipeCost = 0;
261 } else {
262 RecipeCost = computeCost(VF, Ctx);
263 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
264 RecipeCost.isValid())
265 RecipeCost = InstructionCost(ForceTargetInstructionCost);
266 }
267
268 LLVM_DEBUG({
269 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
270 dump();
271 });
272 return RecipeCost;
273}
274
275InstructionCost VPRecipeBase::computeCost(ElementCount VF,
276 VPCostContext &Ctx) const {
277 llvm_unreachable("subclasses should implement computeCost");
278}
279
280InstructionCost
281VPPartialReductionRecipe::computeCost(ElementCount VF,
282 VPCostContext &Ctx) const {
283 std::optional<unsigned> Opcode = std::nullopt;
284 VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe();
285 if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
286 Opcode = std::make_optional(WidenR->getOpcode());
287
288 VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
289 VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
290
291 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
292 auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
293 : BinOpR->getOperand(0));
294 auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
295 : BinOpR->getOperand(1));
296
297 auto GetExtendKind = [](VPRecipeBase *R) {
298 // The extend could come from outside the plan.
299 if (!R)
300 return TargetTransformInfo::PR_None;
301 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
302 if (!WidenCastR)
303 return TargetTransformInfo::PR_None;
304 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
305 return TargetTransformInfo::PR_ZeroExtend;
306 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
307 return TargetTransformInfo::PR_SignExtend;
308 return TargetTransformInfo::PR_None;
309 };
310
311 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
312 PhiType, VF, GetExtendKind(ExtAR),
313 GetExtendKind(ExtBR), Opcode);
314}
315
316void VPPartialReductionRecipe::execute(VPTransformState &State) {
317 State.setDebugLocFrom(getDebugLoc());
318 auto &Builder = State.Builder;
319
320 assert(getOpcode() == Instruction::Add &&
321 "Unhandled partial reduction opcode");
322
323 Value *BinOpVal = State.get(getOperand(0));
324 Value *PhiVal = State.get(getOperand(1));
325 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
326
327 Type *RetTy = PhiVal->getType();
328
329 CallInst *V = Builder.CreateIntrinsic(
330 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
331 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
332
333 State.set(this, V);
334}
335
336#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
337void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,
338 VPSlotTracker &SlotTracker) const {
339 O << Indent << "PARTIAL-REDUCE ";
340 printAsOperand(O, SlotTracker);
341 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
342 printOperands(O, SlotTracker);
343}
344#endif
345
346FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
347 assert(OpType == OperationType::FPMathOp &&
348 "recipe doesn't have fast math flags");
349 FastMathFlags Res;
350 Res.setAllowReassoc(FMFs.AllowReassoc);
351 Res.setNoNaNs(FMFs.NoNaNs);
352 Res.setNoInfs(FMFs.NoInfs);
353 Res.setNoSignedZeros(FMFs.NoSignedZeros);
354 Res.setAllowReciprocal(FMFs.AllowReciprocal);
355 Res.setAllowContract(FMFs.AllowContract);
356 Res.setApproxFunc(FMFs.ApproxFunc);
357 return Res;
358}
359
360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
361void VPSingleDefRecipe::dump() const { VPDef::dump(); }
362#endif
363
364template <unsigned PartOpIdx>
365VPValue *
366VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
367 if (U.getNumOperands() == PartOpIdx + 1)
368 return U.getOperand(PartOpIdx);
369 return nullptr;
370}
371
372template <unsigned PartOpIdx>
373unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
374 if (auto *UnrollPartOp = getUnrollPartOperand(U))
375 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
376 return 0;
377}
378
379VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
380 VPValue *A, VPValue *B, DebugLoc DL,
381 const Twine &Name)
382 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
383 Pred, DL),
384 Opcode(Opcode), Name(Name.str()) {
385 assert(Opcode == Instruction::ICmp &&
386 "only ICmp predicates supported at the moment");
387}
388
389VPInstruction::VPInstruction(unsigned Opcode,
390 std::initializer_list<VPValue *> Operands,
391 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
392 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
393 Opcode(Opcode), Name(Name.str()) {
394 // Make sure the VPInstruction is a floating-point operation.
395 assert(isFPMathOp() && "this op can't take fast-math flags");
396}
397
398bool VPInstruction::doesGeneratePerAllLanes() const {
399 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
400}
401
402bool VPInstruction::canGenerateScalarForFirstLane() const {
403 if (Instruction::isBinaryOp(getOpcode()))
404 return true;
405 if (isSingleScalar() || isVectorToScalar())
406 return true;
407 switch (Opcode) {
408 case Instruction::ICmp:
409 case Instruction::Select:
410 case VPInstruction::BranchOnCond:
411 case VPInstruction::BranchOnCount:
412 case VPInstruction::CalculateTripCountMinusVF:
413 case VPInstruction::CanonicalIVIncrementForPart:
414 case VPInstruction::PtrAdd:
415 case VPInstruction::ExplicitVectorLength:
416 case VPInstruction::AnyOf:
417 return true;
418 default:
419 return false;
420 }
421}
422
423Value *VPInstruction::generatePerLane(VPTransformState &State,
424 const VPLane &Lane) {
425 IRBuilderBase &Builder = State.Builder;
426
427 assert(getOpcode() == VPInstruction::PtrAdd &&
428 "only PtrAdd opcodes are supported for now");
429 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
430 State.get(getOperand(1), Lane), Name);
431}
432
433Value *VPInstruction::generate(VPTransformState &State) {
434 IRBuilderBase &Builder = State.Builder;
435
436 if (Instruction::isBinaryOp(getOpcode())) {
437 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
438 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
439 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
440 auto *Res =
441 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
442 if (auto *I = dyn_cast<Instruction>(Res))
443 setFlags(I);
444 return Res;
445 }
446
447 switch (getOpcode()) {
448 case VPInstruction::Not: {
449 Value *A = State.get(getOperand(0));
450 return Builder.CreateNot(A, Name);
451 }
452 case Instruction::ICmp: {
453 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
454 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
455 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
456 return Builder.CreateCmp(getPredicate(), A, B, Name);
457 }
458 case Instruction::Select: {
459 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
460 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
461 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
462 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
463 return Builder.CreateSelect(Cond, Op1, Op2, Name);
464 }
465 case VPInstruction::ActiveLaneMask: {
466 // Get first lane of vector induction variable.
467 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
468 // Get the original loop tripcount.
469 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
470
471 // If this part of the active lane mask is scalar, generate the CMP directly
472 // to avoid unnecessary extracts.
473 if (State.VF.isScalar())
474 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
475 Name);
476
477 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
478 auto *PredTy = VectorType::get(Int1Ty, State.VF);
479 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
480 {PredTy, ScalarTC->getType()},
481 {VIVElem0, ScalarTC}, nullptr, Name);
482 }
483 case VPInstruction::FirstOrderRecurrenceSplice: {
484 // Generate code to combine the previous and current values in vector v3.
485 //
486 // vector.ph:
487 // v_init = vector(..., ..., ..., a[-1])
488 // br vector.body
489 //
490 // vector.body
491 // i = phi [0, vector.ph], [i+4, vector.body]
492 // v1 = phi [v_init, vector.ph], [v2, vector.body]
493 // v2 = a[i, i+1, i+2, i+3];
494 // v3 = vector(v1(3), v2(0, 1, 2))
495
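 // With an immediate of -1, CreateVectorSplice produces the last lane of V1
 // followed by the first VF-1 lanes of V2, i.e. the v3 shown above.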
496 auto *V1 = State.get(getOperand(0));
497 if (!V1->getType()->isVectorTy())
498 return V1;
499 Value *V2 = State.get(getOperand(1));
500 return Builder.CreateVectorSplice(V1, V2, -1, Name);
501 }
502 case VPInstruction::CalculateTripCountMinusVF: {
503 unsigned UF = getParent()->getPlan()->getUF();
504 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
505 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
506 Value *Sub = Builder.CreateSub(ScalarTC, Step);
507 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
508 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
509 return Builder.CreateSelect(Cmp, Sub, Zero);
510 }
511 case VPInstruction::ExplicitVectorLength: {
512 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
513 // be outside of the main loop.
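 // (The get.vector.length intrinsic below yields an EVL that is at most
 // umin(AVL, VF * vscale); on RISC-V it typically lowers to a vsetvli.)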
514 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
515 // Compute EVL
516 assert(AVL->getType()->isIntegerTy() &&
517 "Requested vector length should be an integer.");
518
519 assert(State.VF.isScalable() && "Expected scalable vector factor.");
520 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
521
522 Value *EVL = State.Builder.CreateIntrinsic(
523 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
524 {AVL, VFArg, State.Builder.getTrue()});
525 return EVL;
526 }
527 case VPInstruction::CanonicalIVIncrementForPart: {
528 unsigned Part = getUnrollPart(*this);
529 auto *IV = State.get(getOperand(0), VPLane(0));
530 assert(Part != 0 && "Must have a positive part");
531 // The canonical IV is incremented by the vectorization factor (num of
532 // SIMD elements) times the unroll part.
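 // (For example, with VF = 4 and Part = 2 the step added below is 8, scaled
 // by vscale for scalable vector factors.)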
533 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
534 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
535 hasNoSignedWrap());
536 }
537 case VPInstruction::BranchOnCond: {
538 Value *Cond = State.get(getOperand(0), VPLane(0));
539 // Replace the temporary unreachable terminator with a new conditional
540 // branch, hooking it up to backward destination for exiting blocks now and
541 // to forward destination(s) later when they are created.
542 BranchInst *CondBr =
543 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
544 CondBr->setSuccessor(0, nullptr);
545 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
546
547 if (!getParent()->isExiting())
548 return CondBr;
549
550 VPRegionBlock *ParentRegion = getParent()->getParent();
551 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
552 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
553 return CondBr;
554 }
555 case VPInstruction::BranchOnCount: {
556 // First create the compare.
557 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
558 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
559 Value *Cond = Builder.CreateICmpEQ(IV, TC);
560
561 // Now create the branch.
562 auto *Plan = getParent()->getPlan();
563 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
564 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
565
566 // Replace the temporary unreachable terminator with a new conditional
567 // branch, hooking it up to backward destination (the header) now and to the
568 // forward destination (the exit/middle block) later when it is created.
569 // Note that CreateCondBr expects a valid BB as first argument, so we need
570 // to set it to nullptr later.
571 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
572 State.CFG.VPBB2IRBB[Header]);
573 CondBr->setSuccessor(0, nullptr);
574 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
575 return CondBr;
576 }
577 case VPInstruction::ComputeReductionResult: {
578 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
579 // and will be removed by breaking up the recipe further.
580 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
581 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
582 // Get its reduction variable descriptor.
583 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
584
585 RecurKind RK = RdxDesc.getRecurrenceKind();
586
587 Type *PhiTy = OrigPhi->getType();
588 // The recipe's operands are the reduction phi, followed by one operand for
589 // each part of the reduction.
590 unsigned UF = getNumOperands() - 1;
591 VectorParts RdxParts(UF);
592 for (unsigned Part = 0; Part < UF; ++Part)
593 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
594
595 // If the vector reduction can be performed in a smaller type, we truncate
596 // then extend the loop exit value to enable InstCombine to evaluate the
597 // entire expression in the smaller type.
598 // TODO: Handle this in truncateToMinBW.
599 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
600 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
601 for (unsigned Part = 0; Part < UF; ++Part)
602 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
603 }
604 // Reduce all of the unrolled parts into a single vector.
605 Value *ReducedPartRdx = RdxParts[0];
606 unsigned Op = RdxDesc.getOpcode();
607 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
608 Op = Instruction::Or;
609
610 if (PhiR->isOrdered()) {
611 ReducedPartRdx = RdxParts[UF - 1];
612 } else {
613 // Floating-point operations should have some FMF to enable the reduction.
614 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
615 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
616 for (unsigned Part = 1; Part < UF; ++Part) {
617 Value *RdxPart = RdxParts[Part];
618 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
619 ReducedPartRdx = Builder.CreateBinOp(
620 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
621 else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
622 ReducedPartRdx =
623 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
624 else
625 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
626 }
627 }
628
629 // Create the reduction after the loop. Note that inloop reductions create
630 // the target reduction in the loop using a Reduction recipe.
631 if ((State.VF.isVector() ||
632 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
633 RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
634 !PhiR->isInLoop()) {
635 ReducedPartRdx =
636 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
637 // If the reduction can be performed in a smaller type, we need to extend
638 // the reduction to the wider type before we branch to the original loop.
639 if (PhiTy != RdxDesc.getRecurrenceType())
640 ReducedPartRdx = RdxDesc.isSigned()
641 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
642 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
643 }
644
645 return ReducedPartRdx;
646 }
647 case VPInstruction::ExtractFromEnd: {
648 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
649 unsigned Offset = CI->getZExtValue();
650 assert(Offset > 0 && "Offset from end must be positive");
651 Value *Res;
652 if (State.VF.isVector()) {
653 assert(Offset <= State.VF.getKnownMinValue() &&
654 "invalid offset to extract from");
655 // Extract lane VF - Offset from the operand.
656 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
657 } else {
658 assert(Offset <= 1 && "invalid offset to extract from");
659 Res = State.get(getOperand(0));
660 }
661 if (isa<ExtractElementInst>(Res))
662 Res->setName(Name);
663 return Res;
664 }
665 case VPInstruction::LogicalAnd: {
666 Value *A = State.get(getOperand(0));
667 Value *B = State.get(getOperand(1));
668 return Builder.CreateLogicalAnd(A, B, Name);
669 }
670 case VPInstruction::PtrAdd: {
671 assert(vputils::onlyFirstLaneUsed(this) &&
672 "can only generate first lane for PtrAdd");
673 Value *Ptr = State.get(getOperand(0), VPLane(0));
674 Value *Addend = State.get(getOperand(1), VPLane(0));
675 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
676 }
677 case VPInstruction::ResumePhi: {
678 Value *IncomingFromVPlanPred =
679 State.get(getOperand(0), /* IsScalar */ true);
680 Value *IncomingFromOtherPreds =
681 State.get(getOperand(1), /* IsScalar */ true);
682 auto *NewPhi =
683 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
684 BasicBlock *VPlanPred =
685 State.CFG
686 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
687 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
688 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
689 if (OtherPred == VPlanPred)
690 continue;
691 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
692 }
693 return NewPhi;
694 }
695 case VPInstruction::AnyOf: {
696 Value *A = State.get(getOperand(0));
697 return Builder.CreateOrReduce(A);
698 }
699
700 default:
701 llvm_unreachable("Unsupported opcode for instruction");
702 }
703}
704
705bool VPInstruction::isVectorToScalar() const {
706 return getOpcode() == VPInstruction::ExtractFromEnd ||
707 getOpcode() == VPInstruction::ComputeReductionResult ||
708 getOpcode() == VPInstruction::AnyOf;
709}
710
711bool VPInstruction::isSingleScalar() const {
712 return getOpcode() == VPInstruction::ResumePhi;
713}
714
715#if !defined(NDEBUG)
716bool VPInstruction::isFPMathOp() const {
717 // Inspired by FPMathOperator::classof. Notable differences are that we don't
718 // support Call, PHI and Select opcodes here yet.
719 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
720 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
721 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
722 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
723}
724#endif
725
726void VPInstruction::execute(VPTransformState &State) {
727 assert(!State.Lane && "VPInstruction executing a Lane");
728 State.setDebugLocFrom(getDebugLoc());
729 assert((hasFastMathFlags() == isFPMathOp() ||
730 getOpcode() == Instruction::Select) &&
731 "Recipe not a FPMathOp but has fast-math flags?");
732 if (hasFastMathFlags())
733 State.Builder.setFastMathFlags(getFastMathFlags());
734
735 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
736 (vputils::onlyFirstLaneUsed(this) ||
737 isVectorToScalar() || isSingleScalar());
738 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
739 if (GeneratesPerAllLanes) {
740 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
741 Lane != NumLanes; ++Lane) {
742 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
743 assert(GeneratedValue && "generatePerLane must produce a value");
744 State.set(this, GeneratedValue, VPLane(Lane));
745 }
746 return;
747 }
748
749 Value *GeneratedValue = generate(State);
750 if (!hasResult())
751 return;
752 assert(GeneratedValue && "generate must produce a value");
753 assert(
754 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
755 State.VF.isScalar()) &&
756 "scalar value but not only first lane defined");
757 State.set(this, GeneratedValue,
758 /*IsScalar*/ GeneratesPerFirstLaneOnly);
759}
760
761bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
762 if (Instruction::isBinaryOp(getOpcode()))
763 return false;
764 switch (getOpcode()) {
765 case Instruction::ICmp:
766 case Instruction::Select:
767 case VPInstruction::AnyOf:
768 case VPInstruction::CalculateTripCountMinusVF:
769 case VPInstruction::CanonicalIVIncrementForPart:
770 case VPInstruction::ExtractFromEnd:
771 case VPInstruction::FirstOrderRecurrenceSplice:
772 case VPInstruction::LogicalAnd:
773 case VPInstruction::Not:
774 case VPInstruction::PtrAdd:
775 return false;
776 default:
777 return true;
778 }
779}
780
781bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
782 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
783 if (Instruction::isBinaryOp(getOpcode()))
784 return vputils::onlyFirstLaneUsed(this);
785
786 switch (getOpcode()) {
787 default:
788 return false;
789 case Instruction::ICmp:
790 case Instruction::Select:
791 case Instruction::Or:
792 case VPInstruction::PtrAdd:
793 // TODO: Cover additional opcodes.
794 return vputils::onlyFirstLaneUsed(this);
795 case VPInstruction::ActiveLaneMask:
796 case VPInstruction::ExplicitVectorLength:
797 case VPInstruction::CalculateTripCountMinusVF:
798 case VPInstruction::CanonicalIVIncrementForPart:
799 case VPInstruction::BranchOnCount:
800 case VPInstruction::BranchOnCond:
801 case VPInstruction::ResumePhi:
802 return true;
803 };
804 llvm_unreachable("switch should return");
805}
806
807bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
808 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
809 if (Instruction::isBinaryOp(getOpcode()))
810 return vputils::onlyFirstPartUsed(this);
811
812 switch (getOpcode()) {
813 default:
814 return false;
815 case Instruction::ICmp:
816 case Instruction::Select:
817 return vputils::onlyFirstPartUsed(this);
818 case VPInstruction::BranchOnCount:
819 case VPInstruction::BranchOnCond:
820 case VPInstruction::CanonicalIVIncrementForPart:
821 return true;
822 };
823 llvm_unreachable("switch should return");
824}
825
826#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
827void VPInstruction::dump() const {
828 VPSlotTracker SlotTracker(getParent()->getPlan());
829 print(dbgs(), "", SlotTracker);
830}
831
832void VPInstruction::print(raw_ostream &O, const Twine &Indent,
833 VPSlotTracker &SlotTracker) const {
834 O << Indent << "EMIT ";
835
836 if (hasResult()) {
837 printAsOperand(O, SlotTracker);
838 O << " = ";
839 }
840
841 switch (getOpcode()) {
842 case VPInstruction::Not:
843 O << "not";
844 break;
845 case VPInstruction::SLPLoad:
846 O << "combined load";
847 break;
848 case VPInstruction::SLPStore:
849 O << "combined store";
850 break;
851 case VPInstruction::ActiveLaneMask:
852 O << "active lane mask";
853 break;
854 case VPInstruction::ResumePhi:
855 O << "resume-phi";
856 break;
857 case VPInstruction::ExplicitVectorLength:
858 O << "EXPLICIT-VECTOR-LENGTH";
859 break;
860 case VPInstruction::FirstOrderRecurrenceSplice:
861 O << "first-order splice";
862 break;
863 case VPInstruction::BranchOnCond:
864 O << "branch-on-cond";
865 break;
866 case VPInstruction::CalculateTripCountMinusVF:
867 O << "TC > VF ? TC - VF : 0";
868 break;
869 case VPInstruction::CanonicalIVIncrementForPart:
870 O << "VF * Part +";
871 break;
872 case VPInstruction::BranchOnCount:
873 O << "branch-on-count";
874 break;
875 case VPInstruction::ExtractFromEnd:
876 O << "extract-from-end";
877 break;
878 case VPInstruction::ComputeReductionResult:
879 O << "compute-reduction-result";
880 break;
881 case VPInstruction::LogicalAnd:
882 O << "logical-and";
883 break;
884 case VPInstruction::PtrAdd:
885 O << "ptradd";
886 break;
887 case VPInstruction::AnyOf:
888 O << "any-of";
889 break;
890 default:
891 O << Instruction::getOpcodeName(getOpcode());
892 }
893
894 printFlags(O);
895 printOperands(O, SlotTracker);
896
897 if (auto DL = getDebugLoc()) {
898 O << ", !dbg ";
899 DL.print(O);
900 }
901}
902#endif
903
904void VPIRInstruction::execute(VPTransformState &State) {
905 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
906 "Only PHINodes can have extra operands");
907 for (const auto &[Idx, Op] : enumerate(operands())) {
908 VPValue *ExitValue = Op;
909 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
910 ? VPLane::getFirstLane()
911 : VPLane::getLastLaneForVF(State.VF);
912 VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
913 auto *PredVPBB = Pred->getExitingBasicBlock();
914 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
915 // Set insertion point in PredBB in case an extract needs to be generated.
916 // TODO: Model extracts explicitly.
917 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
918 Value *V = State.get(ExitValue, VPLane(Lane));
919 auto *Phi = cast<PHINode>(&I);
920 // If there is no existing block for PredBB in the phi, add a new incoming
921 // value. Otherwise update the existing incoming value for PredBB.
922 if (Phi->getBasicBlockIndex(PredBB) == -1)
923 Phi->addIncoming(V, PredBB);
924 else
925 Phi->setIncomingValueForBlock(PredBB, V);
926 }
927
928 // Advance the insert point after the wrapped IR instruction. This allows
929 // interleaving VPIRInstructions and other recipes.
930 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
931}
932
933InstructionCost VPIRInstruction::computeCost(ElementCount VF,
934 VPCostContext &Ctx) const {
935 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
936 // hence it does not contribute to the cost-modeling for the VPlan.
937 return 0;
938}
939
940#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
941void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
942 VPSlotTracker &SlotTracker) const {
943 O << Indent << "IR " << I;
944
945 if (getNumOperands() != 0) {
946 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
947 interleaveComma(
948 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
949 Op.value()->printAsOperand(O, SlotTracker);
950 O << " from ";
951 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
952 });
953 O << ")";
954 }
955}
956#endif
957
958void VPWidenCallRecipe::execute(VPTransformState &State) {
959 assert(State.VF.isVector() && "not widening");
960 State.setDebugLocFrom(getDebugLoc());
961
962 FunctionType *VFTy = Variant->getFunctionType();
963 // Add return type if intrinsic is overloaded on it.
964 SmallVector<Value *, 4> Args;
965 for (const auto &I : enumerate(arg_operands())) {
966 Value *Arg;
967 // Some vectorized function variants may also take a scalar argument,
968 // e.g. linear parameters for pointers. This needs to be the scalar value
969 // from the start of the respective part when interleaving.
970 if (!VFTy->getParamType(I.index())->isVectorTy())
971 Arg = State.get(I.value(), VPLane(0));
972 else
973 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
974 Args.push_back(Arg);
975 }
976
977 assert(Variant != nullptr && "Can't create vector function.");
978
979 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
980 SmallVector<OperandBundleDef, 1> OpBundles;
981 if (CI)
982 CI->getOperandBundlesAsDefs(OpBundles);
983
984 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
985 setFlags(V);
986
987 if (!V->getType()->isVoidTy())
988 State.set(this, V);
989 State.addMetadata(V, CI);
990}
991
992InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
993 VPCostContext &Ctx) const {
994 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
995 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
996 Variant->getFunctionType()->params(),
997 CostKind);
998}
999
1000#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1001void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
1002 VPSlotTracker &SlotTracker) const {
1003 O << Indent << "WIDEN-CALL ";
1004
1005 Function *CalledFn = getCalledScalarFunction();
1006 if (CalledFn->getReturnType()->isVoidTy())
1007 O << "void ";
1008 else {
1009 printAsOperand(O, SlotTracker);
1010 O << " = ";
1011 }
1012
1013 O << "call";
1014 printFlags(O);
1015 O << " @" << CalledFn->getName() << "(";
1016 interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
1017 Op->printAsOperand(O, SlotTracker);
1018 });
1019 O << ")";
1020
1021 O << " (using library function";
1022 if (Variant->hasName())
1023 O << ": " << Variant->getName();
1024 O << ")";
1025}
1026#endif
1027
1028void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
1029 assert(State.VF.isVector() && "not widening");
1030 State.setDebugLocFrom(getDebugLoc());
1031
1032 SmallVector<Type *, 2> TysForDecl;
1033 // Add return type if intrinsic is overloaded on it.
1034 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1035 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1036 SmallVector<Value *, 4> Args;
1037 for (const auto &I : enumerate(operands())) {
1038 // Some intrinsics have a scalar argument - don't replace it with a
1039 // vector.
1040 Value *Arg;
1041 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1042 State.TTI))
1043 Arg = State.get(I.value(), VPLane(0));
1044 else
1045 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1046 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1047 State.TTI))
1048 TysForDecl.push_back(Arg->getType());
1049 Args.push_back(Arg);
1050 }
1051
1052 // Use vector version of the intrinsic.
1053 Module *M = State.Builder.GetInsertBlock()->getModule();
1054 Function *VectorF =
1055 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1056 assert(VectorF &&
1057 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1058
1059 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1060 SmallVector<OperandBundleDef, 1> OpBundles;
1061 if (CI)
1062 CI->getOperandBundlesAsDefs(OpBundles);
1063
1064 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1065
1066 setFlags(V);
1067
1068 if (!V->getType()->isVoidTy())
1069 State.set(this, V);
1070 State.addMetadata(V, CI);
1071}
1072
1073InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
1074 VPCostContext &Ctx) const {
1075 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1076
1077 // Some backends analyze intrinsic arguments to determine cost. Use the
1078 // underlying value for the operand if it has one. Otherwise try to use the
1079 // operand of the underlying call instruction, if there is one. Otherwise
1080 // clear Arguments.
1081 // TODO: Rework TTI interface to be independent of concrete IR values.
1082 SmallVector<const Value *> Arguments;
1083 for (const auto &[Idx, Op] : enumerate(operands())) {
1084 auto *V = Op->getUnderlyingValue();
1085 if (!V) {
1086 // Push all of the VP intrinsic's operands into Arguments even if a value is
1087 // nullptr; the cost computation for some VP intrinsics asserts on the number
1088 // of parameters. This mainly appears in two scenarios:
1089 // 1. the EVL operand is nullptr, or
1090 // 2. an argument of the VP intrinsic is itself a VP intrinsic.
1091 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1092 Arguments.push_back(V);
1093 continue;
1094 }
1095 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1096 Arguments.push_back(UI->getArgOperand(Idx));
1097 continue;
1098 }
1099 Arguments.clear();
1100 break;
1101 }
1102 Arguments.push_back(V);
1103 }
1104
1105 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1106 SmallVector<Type *> ParamTys;
1107 for (unsigned I = 0; I != getNumOperands(); ++I)
1108 ParamTys.push_back(
1109 toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
1110
1111 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1112 FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
1113 IntrinsicCostAttributes CostAttrs(
1114 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1115 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1116 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1117}
1118
1119StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
1120 return Intrinsic::getBaseName(VectorIntrinsicID);
1121}
1122
1123bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1124 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1125 // Vector predication intrinsics only demand the first lane of the last
1126 // operand (the EVL operand).
1127 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1128 Op == getOperand(getNumOperands() - 1);
1129}
1130
1131#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1132void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
1133 VPSlotTracker &SlotTracker) const {
1134 O << Indent << "WIDEN-INTRINSIC ";
1135 if (ResultTy->isVoidTy()) {
1136 O << "void ";
1137 } else {
1138 printAsOperand(O, SlotTracker);
1139 O << " = ";
1140 }
1141
1142 O << "call";
1143 printFlags(O);
1144 O << getIntrinsicName() << "(";
1145
1147 Op->printAsOperand(O, SlotTracker);
1148 });
1149 O << ")";
1150}
1151#endif
1152
1153void VPHistogramRecipe::execute(VPTransformState &State) {
1154 State.setDebugLocFrom(getDebugLoc());
1155 IRBuilderBase &Builder = State.Builder;
1156
1157 Value *Address = State.get(getOperand(0));
1158 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1159 VectorType *VTy = cast<VectorType>(Address->getType());
1160
1161 // The histogram intrinsic requires a mask even if the recipe doesn't;
1162 // if the mask operand was omitted then all lanes should be executed and
1163 // we just need to synthesize an all-true mask.
1164 Value *Mask = nullptr;
1165 if (VPValue *VPMask = getMask())
1166 Mask = State.get(VPMask);
1167 else
1168 Mask =
1169 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1170
1171 // If this is a subtract, we want to invert the increment amount. We may
1172 // add a separate intrinsic in future, but for now we'll try this.
1173 if (Opcode == Instruction::Sub)
1174 IncAmt = Builder.CreateNeg(IncAmt);
1175 else
1176 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1177
1178 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1179 {VTy, IncAmt->getType()},
1180 {Address, IncAmt, Mask});
1181}
1182
1183InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
1184 VPCostContext &Ctx) const {
1185 // FIXME: Take the gather and scatter into account as well. For now we're
1186 // generating the same cost as the fallback path, but we'll likely
1187 // need to create a new TTI method for determining the cost, including
1188 // whether we can use base + vec-of-smaller-indices or just
1189 // vec-of-pointers.
1190 assert(VF.isVector() && "Invalid VF for histogram cost");
1191 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1192 VPValue *IncAmt = getOperand(1);
1193 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1194 VectorType *VTy = VectorType::get(IncTy, VF);
1195
1196 // Assume that a non-constant update value (or a constant != 1) requires
1197 // a multiply, and add that into the cost.
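 // (E.g. an increment amount of constant 1 makes MulCost free below, while any
 // other increment value is charged an extra vector multiply.)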
1198 InstructionCost MulCost =
1199 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1200 if (IncAmt->isLiveIn()) {
1201 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1202
1203 if (CI && CI->getZExtValue() == 1)
1204 MulCost = TTI::TCC_Free;
1205 }
1206
1207 // Find the cost of the histogram operation itself.
1208 Type *PtrTy = VectorType::get(AddressTy, VF);
1209 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1210 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1211 Type::getVoidTy(Ctx.LLVMCtx),
1212 {PtrTy, IncTy, MaskTy});
1213
1214 // Add the costs together with the add/sub operation.
1215 return Ctx.TTI.getIntrinsicInstrCost(
1216 ICA, TTI::TCK_RecipThroughput) +
1217 MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1218}
1219
1220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1221void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
1222 VPSlotTracker &SlotTracker) const {
1223 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1224 getOperand(0)->printAsOperand(O, SlotTracker);
1225
1226 if (Opcode == Instruction::Sub)
1227 O << ", dec: ";
1228 else {
1229 assert(Opcode == Instruction::Add);
1230 O << ", inc: ";
1231 }
1233
1234 if (VPValue *Mask = getMask()) {
1235 O << ", mask: ";
1236 Mask->printAsOperand(O, SlotTracker);
1237 }
1238}
1239
1240void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1241 VPSlotTracker &SlotTracker) const {
1242 O << Indent << "WIDEN-SELECT ";
1243 printAsOperand(O, SlotTracker);
1244 O << " = select ";
1245 getOperand(0)->printAsOperand(O, SlotTracker);
1246 O << ", ";
1247 getOperand(1)->printAsOperand(O, SlotTracker);
1248 O << ", ";
1249 getOperand(2)->printAsOperand(O, SlotTracker);
1250 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1251}
1252#endif
1253
1254void VPWidenSelectRecipe::execute(VPTransformState &State) {
1255 State.setDebugLocFrom(getDebugLoc());
1256
1257 // The condition can be loop invariant but still defined inside the
1258 // loop. This means that we can't just use the original 'cond' value.
1259 // We have to take the 'vectorized' value and pick the first lane.
1260 // Instcombine will make this a no-op.
1261 auto *InvarCond =
1262 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1263
1264 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1265 Value *Op0 = State.get(getOperand(1));
1266 Value *Op1 = State.get(getOperand(2));
1267 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1268 State.set(this, Sel);
1269 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1270}
1271
1272InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
1273 VPCostContext &Ctx) const {
1274 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1275 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1276 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1277 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1278 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1279
1280 VPValue *Op0, *Op1;
1281 using namespace llvm::VPlanPatternMatch;
1282 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1283 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1284 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1285 // select x, y, false --> x & y
1286 // select x, true, y --> x | y
1287 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1288 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1289
1290 SmallVector<const Value *, 4> Operands;
1291 if (all_of(operands(),
1292 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1293 Operands.append(SI->op_begin(), SI->op_end());
1294 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1295 return Ctx.TTI.getArithmeticInstrCost(
1296 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1297 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1298 }
1299
1300 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1301 if (!ScalarCond)
1302 CondTy = VectorType::get(CondTy, VF);
1303
1304 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1305 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1306 Pred = Cmp->getPredicate();
1307 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1308 CostKind, {TTI::OK_AnyValue, TTI::OP_None},
1309 {TTI::OK_AnyValue, TTI::OP_None}, SI);
1310}
1311
1312VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1313 const FastMathFlags &FMF) {
1314 AllowReassoc = FMF.allowReassoc();
1315 NoNaNs = FMF.noNaNs();
1316 NoInfs = FMF.noInfs();
1317 NoSignedZeros = FMF.noSignedZeros();
1318 AllowReciprocal = FMF.allowReciprocal();
1319 AllowContract = FMF.allowContract();
1320 ApproxFunc = FMF.approxFunc();
1321}
1322
1323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1324void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
1325 switch (OpType) {
1326 case OperationType::Cmp:
1327 O << " " << CmpInst::getPredicateName(getPredicate());
1328 break;
1329 case OperationType::DisjointOp:
1330 if (DisjointFlags.IsDisjoint)
1331 O << " disjoint";
1332 break;
1333 case OperationType::PossiblyExactOp:
1334 if (ExactFlags.IsExact)
1335 O << " exact";
1336 break;
1337 case OperationType::OverflowingBinOp:
1338 if (WrapFlags.HasNUW)
1339 O << " nuw";
1340 if (WrapFlags.HasNSW)
1341 O << " nsw";
1342 break;
1343 case OperationType::FPMathOp:
1344 getFastMathFlags().print(O);
1345 break;
1346 case OperationType::GEPOp:
1347 if (GEPFlags.isInBounds())
1348 O << " inbounds";
1349 else if (GEPFlags.hasNoUnsignedSignedWrap())
1350 O << " nusw";
1351 if (GEPFlags.hasNoUnsignedWrap())
1352 O << " nuw";
1353 break;
1354 case OperationType::NonNegOp:
1355 if (NonNegFlags.NonNeg)
1356 O << " nneg";
1357 break;
1358 case OperationType::Other:
1359 break;
1360 }
1361 if (getNumOperands() > 0)
1362 O << " ";
1363}
1364#endif
1365
1366void VPWidenRecipe::execute(VPTransformState &State) {
1367 State.setDebugLocFrom(getDebugLoc());
1368 auto &Builder = State.Builder;
1369 switch (Opcode) {
1370 case Instruction::Call:
1371 case Instruction::Br:
1372 case Instruction::PHI:
1373 case Instruction::GetElementPtr:
1374 case Instruction::Select:
1375 llvm_unreachable("This instruction is handled by a different recipe.");
1376 case Instruction::UDiv:
1377 case Instruction::SDiv:
1378 case Instruction::SRem:
1379 case Instruction::URem:
1380 case Instruction::Add:
1381 case Instruction::FAdd:
1382 case Instruction::Sub:
1383 case Instruction::FSub:
1384 case Instruction::FNeg:
1385 case Instruction::Mul:
1386 case Instruction::FMul:
1387 case Instruction::FDiv:
1388 case Instruction::FRem:
1389 case Instruction::Shl:
1390 case Instruction::LShr:
1391 case Instruction::AShr:
1392 case Instruction::And:
1393 case Instruction::Or:
1394 case Instruction::Xor: {
1395 // Just widen unops and binops.
1396 SmallVector<Value *, 2> Ops;
1397 for (VPValue *VPOp : operands())
1398 Ops.push_back(State.get(VPOp));
1399
1400 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1401
1402 if (auto *VecOp = dyn_cast<Instruction>(V))
1403 setFlags(VecOp);
1404
1405 // Use this vector value for all users of the original instruction.
1406 State.set(this, V);
1407 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1408 break;
1409 }
1410 case Instruction::Freeze: {
1411 Value *Op = State.get(getOperand(0));
1412
1413 Value *Freeze = Builder.CreateFreeze(Op);
1414 State.set(this, Freeze);
1415 break;
1416 }
1417 case Instruction::ICmp:
1418 case Instruction::FCmp: {
1419 // Widen compares. Generate vector compares.
1420 bool FCmp = Opcode == Instruction::FCmp;
1421 Value *A = State.get(getOperand(0));
1422 Value *B = State.get(getOperand(1));
1423 Value *C = nullptr;
1424 if (FCmp) {
1425 // Propagate fast math flags.
1426 C = Builder.CreateFCmpFMF(
1427 getPredicate(), A, B,
1428 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1429 } else {
1430 C = Builder.CreateICmp(getPredicate(), A, B);
1431 }
1432 State.set(this, C);
1433 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1434 break;
1435 }
1436 default:
1437 // This instruction is not vectorized by simple widening.
1438 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1439 << Instruction::getOpcodeName(Opcode));
1440 llvm_unreachable("Unhandled instruction!");
1441 } // end of switch.
1442
1443#if !defined(NDEBUG)
1444 // Verify that VPlan type inference results agree with the type of the
1445 // generated values.
1446 assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
1447 State.get(this)->getType() &&
1448 "inferred type and type from generated instructions do not match");
1449#endif
1450}
1451
1452InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
1453 VPCostContext &Ctx) const {
1454 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1455 switch (Opcode) {
1456 case Instruction::FNeg: {
1457 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1458 return Ctx.TTI.getArithmeticInstrCost(
1459 Opcode, VectorTy, CostKind,
1460 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1461 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
1462 }
1463
1464 case Instruction::UDiv:
1465 case Instruction::SDiv:
1466 case Instruction::SRem:
1467 case Instruction::URem:
1468 // More complex computation, let the legacy cost-model handle this for now.
1469 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1470 case Instruction::Add:
1471 case Instruction::FAdd:
1472 case Instruction::Sub:
1473 case Instruction::FSub:
1474 case Instruction::Mul:
1475 case Instruction::FMul:
1476 case Instruction::FDiv:
1477 case Instruction::FRem:
1478 case Instruction::Shl:
1479 case Instruction::LShr:
1480 case Instruction::AShr:
1481 case Instruction::And:
1482 case Instruction::Or:
1483 case Instruction::Xor: {
1484 VPValue *RHS = getOperand(1);
1485 // Certain instructions can be cheaper to vectorize if they have a constant
1486 // second vector operand. One example of this are shifts on x86.
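 // (A shift by a constant or uniform amount can often be selected as a cheaper
 // immediate-form instruction than a fully variable vector shift.)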
1487 TargetTransformInfo::OperandValueInfo RHSInfo = {
1488 TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
1489 if (RHS->isLiveIn())
1490 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1491
1492 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1493 getOperand(1)->isDefinedOutsideLoopRegions())
1494 RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
1495 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1496 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1497
1499 if (CtxI)
1500 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1501 return Ctx.TTI.getArithmeticInstrCost(
1502 Opcode, VectorTy, CostKind,
1503 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1504 RHSInfo, Operands, CtxI, &Ctx.TLI);
1505 }
1506 case Instruction::Freeze: {
1507 // This opcode is unknown. Assume that it is the same as 'mul'.
1508 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1509 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1510 }
1511 case Instruction::ICmp:
1512 case Instruction::FCmp: {
1513 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1514 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1515 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1516 CostKind,
1517 {TTI::OK_AnyValue, TTI::OP_None},
1518 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1519 }
1520 default:
1521 llvm_unreachable("Unsupported opcode for instruction");
1522 }
1523}
1524
1525void VPWidenEVLRecipe::execute(VPTransformState &State) {
1526 unsigned Opcode = getOpcode();
1527 // TODO: Support other opcodes
1528 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1529 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1530
1532
1533 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1534 "VPWidenEVLRecipe should not be used for scalars");
1535
1536 VPValue *EVL = getEVL();
1537 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1538 IRBuilderBase &BuilderIR = State.Builder;
1539 VectorBuilder Builder(BuilderIR);
1540 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1541
1542 SmallVector<Value *, 4> Ops;
1543 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1544 VPValue *VPOp = getOperand(I);
1545 Ops.push_back(State.get(VPOp));
1546 }
1547
1548 Builder.setMask(Mask).setEVL(EVLArg);
1549 Value *VPInst =
1550 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1551 // Currently vp-intrinsics only accept FMF flags.
1552 // TODO: Enable other flags when support is added.
1553 if (isa<FPMathOperator>(VPInst))
1554 setFlags(cast<Instruction>(VPInst));
1555
1556 State.set(this, VPInst);
1557 State.addMetadata(VPInst,
1558 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1559}
1560
1561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1562void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1563 VPSlotTracker &SlotTracker) const {
1564 O << Indent << "WIDEN ";
1565 printAsOperand(O, SlotTracker);
1566 O << " = " << Instruction::getOpcodeName(Opcode);
1567 printFlags(O);
1568 printOperands(O, SlotTracker);
1569}
1570
1571void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1572 VPSlotTracker &SlotTracker) const {
1573 O << Indent << "WIDEN ";
1574 printAsOperand(O, SlotTracker);
1575 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1576 printFlags(O);
1577 printOperands(O, SlotTracker);
1578}
1579#endif
1580
1581void VPWidenCastRecipe::execute(VPTransformState &State) {
1582 State.setDebugLocFrom(getDebugLoc());
1583 auto &Builder = State.Builder;
1584 /// Vectorize casts.
1585 assert(State.VF.isVector() && "Not vectorizing?");
1586 Type *DestTy = VectorType::get(getResultType(), State.VF);
1587 VPValue *Op = getOperand(0);
1588 Value *A = State.get(Op);
1589 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1590 State.set(this, Cast);
1591 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1592 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1593 setFlags(CastOp);
1594}
1595
1596InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
1597 VPCostContext &Ctx) const {
1598 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1599 // the legacy cost model, including truncates/extends when evaluating a
1600 // reduction in a smaller type.
1601 if (!getUnderlyingValue())
1602 return 0;
1603 // Computes the CastContextHint from a recipe that may access memory.
1604 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1605 if (VF.isScalar())
1606 return TTI::CastContextHint::Normal;
1607 if (isa<VPInterleaveRecipe>(R))
1608 return TTI::CastContextHint::Interleave;
1609 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1610 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1611 : TTI::CastContextHint::Normal;
1612 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1613 if (WidenMemoryRecipe == nullptr)
1614 return TTI::CastContextHint::None;
1615 if (!WidenMemoryRecipe->isConsecutive())
1616 return TTI::CastContextHint::GatherScatter;
1617 if (WidenMemoryRecipe->isReverse())
1618 return TTI::CastContextHint::Reversed;
1619 if (WidenMemoryRecipe->isMasked())
1620 return TTI::CastContextHint::Masked;
1621 return TTI::CastContextHint::Normal;
1622 };
1623
1624 VPValue *Operand = getOperand(0);
1625 TTI::CastContextHint CCH = TTI::CastContextHint::None;
1626 // For Trunc/FPTrunc, get the context from the only user.
1627 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1628 !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
1629 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1630 CCH = ComputeCCH(StoreRecipe);
1631 }
1632 // For Z/Sext, get the context from the operand.
1633 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1634 Opcode == Instruction::FPExt) {
1635 if (Operand->isLiveIn())
1636 CCH = TTI::CastContextHint::Normal;
1637 else if (Operand->getDefiningRecipe())
1638 CCH = ComputeCCH(Operand->getDefiningRecipe());
1639 }
1640
1641 auto *SrcTy =
1642 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1643 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1644 // Arm TTI will use the underlying instruction to determine the cost.
1645 return Ctx.TTI.getCastInstrCost(
1646 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1647 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1648}
1649
1650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1651void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
1652 VPSlotTracker &SlotTracker) const {
1653 O << Indent << "WIDEN-CAST ";
1654 printAsOperand(O, SlotTracker);
1655 O << " = " << Instruction::getOpcodeName(Opcode);
1656 printFlags(O);
1657 printOperands(O, SlotTracker);
1658 O << " to " << *getResultType();
1659}
1660#endif
1661
1662InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
1663 VPCostContext &Ctx) const {
1664 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1665}
1666
1667/// This function adds
1668/// (0 * Step, 1 * Step, 2 * Step, ...)
1669/// to each vector element of Val.
1670/// \p Opcode is relevant for FP induction variable.
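/// (For instance, with VF = 4, an integer Step of 2 and a splatted start value
/// of 10, the result is the vector <10, 12, 14, 16>.)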
1671static Value *getStepVector(Value *Val, Value *Step,
1672 Instruction::BinaryOps BinOp, ElementCount VF,
1673 IRBuilderBase &Builder) {
1674 assert(VF.isVector() && "only vector VFs are supported");
1675
1676 // Create and check the types.
1677 auto *ValVTy = cast<VectorType>(Val->getType());
1678 ElementCount VLen = ValVTy->getElementCount();
1679
1680 Type *STy = Val->getType()->getScalarType();
1681 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1682 "Induction Step must be an integer or FP");
1683 assert(Step->getType() == STy && "Step has wrong type");
1684
1686
1687 // Create a vector of consecutive numbers from zero to VF.
1688 VectorType *InitVecValVTy = ValVTy;
1689 if (STy->isFloatingPointTy()) {
1690 Type *InitVecValSTy =
1691 IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
1692 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1693 }
1694 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1695
1696 if (STy->isIntegerTy()) {
1697 Step = Builder.CreateVectorSplat(VLen, Step);
1698 assert(Step->getType() == Val->getType() && "Invalid step vec");
1699 // FIXME: The newly created binary instructions should contain nsw/nuw
1700 // flags, which can be found from the original scalar operations.
1701 Step = Builder.CreateMul(InitVec, Step);
1702 return Builder.CreateAdd(Val, Step, "induction");
1703 }
1704
1705 // Floating point induction.
1706 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1707 "Binary Opcode should be specified for FP induction");
1708 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1709
1710 Step = Builder.CreateVectorSplat(VLen, Step);
1711 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1712 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1713}
1714
1715/// A helper function that returns an integer or floating-point constant with
1716/// value C.
1717static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
1718 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1719 : ConstantFP::get(Ty, C);
1720}
1721
1722void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
1723 assert(!State.Lane && "Int or FP induction being replicated.");
1724
1725 Value *Start = getStartValue()->getLiveInIRValue();
1726 const InductionDescriptor &ID = getInductionDescriptor();
1727 TruncInst *Trunc = getTruncInst();
1728 IRBuilderBase &Builder = State.Builder;
1729 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1730 "Types must match");
1731 assert(State.VF.isVector() && "must have vector VF");
1732
1733 // The value from the original loop to which we are mapping the new induction
1734 // variable.
1735 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1736
1737 // Fast-math-flags propagate from the original induction instruction.
1738 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1739 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1740 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1741
1742 // Now do the actual transformations, and start with fetching the step value.
1743 Value *Step = State.get(getStepValue(), VPLane(0));
1744
1745 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1746 "Expected either an induction phi-node or a truncate of it!");
1747
1748 // Construct the initial value of the vector IV in the vector loop preheader
1749 auto CurrIP = Builder.saveIP();
1750 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1751 Builder.SetInsertPoint(VectorPH->getTerminator());
1752 if (isa<TruncInst>(EntryVal)) {
1753 assert(Start->getType()->isIntegerTy() &&
1754 "Truncation requires an integer type");
1755 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1756 Step = Builder.CreateTrunc(Step, TruncType);
1757 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1758 }
1759
1760 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1761 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1762 State.VF, State.Builder);
1763
1764 // We create vector phi nodes for both integer and floating-point induction
1765 // variables. Here, we determine the kind of arithmetic we will perform.
1766 Instruction::BinaryOps AddOp;
1767 Instruction::BinaryOps MulOp;
1768 if (Step->getType()->isIntegerTy()) {
1769 AddOp = Instruction::Add;
1770 MulOp = Instruction::Mul;
1771 } else {
1772 AddOp = ID.getInductionOpcode();
1773 MulOp = Instruction::FMul;
1774 }
1775
1776 Value *SplatVF;
1777 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1778 // The recipe has been unrolled. In that case, fetch the splat value for the
1779 // induction increment.
1780 SplatVF = State.get(SplatVFOperand);
1781 } else {
1782 // Multiply the vectorization factor by the step using integer or
1783 // floating-point arithmetic as appropriate.
1784 Type *StepType = Step->getType();
1785 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1786 if (Step->getType()->isFloatingPointTy())
1787 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1788 else
1789 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1790 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1791
1792 // Create a vector splat to use in the induction update.
1793 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1794 }
1795
1796 Builder.restoreIP(CurrIP);
1797
1798 // We may need to add the step a number of times, depending on the unroll
1799 // factor. The last of those goes into the PHI.
1800 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1801 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1802 VecInd->setDebugLoc(getDebugLoc());
1803 State.set(this, VecInd);
1804
1805 Instruction *LastInduction = cast<Instruction>(
1806 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1807 if (isa<TruncInst>(EntryVal))
1808 State.addMetadata(LastInduction, EntryVal);
1809 LastInduction->setDebugLoc(getDebugLoc());
1810
1811 VecInd->addIncoming(SteppedStart, VectorPH);
1812 // Add induction update using an incorrect block temporarily. The phi node
1813 // will be fixed after VPlan execution. Note that at this point the latch
1814 // block cannot be used, as it does not exist yet.
1815 // TODO: Model increment value in VPlan, by turning the recipe into a
1816 // multi-def and a subclass of VPHeaderPHIRecipe.
1817 VecInd->addIncoming(LastInduction, VectorPH);
1818}
1819
1820#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1821void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1822 VPSlotTracker &SlotTracker) const {
1823 O << Indent;
1824 printAsOperand(O, SlotTracker);
1825 O << " = WIDEN-INDUCTION ";
1826 printOperands(O, SlotTracker);
1827
1828 if (auto *TI = getTruncInst())
1829 O << " (truncated to " << *TI->getType() << ")";
1830}
1831#endif
1832bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1833 // The step may be defined by a recipe in the preheader (e.g. if it requires
1834 // SCEV expansion), but for the canonical induction the step is required to be
1835 // 1, which is represented as live-in.
1836 if (getStepValue()->getDefiningRecipe())
1837 return false;
1839 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1840 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1841 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1842 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1843 getScalarType() == CanIV->getScalarType();
1844}
1845
1846#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1847void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
1848 VPSlotTracker &SlotTracker) const {
1849 O << Indent;
1850 printAsOperand(O, SlotTracker);
1851 O << " = DERIVED-IV ";
1852 getStartValue()->printAsOperand(O, SlotTracker);
1853 O << " + ";
1854 getOperand(1)->printAsOperand(O, SlotTracker);
1855 O << " * ";
1856 getStepValue()->printAsOperand(O, SlotTracker);
1857}
1858#endif
1859
1860void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
1861 // Fast-math-flags propagate from the original induction instruction.
1862 IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
1863 if (hasFastMathFlags())
1864 State.Builder.setFastMathFlags(getFastMathFlags());
1865
1866 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1867 /// variable on which to base the steps, \p Step is the size of the step.
1868
1869 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1870 Value *Step = State.get(getStepValue(), VPLane(0));
1871 IRBuilderBase &Builder = State.Builder;
1872
1873 // Ensure step has the same type as that of scalar IV.
1874 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1875 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1876
1877 // We build scalar steps for both integer and floating-point induction
1878 // variables. Here, we determine the kind of arithmetic we will perform.
1879 Instruction::BinaryOps AddOp;
1880 Instruction::BinaryOps MulOp;
1881 if (BaseIVTy->isIntegerTy()) {
1882 AddOp = Instruction::Add;
1883 MulOp = Instruction::Mul;
1884 } else {
1885 AddOp = InductionOpcode;
1886 MulOp = Instruction::FMul;
1887 }
1888
1889 // Determine the number of scalars we need to generate for each unroll
1890 // iteration.
1891 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1892 // Compute the scalar steps and save the results in State.
1893 Type *IntStepTy =
1894 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1895 Type *VecIVTy = nullptr;
1896 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1897 if (!FirstLaneOnly && State.VF.isScalable()) {
1898 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1899 UnitStepVec =
1900 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1901 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1902 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1903 }
1904
1905 unsigned StartLane = 0;
1906 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1907 if (State.Lane) {
1908 StartLane = State.Lane->getKnownLane();
1909 EndLane = StartLane + 1;
1910 }
1911 Value *StartIdx0 =
1912 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1913
1914 if (!FirstLaneOnly && State.VF.isScalable()) {
1915 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1916 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1917 if (BaseIVTy->isFloatingPointTy())
1918 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1919 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1920 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1921 State.set(this, Add);
1922 // It's useful to record the lane values too for the known minimum number
1923 // of elements so we do those below. This improves the code quality when
1924 // trying to extract the first element, for example.
1925 }
1926
1927 if (BaseIVTy->isFloatingPointTy())
1928 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1929
1930 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1931 Value *StartIdx = Builder.CreateBinOp(
1932 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1933 // The step returned by `createStepForVF` is a runtime-evaluated value
1934 // when VF is scalable. Otherwise, it should be folded into a Constant.
1935 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1936 "Expected StartIdx to be folded to a constant when VF is not "
1937 "scalable");
1938 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1939 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1940 State.set(this, Add, VPLane(Lane));
1941 }
1942}
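// Illustrative sketch (editor's addition, not from the LLVM sources): for a
// fixed VF, each lane value produced above is BaseIV + (Part*VF + Lane) * Step,
// where Part is the unroll part returned by getUnrollPart(). Standalone C++:
#include <cstdio>

int main() {
  const int BaseIV = 10, Step = 2, VF = 4, Part = 1; // assumed example values
  const int StartIdx0 = Part * VF;                   // createStepForVF for fixed VF
  for (int Lane = 0; Lane < VF; ++Lane)
    printf("lane %d -> %d\n", Lane, BaseIV + (StartIdx0 + Lane) * Step);
}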
1943
1944#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1945void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1946 VPSlotTracker &SlotTracker) const {
1947 O << Indent;
1948 printAsOperand(O, SlotTracker);
1949 O << " = SCALAR-STEPS ";
1950 printOperands(O, SlotTracker);
1951}
1952#endif
1953
1954void VPWidenGEPRecipe::execute(VPTransformState &State) {
1955 assert(State.VF.isVector() && "not widening");
1956 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1957 // Construct a vector GEP by widening the operands of the scalar GEP as
1958 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1959 // results in a vector of pointers when at least one operand of the GEP
1960 // is vector-typed. Thus, to keep the representation compact, we only use
1961 // vector-typed operands for loop-varying values.
1962
1963 if (areAllOperandsInvariant()) {
1964 // If we are vectorizing, but the GEP has only loop-invariant operands,
1965 // the GEP we build (by only using vector-typed operands for
1966 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1967 // produce a vector of pointers, we need to either arbitrarily pick an
1968 // operand to broadcast, or broadcast a clone of the original GEP.
1969 // Here, we broadcast a clone of the original.
1970 //
1971 // TODO: If at some point we decide to scalarize instructions having
1972 // loop-invariant operands, this special case will no longer be
1973 // required. We would add the scalarization decision to
1974 // collectLoopScalars() and teach getVectorValue() to broadcast
1975 // the lane-zero scalar value.
1976 SmallVector<Value *> Ops;
1977 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1978 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1979
1980 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1981 ArrayRef(Ops).drop_front(), "",
1982 getGEPNoWrapFlags());
1983 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1984 State.set(this, Splat);
1985 State.addMetadata(Splat, GEP);
1986 } else {
1987 // If the GEP has at least one loop-varying operand, we are sure to
1988 // produce a vector of pointers unless VF is scalar.
1989 // The pointer operand of the new GEP. If it's loop-invariant, we
1990 // won't broadcast it.
1991 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1992 : State.get(getOperand(0));
1993
1994 // Collect all the indices for the new GEP. If any index is
1995 // loop-invariant, we won't broadcast it.
1996 SmallVector<Value *, 4> Indices;
1997 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1998 VPValue *Operand = getOperand(I);
1999 if (isIndexLoopInvariant(I - 1))
2000 Indices.push_back(State.get(Operand, VPLane(0)));
2001 else
2002 Indices.push_back(State.get(Operand));
2003 }
2004
2005 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
2006 // but it should be a vector, otherwise.
2007 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
2008 Indices, "", getGEPNoWrapFlags());
2009 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2010 "NewGEP is not a pointer vector");
2011 State.set(this, NewGEP);
2012 State.addMetadata(NewGEP, GEP);
2013 }
2014}
2015
2016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2017void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
2018 VPSlotTracker &SlotTracker) const {
2019 O << Indent << "WIDEN-GEP ";
2020 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2021 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2022 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2023
2024 O << " ";
2026 O << " = getelementptr";
2027 printFlags(O);
2029}
2030#endif
2031
2032static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2033 unsigned CurrentPart, IRBuilderBase &Builder) {
2034 // Use i32 for the gep index type when the value is constant,
2035 // or query DataLayout for a more suitable index type otherwise.
2036 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2037 return IsScalable && (IsReverse || CurrentPart > 0)
2038 ? DL.getIndexType(Builder.getPtrTy(0))
2039 : Builder.getInt32Ty();
2040}
2041
2042void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
2043 auto &Builder = State.Builder;
2044 State.setDebugLocFrom(getDebugLoc());
2045 unsigned CurrentPart = getUnrollPart(*this);
2046 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2047 CurrentPart, Builder);
2048
2049 // The wide store needs to start at the last vector element.
2050 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2051 if (IndexTy != RunTimeVF->getType())
2052 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2053 // NumElt = -CurrentPart * RunTimeVF
2054 Value *NumElt = Builder.CreateMul(
2055 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
2056 // LastLane = 1 - RunTimeVF
2057 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2058 Value *Ptr = State.get(getOperand(0), VPLane(0));
2059 Value *ResultPtr =
2060 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2061 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2062 getGEPNoWrapFlags());
2063
2064 State.set(this, ResultPtr, /*IsScalar*/ true);
2065}
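// Illustrative sketch (editor's addition, not from the LLVM sources): for a
// reversed access the wide load/store of part P must start VF-1 elements
// below the scalar pointer, and every further part starts another VF elements
// lower, i.e. an element offset of -P*VF + (1 - VF). Quick check of that
// arithmetic in plain C++:
#include <cassert>

int main() {
  const long VF = 4;
  for (long Part = 0; Part < 4; ++Part) {
    long Offset = -Part * VF + (1 - VF);        // NumElt + LastLane from above
    // First element addressed by part 'Part' when walking backwards:
    assert(Offset == -((Part + 1) * VF - 1));
  }
}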
2066
2067#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2068void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
2069 VPSlotTracker &SlotTracker) const {
2070 O << Indent;
2071 printAsOperand(O, SlotTracker);
2072 O << " = reverse-vector-pointer";
2073 printFlags(O);
2074 printOperands(O, SlotTracker);
2075}
2076#endif
2077
2078void VPVectorPointerRecipe::execute(VPTransformState &State) {
2079 auto &Builder = State.Builder;
2080 State.setDebugLocFrom(getDebugLoc());
2081 unsigned CurrentPart = getUnrollPart(*this);
2082 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2083 CurrentPart, Builder);
2084 Value *Ptr = State.get(getOperand(0), VPLane(0));
2085
2086 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2087 Value *ResultPtr =
2088 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2089
2090 State.set(this, ResultPtr, /*IsScalar*/ true);
2091}
2092
2093#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2094void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
2095 VPSlotTracker &SlotTracker) const {
2096 O << Indent;
2097 printAsOperand(O, SlotTracker);
2098 O << " = vector-pointer ";
2099
2100 printOperands(O, SlotTracker);
2101}
2102#endif
2103
2104void VPBlendRecipe::execute(VPTransformState &State) {
2105 assert(isNormalized() && "Expected blend to be normalized!");
2107 // We know that all PHIs in non-header blocks are converted into
2108 // selects, so we don't have to worry about the insertion order and we
2109 // can just use the builder.
2110 // At this point we generate the predication tree. There may be
2111 // duplications since this is a simple recursive scan, but future
2112 // optimizations will clean it up.
2113
2114 unsigned NumIncoming = getNumIncomingValues();
2115
2116 // Generate a sequence of selects of the form:
2117 // SELECT(Mask3, In3,
2118 // SELECT(Mask2, In2,
2119 // SELECT(Mask1, In1,
2120 // In0)))
2121 // Note that Mask0 is never used: lanes for which no path reaches this phi,
2122 // and which are therefore essentially undef, take their value from In0.
2123 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2124 Value *Result = nullptr;
2125 for (unsigned In = 0; In < NumIncoming; ++In) {
2126 // We might have single edge PHIs (blocks) - use an identity
2127 // 'select' for the first PHI operand.
2128 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2129 if (In == 0)
2130 Result = In0; // Initialize with the first incoming value.
2131 else {
2132 // Select between the current value and the previous incoming edge
2133 // based on the incoming mask.
2134 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2135 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2136 }
2137 }
2138 State.set(this, Result, OnlyFirstLaneUsed);
2139}
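// Illustrative sketch (editor's addition, not from the LLVM sources): per lane
// the generated select chain picks the last incoming value whose mask is set,
// falling back to In0, i.e. select(Mask2, In2, select(Mask1, In1, In0)) for
// three inputs. Scalar C++ emulation of one lane:
#include <cstdio>

int main() {
  const int NumIncoming = 3;
  int In[NumIncoming] = {10, 20, 30};
  bool Mask[NumIncoming] = {false, true, false}; // Mask[0] is never used
  int Result = In[0];
  for (int I = 1; I < NumIncoming; ++I)
    Result = Mask[I] ? In[I] : Result;           // CreateSelect(Cond, In0, Result)
  printf("%d\n", Result);                        // prints 20
}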
2140
2141InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
2142 VPCostContext &Ctx) const {
2144
2145 // Handle cases where only the first lane is used the same way as the legacy
2146 // cost model.
2147 if (vputils::onlyFirstLaneUsed(this))
2148 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2149
2150 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2151 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2152 return (getNumIncomingValues() - 1) *
2153 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2154 CmpInst::BAD_ICMP_PREDICATE, CostKind);
2155}
2156
2157#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2158void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
2159 VPSlotTracker &SlotTracker) const {
2160 O << Indent << "BLEND ";
2162 O << " =";
2163 if (getNumIncomingValues() == 1) {
2164 // Not a User of any mask: not really blending, this is a
2165 // single-predecessor phi.
2166 O << " ";
2168 } else {
2169 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2170 O << " ";
2172 if (I == 0)
2173 continue;
2174 O << "/";
2176 }
2177 }
2178}
2179#endif
2180
2181void VPReductionRecipe::execute(VPTransformState &State) {
2182 assert(!State.Lane && "Reduction being replicated.");
2183 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2184 RecurKind Kind = RdxDesc.getRecurrenceKind();
2185 // Propagate the fast-math flags carried by the underlying instruction.
2186 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
2187 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2189 Value *NewVecOp = State.get(getVecOp());
2190 if (VPValue *Cond = getCondOp()) {
2191 Value *NewCond = State.get(Cond, State.VF.isScalar());
2192 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2193 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2194
2195 Value *Start;
2196 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
2197 Start = RdxDesc.getRecurrenceStartValue();
2198 else
2199 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2200 RdxDesc.getFastMathFlags());
2201 if (State.VF.isVector())
2202 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2203
2204 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2205 NewVecOp = Select;
2206 }
2207 Value *NewRed;
2208 Value *NextInChain;
2209 if (IsOrdered) {
2210 if (State.VF.isVector())
2211 NewRed =
2212 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2213 else
2214 NewRed = State.Builder.CreateBinOp(
2215 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2216 PrevInChain = NewRed;
2217 NextInChain = NewRed;
2218 } else {
2219 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2220 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2221 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2222 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2223 NewRed, PrevInChain);
2224 else
2225 NextInChain = State.Builder.CreateBinOp(
2226 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2227 }
2228 State.set(this, NextInChain, /*IsScalar*/ true);
2229}
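// Illustrative sketch (editor's addition, not from the LLVM sources): a
// conditional add-reduction first replaces masked-off lanes with the identity
// (0 for integer add) and then reduces, so inactive lanes cannot change the
// result; the partial result is then chained with the incoming value:
#include <cstdio>

int main() {
  const int VF = 4;
  int Vec[VF] = {1, 2, 3, 4};
  bool Cond[VF] = {true, false, true, false};
  int PrevInChain = 100;
  int Red = 0;                              // identity for integer add
  for (int L = 0; L < VF; ++L)
    Red += Cond[L] ? Vec[L] : 0;            // select(NewCond, NewVecOp, Identity)
  int NextInChain = Red + PrevInChain;      // chain the partial sum
  printf("%d\n", NextInChain);              // prints 104
}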
2230
2231void VPReductionEVLRecipe::execute(VPTransformState &State) {
2232 assert(!State.Lane && "Reduction being replicated.");
2233
2234 auto &Builder = State.Builder;
2235 // Propagate the fast-math flags carried by the underlying instruction.
2236 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2238 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2239
2240 RecurKind Kind = RdxDesc.getRecurrenceKind();
2241 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2242 Value *VecOp = State.get(getVecOp());
2243 Value *EVL = State.get(getEVL(), VPLane(0));
2244
2245 VectorBuilder VBuilder(Builder);
2246 VBuilder.setEVL(EVL);
2247 Value *Mask;
2248 // TODO: move the all-true mask generation into VectorBuilder.
2249 if (VPValue *CondOp = getCondOp())
2250 Mask = State.get(CondOp);
2251 else
2252 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2253 VBuilder.setMask(Mask);
2254
2255 Value *NewRed;
2256 if (isOrdered()) {
2257 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2258 } else {
2259 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2260 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2261 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2262 else
2263 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2264 NewRed, Prev);
2265 }
2266 State.set(this, NewRed, /*IsScalar*/ true);
2267}
2268
2269InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
2270 VPCostContext &Ctx) const {
2271 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2272 Type *ElementTy = Ctx.Types.inferScalarType(this);
2273 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2275 unsigned Opcode = RdxDesc.getOpcode();
2276
2277 // TODO: Support any-of and in-loop reductions.
2278 assert(
2279 (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
2280 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2281 "Any-of reduction not implemented in VPlan-based cost model currently.");
2282 assert(
2283 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2284 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2285 "In-loop reduction not implemented in VPlan-based cost model currently.");
2286
2287 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2288 "Inferred type and recurrence type mismatch.");
2289
2290 // Cost = Reduction cost + BinOp cost
2291 InstructionCost Cost =
2292 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2293 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
2294 Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
2295 return Cost + Ctx.TTI.getMinMaxReductionCost(
2296 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2297 }
2298
2299 return Cost + Ctx.TTI.getArithmeticReductionCost(
2300 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2301}
2302
2303#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2304void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
2305 VPSlotTracker &SlotTracker) const {
2306 O << Indent << "REDUCE ";
2308 O << " = ";
2310 O << " +";
2311 if (isa<FPMathOperator>(getUnderlyingInstr()))
2313 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2315 if (isConditional()) {
2316 O << ", ";
2318 }
2319 O << ")";
2320 if (RdxDesc.IntermediateStore)
2321 O << " (with final reduction value stored in invariant address sank "
2322 "outside of loop)";
2323}
2324
2325void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2326 VPSlotTracker &SlotTracker) const {
2328 O << Indent << "REDUCE ";
2330 O << " = ";
2332 O << " +";
2333 if (isa<FPMathOperator>(getUnderlyingInstr()))
2335 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2337 O << ", ";
2339 if (isConditional()) {
2340 O << ", ";
2342 }
2343 O << ")";
2344 if (RdxDesc.IntermediateStore)
2345 O << " (with final reduction value stored in invariant address sank "
2346 "outside of loop)";
2347}
2348#endif
2349
2350bool VPReplicateRecipe::shouldPack() const {
2351 // Find if the recipe is used by a widened recipe via an intervening
2352 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2353 return any_of(users(), [](const VPUser *U) {
2354 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2355 return any_of(PredR->users(), [PredR](const VPUser *U) {
2356 return !U->usesScalars(PredR);
2357 });
2358 return false;
2359 });
2360}
2361
2362InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
2363 VPCostContext &Ctx) const {
2364 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2365 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2366 // transform, avoid computing their cost multiple times for now.
2367 Ctx.SkipCostComputation.insert(UI);
2368 return Ctx.getLegacyCost(UI, VF);
2369}
2370
2371#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2372void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
2373 VPSlotTracker &SlotTracker) const {
2374 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2375
2376 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2378 O << " = ";
2379 }
2380 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2381 O << "call";
2382 printFlags(O);
2383 O << "@" << CB->getCalledFunction()->getName() << "(";
2385 O, [&O, &SlotTracker](VPValue *Op) {
2386 Op->printAsOperand(O, SlotTracker);
2387 });
2388 O << ")";
2389 } else {
2391 printFlags(O);
2393 }
2394
2395 if (shouldPack())
2396 O << " (S->V)";
2397}
2398#endif
2399
2400Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2403 "Codegen only implemented for first lane.");
2404 switch (Opcode) {
2405 case Instruction::SExt:
2406 case Instruction::ZExt:
2407 case Instruction::Trunc: {
2408 // Note: SExt/ZExt not used yet.
2409 Value *Op = State.get(getOperand(0), VPLane(0));
2410 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2411 }
2412 default:
2413 llvm_unreachable("opcode not implemented yet");
2414 }
2415}
2416
2417void VPScalarCastRecipe ::execute(VPTransformState &State) {
2418 State.set(this, generate(State), VPLane(0));
2419}
2420
2421#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2422void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2423 VPSlotTracker &SlotTracker) const {
2424 O << Indent << "SCALAR-CAST ";
2425 printAsOperand(O, SlotTracker);
2426 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2427 printOperands(O, SlotTracker);
2428 O << " to " << *ResultTy;
2429}
2430#endif
2431
2432void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
2433 assert(State.Lane && "Branch on Mask works only on single instance.");
2434
2435 unsigned Lane = State.Lane->getKnownLane();
2436
2437 Value *ConditionBit = nullptr;
2438 VPValue *BlockInMask = getMask();
2439 if (BlockInMask) {
2440 ConditionBit = State.get(BlockInMask);
2441 if (ConditionBit->getType()->isVectorTy())
2442 ConditionBit = State.Builder.CreateExtractElement(
2443 ConditionBit, State.Builder.getInt32(Lane));
2444 } else // Block in mask is all-one.
2445 ConditionBit = State.Builder.getTrue();
2446
2447 // Replace the temporary unreachable terminator with a new conditional branch,
2448 // whose two destinations will be set later when they are created.
2449 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2450 assert(isa<UnreachableInst>(CurrentTerminator) &&
2451 "Expected to replace unreachable terminator with conditional branch.");
2452 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2453 CondBr->setSuccessor(0, nullptr);
2454 ReplaceInstWithInst(CurrentTerminator, CondBr);
2455}
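// Illustrative sketch (editor's addition, not from the LLVM sources):
// BranchOnMask guards one replicated, predicated scalar instance; conceptually
// it extracts the current lane's bit from the block-in mask and branches on it:
#include <cstdio>

int main() {
  const int VF = 4;
  bool BlockInMask[VF] = {true, false, true, true};
  for (int Lane = 0; Lane < VF; ++Lane) {
    bool ConditionBit = BlockInMask[Lane];  // extractelement(Mask, Lane)
    if (ConditionBit)                       // conditional branch to the predicated block
      printf("execute predicated instance for lane %d\n", Lane);
  }
}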
2456
2457InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
2458 VPCostContext &Ctx) const {
2459 // The legacy cost model doesn't assign costs to branches for individual
2460 // replicate regions. Match the current behavior in the VPlan cost model for
2461 // now.
2462 return 0;
2463}
2464
2465void VPPredInstPHIRecipe::execute(VPTransformState &State) {
2467 assert(State.Lane && "Predicated instruction PHI works per instance.");
2468 Instruction *ScalarPredInst =
2469 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2470 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2471 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2472 assert(PredicatingBB && "Predicated block has no single predecessor.");
2473 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2474 "operand must be VPReplicateRecipe");
2475
2476 // By current pack/unpack logic we need to generate only a single phi node: if
2477 // a vector value for the predicated instruction exists at this point it means
2478 // the instruction has vector users only, and a phi for the vector value is
2479 // needed. In this case the recipe of the predicated instruction is marked to
2480 // also do that packing, thereby "hoisting" the insert-element sequence.
2481 // Otherwise, a phi node for the scalar value is needed.
2482 if (State.hasVectorValue(getOperand(0))) {
2483 Value *VectorValue = State.get(getOperand(0));
2484 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2485 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2486 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2487 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2488 if (State.hasVectorValue(this))
2489 State.reset(this, VPhi);
2490 else
2491 State.set(this, VPhi);
2492 // NOTE: Currently we need to update the value of the operand, so the next
2493 // predicated iteration inserts its generated value in the correct vector.
2494 State.reset(getOperand(0), VPhi);
2495 } else {
2496 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2497 return;
2498
2499 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2500 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2501 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2502 PredicatingBB);
2503 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2504 if (State.hasScalarValue(this, *State.Lane))
2505 State.reset(this, Phi, *State.Lane);
2506 else
2507 State.set(this, Phi, *State.Lane);
2508 // NOTE: Currently we need to update the value of the operand, so the next
2509 // predicated iteration inserts its generated value in the correct vector.
2510 State.reset(getOperand(0), Phi, *State.Lane);
2511 }
2512}
2513
2514#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2515void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2516 VPSlotTracker &SlotTracker) const {
2517 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2519 O << " = ";
2521}
2522#endif
2523
2524InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
2525 VPCostContext &Ctx) const {
2527 const Align Alignment =
2529 unsigned AS =
2532
2533 if (!Consecutive) {
2534 // TODO: Using the original IR may not be accurate.
2535 // Currently, ARM will use the underlying IR to calculate gather/scatter
2536 // instruction cost.
2538 assert(!Reverse &&
2539 "Inconsecutive memory access should not have the order.");
2540 return Ctx.TTI.getAddressComputationCost(Ty) +
2542 IsMasked, Alignment, CostKind,
2543 &Ingredient);
2544 }
2545
2547 if (IsMasked) {
2548 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2549 AS, CostKind);
2550 } else {
2551 TTI::OperandValueInfo OpInfo =
2553 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2554 CostKind, OpInfo, &Ingredient);
2555 }
2556 if (!Reverse)
2557 return Cost;
2558
2560 cast<VectorType>(Ty), {}, CostKind, 0);
2561}
2562
2563void VPWidenLoadRecipe::execute(VPTransformState &State) {
2564 auto *LI = cast<LoadInst>(&Ingredient);
2565
2566 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2567 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2568 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2569 bool CreateGather = !isConsecutive();
2570
2571 auto &Builder = State.Builder;
2573 Value *Mask = nullptr;
2574 if (auto *VPMask = getMask()) {
2575 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2576 // of a null all-one mask is a null mask.
2577 Mask = State.get(VPMask);
2578 if (isReverse())
2579 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2580 }
2581
2582 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2583 Value *NewLI;
2584 if (CreateGather) {
2585 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2586 "wide.masked.gather");
2587 } else if (Mask) {
2588 NewLI =
2589 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2590 PoisonValue::get(DataTy), "wide.masked.load");
2591 } else {
2592 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2593 }
2594 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2595 State.addMetadata(NewLI, LI);
2596 if (Reverse)
2597 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2598 State.set(this, NewLI);
2599}
2600
2601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2602void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
2603 VPSlotTracker &SlotTracker) const {
2604 O << Indent << "WIDEN ";
2606 O << " = load ";
2608}
2609#endif
2610
2611/// Use all-true mask for reverse rather than actual mask, as it avoids a
2612/// dependence w/o affecting the result.
2613static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
2614 Value *EVL, const Twine &Name) {
2615 VectorType *ValTy = cast<VectorType>(Operand->getType());
2616 Value *AllTrueMask =
2617 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2618 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2619 {Operand, AllTrueMask, EVL}, nullptr, Name);
2620}
2621
2622void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2623 auto *LI = cast<LoadInst>(&Ingredient);
2624
2625 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2626 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2627 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2628 bool CreateGather = !isConsecutive();
2629
2630 auto &Builder = State.Builder;
2632 CallInst *NewLI;
2633 Value *EVL = State.get(getEVL(), VPLane(0));
2634 Value *Addr = State.get(getAddr(), !CreateGather);
2635 Value *Mask = nullptr;
2636 if (VPValue *VPMask = getMask()) {
2637 Mask = State.get(VPMask);
2638 if (isReverse())
2639 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2640 } else {
2641 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2642 }
2643
2644 if (CreateGather) {
2645 NewLI =
2646 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2647 nullptr, "wide.masked.gather");
2648 } else {
2649 VectorBuilder VBuilder(Builder);
2650 VBuilder.setEVL(EVL).setMask(Mask);
2651 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2652 Instruction::Load, DataTy, Addr, "vp.op.load"));
2653 }
2654 NewLI->addParamAttr(
2655 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2656 State.addMetadata(NewLI, LI);
2657 Instruction *Res = NewLI;
2658 if (isReverse())
2659 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2660 State.set(this, Res);
2661}
2662
2663InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
2664 VPCostContext &Ctx) const {
2665 if (!Consecutive || IsMasked)
2666 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2667
2668 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2669 // here because the EVL recipes use EVL to replace the tail mask, while the
2670 // legacy model always accounts for the cost of the mask.
2671 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2672 // no longer need to compare against the legacy cost model.
2674 const Align Alignment =
2676 unsigned AS =
2680 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2681 if (!Reverse)
2682 return Cost;
2683
2685 cast<VectorType>(Ty), {}, CostKind, 0);
2686}
2687
2688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2689void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2690 VPSlotTracker &SlotTracker) const {
2691 O << Indent << "WIDEN ";
2693 O << " = vp.load ";
2695}
2696#endif
2697
2698void VPWidenStoreRecipe::execute(VPTransformState &State) {
2699 auto *SI = cast<StoreInst>(&Ingredient);
2700
2701 VPValue *StoredVPValue = getStoredValue();
2702 bool CreateScatter = !isConsecutive();
2703 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2704
2705 auto &Builder = State.Builder;
2707
2708 Value *Mask = nullptr;
2709 if (auto *VPMask = getMask()) {
2710 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2711 // of a null all-one mask is a null mask.
2712 Mask = State.get(VPMask);
2713 if (isReverse())
2714 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2715 }
2716
2717 Value *StoredVal = State.get(StoredVPValue);
2718 if (isReverse()) {
2719 // If we store to reverse consecutive memory locations, then we need
2720 // to reverse the order of elements in the stored value.
2721 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2722 // We don't want to update the value in the map as it might be used in
2723 // another expression. So don't call resetVectorValue(StoredVal).
2724 }
2725 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2726 Instruction *NewSI = nullptr;
2727 if (CreateScatter)
2728 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2729 else if (Mask)
2730 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2731 else
2732 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2733 State.addMetadata(NewSI, SI);
2734}
2735
2736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2737void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
2738 VPSlotTracker &SlotTracker) const {
2739 O << Indent << "WIDEN store ";
2741}
2742#endif
2743
2744void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2745 auto *SI = cast<StoreInst>(&Ingredient);
2746
2747 VPValue *StoredValue = getStoredValue();
2748 bool CreateScatter = !isConsecutive();
2749 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2750
2751 auto &Builder = State.Builder;
2753
2754 CallInst *NewSI = nullptr;
2755 Value *StoredVal = State.get(StoredValue);
2756 Value *EVL = State.get(getEVL(), VPLane(0));
2757 if (isReverse())
2758 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2759 Value *Mask = nullptr;
2760 if (VPValue *VPMask = getMask()) {
2761 Mask = State.get(VPMask);
2762 if (isReverse())
2763 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2764 } else {
2765 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2766 }
2767 Value *Addr = State.get(getAddr(), !CreateScatter);
2768 if (CreateScatter) {
2769 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2770 Intrinsic::vp_scatter,
2771 {StoredVal, Addr, Mask, EVL});
2772 } else {
2773 VectorBuilder VBuilder(Builder);
2774 VBuilder.setEVL(EVL).setMask(Mask);
2775 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2776 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2777 {StoredVal, Addr}));
2778 }
2779 NewSI->addParamAttr(
2780 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2781 State.addMetadata(NewSI, SI);
2782}
2783
2784InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
2785 VPCostContext &Ctx) const {
2786 if (!Consecutive || IsMasked)
2787 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2788
2789 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2790 // here because the EVL recipes use EVL to replace the tail mask, while the
2791 // legacy model always accounts for the cost of the mask.
2792 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2793 // no longer need to compare against the legacy cost model.
2795 const Align Alignment =
2797 unsigned AS =
2801 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2802 if (!Reverse)
2803 return Cost;
2804
2806 cast<VectorType>(Ty), {}, CostKind, 0);
2807}
2808
2809#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2810void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2811 VPSlotTracker &SlotTracker) const {
2812 O << Indent << "WIDEN vp.store ";
2814}
2815#endif
2816
2817static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
2818 VectorType *DstVTy, const DataLayout &DL) {
2819 // Verify that V is a vector type with same number of elements as DstVTy.
2820 auto VF = DstVTy->getElementCount();
2821 auto *SrcVecTy = cast<VectorType>(V->getType());
2822 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2823 Type *SrcElemTy = SrcVecTy->getElementType();
2824 Type *DstElemTy = DstVTy->getElementType();
2825 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2826 "Vector elements must have same size");
2827
2828 // Do a direct cast if element types are castable.
2829 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2830 return Builder.CreateBitOrPointerCast(V, DstVTy);
2831 }
2832 // V cannot be directly casted to desired vector type.
2833 // May happen when V is a floating point vector but DstVTy is a vector of
2834 // pointers or vice-versa. Handle this using a two-step bitcast using an
2835 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2836 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2837 "Only one type should be a pointer type");
2838 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2839 "Only one type should be a floating point type");
2840 Type *IntTy =
2841 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2842 auto *VecIntTy = VectorType::get(IntTy, VF);
2843 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2844 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2845}
2846
2847/// Return a vector containing interleaved elements from multiple
2848/// smaller input vectors.
2849static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2850 const Twine &Name) {
2851 unsigned Factor = Vals.size();
2852 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2853
2854 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2855#ifndef NDEBUG
2856 for (Value *Val : Vals)
2857 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2858#endif
2859
2860 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2861 // must use intrinsics to interleave.
2862 if (VecTy->isScalableTy()) {
2863 VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2864 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2865 Vals,
2866 /*FMFSource=*/nullptr, Name);
2867 }
2868
2869 // Fixed length. Start by concatenating all vectors into a wide vector.
2870 Value *WideVec = concatenateVectors(Builder, Vals);
2871
2872 // Interleave the elements into the wide vector.
2873 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2874 return Builder.CreateShuffleVector(
2875 WideVec, createInterleaveMask(NumElts, Factor), Name);
2876}
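// Illustrative sketch (editor's addition, not from the LLVM sources): for
// fixed-width vectors the interleave mask for NumElts elements and Factor
// members is <0, NumElts, 2*NumElts, 1, NumElts+1, ...>; e.g. 4 elements at
// factor 3 gives <0,4,8, 1,5,9, 2,6,10, 3,7,11>, matching the interleaved
// store example in the comment below. makeInterleaveMask is a hypothetical
// stand-in mirroring llvm::createInterleaveMask:
#include <cstdio>
#include <vector>

std::vector<int> makeInterleaveMask(int NumElts, int Factor) {
  std::vector<int> Mask;
  for (int I = 0; I < NumElts; ++I)
    for (int J = 0; J < Factor; ++J)
      Mask.push_back(J * NumElts + I);   // lane I of member J in the wide vector
  return Mask;
}

int main() {
  for (int Idx : makeInterleaveMask(4, 3))
    printf("%d ", Idx);                  // 0 4 8 1 5 9 2 6 10 3 7 11
  printf("\n");
}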
2877
2878// Try to vectorize the interleave group that \p Instr belongs to.
2879//
2880// E.g. Translate following interleaved load group (factor = 3):
2881// for (i = 0; i < N; i+=3) {
2882// R = Pic[i]; // Member of index 0
2883// G = Pic[i+1]; // Member of index 1
2884// B = Pic[i+2]; // Member of index 2
2885// ... // do something to R, G, B
2886// }
2887// To:
2888// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2889// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2890// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2891// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2892//
2893// Or translate following interleaved store group (factor = 3):
2894// for (i = 0; i < N; i+=3) {
2895// ... do something to R, G, B
2896// Pic[i] = R; // Member of index 0
2897// Pic[i+1] = G; // Member of index 1
2898// Pic[i+2] = B; // Member of index 2
2899// }
2900// To:
2901// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2902// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2903// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2904// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2905// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2906void VPInterleaveRecipe::execute(VPTransformState &State) {
2907 assert(!State.Lane && "Interleave group being replicated.");
2908 const InterleaveGroup<Instruction> *Group = IG;
2909 Instruction *Instr = Group->getInsertPos();
2910
2911 // Prepare for the vector type of the interleaved load/store.
2912 Type *ScalarTy = getLoadStoreType(Instr);
2913 unsigned InterleaveFactor = Group->getFactor();
2914 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2915
2916 // TODO: extend the masked interleaved-group support to reversed access.
2917 VPValue *BlockInMask = getMask();
2918 assert((!BlockInMask || !Group->isReverse()) &&
2919 "Reversed masked interleave-group not supported.");
2920
2921 VPValue *Addr = getAddr();
2922 Value *ResAddr = State.get(Addr, VPLane(0));
2923 if (auto *I = dyn_cast<Instruction>(ResAddr))
2924 State.setDebugLocFrom(I->getDebugLoc());
2925
2926 // If the group is reverse, adjust the index to refer to the last vector lane
2927 // instead of the first. We adjust the index from the first vector lane,
2928 // rather than directly getting the pointer for lane VF - 1, because the
2929 // pointer operand of the interleaved access is supposed to be uniform.
2930 if (Group->isReverse()) {
2931 Value *RuntimeVF =
2932 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2933 Value *Index =
2934 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2935 Index = State.Builder.CreateMul(Index,
2936 State.Builder.getInt32(Group->getFactor()));
2937 Index = State.Builder.CreateNeg(Index);
2938
2939 bool InBounds = false;
2940 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2941 InBounds = Gep->isInBounds();
2942 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2943 }
2944
2945 State.setDebugLocFrom(Instr->getDebugLoc());
2946 Value *PoisonVec = PoisonValue::get(VecTy);
2947
2948 auto CreateGroupMask = [&BlockInMask, &State,
2949 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2950 if (State.VF.isScalable()) {
2951 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2952 assert(InterleaveFactor == 2 &&
2953 "Unsupported deinterleave factor for scalable vectors");
2954 auto *ResBlockInMask = State.get(BlockInMask);
2955 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2956 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2957 State.VF.getKnownMinValue() * 2, true);
2958 return State.Builder.CreateIntrinsic(
2959 MaskTy, Intrinsic::vector_interleave2, Ops,
2960 /*FMFSource=*/nullptr, "interleaved.mask");
2961 }
2962
2963 if (!BlockInMask)
2964 return MaskForGaps;
2965
2966 Value *ResBlockInMask = State.get(BlockInMask);
2967 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2968 ResBlockInMask,
2969 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2970 "interleaved.mask");
2971 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2972 ShuffledMask, MaskForGaps)
2973 : ShuffledMask;
2974 };
2975
2976 const DataLayout &DL = Instr->getDataLayout();
2977 // Vectorize the interleaved load group.
2978 if (isa<LoadInst>(Instr)) {
2979 Value *MaskForGaps = nullptr;
2980 if (NeedsMaskForGaps) {
2981 MaskForGaps = createBitMaskForGaps(State.Builder,
2982 State.VF.getKnownMinValue(), *Group);
2983 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2984 }
2985
2986 Instruction *NewLoad;
2987 if (BlockInMask || MaskForGaps) {
2988 Value *GroupMask = CreateGroupMask(MaskForGaps);
2989 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2990 Group->getAlign(), GroupMask,
2991 PoisonVec, "wide.masked.vec");
2992 } else
2993 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2994 Group->getAlign(), "wide.vec");
2995 Group->addMetadata(NewLoad);
2996
2997 ArrayRef<VPValue *> VPDefs = definedValues();
2998 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2999 if (VecTy->isScalableTy()) {
3000 assert(InterleaveFactor == 2 &&
3001 "Unsupported deinterleave factor for scalable vectors");
3002
3003 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3004 // so must use intrinsics to deinterleave.
3005 Value *DI = State.Builder.CreateIntrinsic(
3006 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3007 /*FMFSource=*/nullptr, "strided.vec");
3008 unsigned J = 0;
3009 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3010 Instruction *Member = Group->getMember(I);
3011
3012 if (!Member)
3013 continue;
3014
3015 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3016 // If this member has different type, cast the result type.
3017 if (Member->getType() != ScalarTy) {
3018 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3019 StridedVec =
3020 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3021 }
3022
3023 if (Group->isReverse())
3024 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3025
3026 State.set(VPDefs[J], StridedVec);
3027 ++J;
3028 }
3029
3030 return;
3031 }
3032
3033 // For each member in the group, shuffle out the appropriate data from the
3034 // wide loads.
3035 unsigned J = 0;
3036 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3037 Instruction *Member = Group->getMember(I);
3038
3039 // Skip the gaps in the group.
3040 if (!Member)
3041 continue;
3042
3043 auto StrideMask =
3044 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3045 Value *StridedVec =
3046 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3047
3048 // If this member has different type, cast the result type.
3049 if (Member->getType() != ScalarTy) {
3050 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3051 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3052 StridedVec =
3053 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3054 }
3055
3056 if (Group->isReverse())
3057 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3058
3059 State.set(VPDefs[J], StridedVec);
3060 ++J;
3061 }
3062 return;
3063 }
3064
3065 // The sub vector type for current instruction.
3066 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3067
3068 // Vectorize the interleaved store group.
3069 Value *MaskForGaps =
3070 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3071 assert((!MaskForGaps || !State.VF.isScalable()) &&
3072 "masking gaps for scalable vectors is not yet supported.");
3073 ArrayRef<VPValue *> StoredValues = getStoredValues();
3074 // Collect the stored vector from each member.
3075 SmallVector<Value *, 4> StoredVecs;
3076 unsigned StoredIdx = 0;
3077 for (unsigned i = 0; i < InterleaveFactor; i++) {
3078 assert((Group->getMember(i) || MaskForGaps) &&
3079 "Fail to get a member from an interleaved store group");
3080 Instruction *Member = Group->getMember(i);
3081
3082 // Skip the gaps in the group.
3083 if (!Member) {
3084 Value *Undef = PoisonValue::get(SubVT);
3085 StoredVecs.push_back(Undef);
3086 continue;
3087 }
3088
3089 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3090 ++StoredIdx;
3091
3092 if (Group->isReverse())
3093 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3094
3095 // If this member has different type, cast it to a unified type.
3096
3097 if (StoredVec->getType() != SubVT)
3098 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3099
3100 StoredVecs.push_back(StoredVec);
3101 }
3102
3103 // Interleave all the smaller vectors into one wider vector.
3104 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3105 Instruction *NewStoreInstr;
3106 if (BlockInMask || MaskForGaps) {
3107 Value *GroupMask = CreateGroupMask(MaskForGaps);
3108 NewStoreInstr = State.Builder.CreateMaskedStore(
3109 IVec, ResAddr, Group->getAlign(), GroupMask);
3110 } else
3111 NewStoreInstr =
3112 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3113
3114 Group->addMetadata(NewStoreInstr);
3115}
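// Illustrative sketch (editor's addition, not from the LLVM sources): for a
// fixed-width interleaved load, member I of a factor-F group is extracted with
// the strided shuffle mask <I, I+F, I+2F, ...>; e.g. member 1 of a factor-3
// group at VF=4 uses <1, 4, 7, 10>, as in the R/G/B example above.
// makeStrideMask is a hypothetical stand-in mirroring llvm::createStrideMask:
#include <cstdio>
#include <vector>

std::vector<int> makeStrideMask(int Start, int Stride, int VF) {
  std::vector<int> Mask;
  for (int I = 0; I < VF; ++I)
    Mask.push_back(Start + I * Stride);  // pick every Stride-th element
  return Mask;
}

int main() {
  for (int Idx : makeStrideMask(1, 3, 4))
    printf("%d ", Idx);                  // 1 4 7 10
  printf("\n");
}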
3116
3117#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3118void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
3119 VPSlotTracker &SlotTracker) const {
3120 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3121 IG->getInsertPos()->printAsOperand(O, false);
3122 O << ", ";
3123 getAddr()->printAsOperand(O, SlotTracker);
3124 VPValue *Mask = getMask();
3125 if (Mask) {
3126 O << ", ";
3127 Mask->printAsOperand(O, SlotTracker);
3128 }
3129
3130 unsigned OpIdx = 0;
3131 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3132 if (!IG->getMember(i))
3133 continue;
3134 if (getNumStoreOperands() > 0) {
3135 O << "\n" << Indent << " store ";
3136 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3137 O << " to index " << i;
3138 } else {
3139 O << "\n" << Indent << " ";
3141 O << " = load from index " << i;
3142 }
3143 ++OpIdx;
3144 }
3145}
3146#endif
3147
3148InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
3149 VPCostContext &Ctx) const {
3150 Instruction *InsertPos = getInsertPos();
3151 // Find the VPValue index of the interleave group. We need to skip gaps.
3152 unsigned InsertPosIdx = 0;
3153 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3154 if (auto *Member = IG->getMember(Idx)) {
3155 if (Member == InsertPos)
3156 break;
3157 InsertPosIdx++;
3158 }
3159 Type *ValTy = Ctx.Types.inferScalarType(
3160 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3161 : getStoredValues()[InsertPosIdx]);
3162 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3163 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3165
3166 unsigned InterleaveFactor = IG->getFactor();
3167 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3168
3169 // Holds the indices of existing members in the interleaved group.
3170 SmallVector<unsigned, 4> Indices;
3171 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3172 if (IG->getMember(IF))
3173 Indices.push_back(IF);
3174
3175 // Calculate the cost of the whole interleaved group.
3176 InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
3177 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3178 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3179
3180 if (!IG->isReverse())
3181 return Cost;
3182
3183 return Cost + IG->getNumMembers() *
3184 Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
3185 VectorTy, std::nullopt, CostKind, 0);
3186}
3187
3188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3189void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3190 VPSlotTracker &SlotTracker) const {
3191 O << Indent << "EMIT ";
3193 O << " = CANONICAL-INDUCTION ";
3195}
3196#endif
3197
3198bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
3199 return IsScalarAfterVectorization &&
3200 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3201}
3202
3203void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
3204 assert(getInductionDescriptor().getKind() ==
3205 InductionDescriptor::IK_PtrInduction &&
3206 "Not a pointer induction according to InductionDescriptor!");
3207 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3208 "Unexpected type.");
3210 "Recipe should have been replaced");
3211
3212 unsigned CurrentPart = getUnrollPart(*this);
3213
3214 // Build a pointer phi
3215 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3216 Type *ScStValueType = ScalarStartValue->getType();
3217
3218 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3219 PHINode *NewPointerPhi = nullptr;
3220 if (CurrentPart == 0) {
3221 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3222 ->getPlan()
3223 ->getVectorLoopRegion()
3224 ->getEntryBasicBlock()
3225 ->front());
3226 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3227 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3228 CanonicalIV->getIterator());
3229 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3230 NewPointerPhi->setDebugLoc(getDebugLoc());
3231 } else {
3232 // The recipe has been unrolled. In that case, fetch the single pointer phi
3233 // shared among all unrolled parts of the recipe.
3234 auto *GEP =
3235 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3236 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3237 }
3238
3239 // A pointer induction, performed by using a gep
3240 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3241 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3242 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3243 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3244 // Add induction update using an incorrect block temporarily. The phi node
3245 // will be fixed after VPlan execution. Note that at this point the latch
3246 // block cannot be used, as it does not exist yet.
3247 // TODO: Model increment value in VPlan, by turning the recipe into a
3248 // multi-def and a subclass of VPHeaderPHIRecipe.
3249 if (CurrentPart == 0) {
3250 // The recipe represents the first part of the pointer induction. Create the
3251 // GEP to increment the phi across all unrolled parts.
3252 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3253 Value *NumUnrolledElems =
3254 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3255
3256 Value *InductionGEP = GetElementPtrInst::Create(
3257 State.Builder.getInt8Ty(), NewPointerPhi,
3258 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3259 InductionLoc);
3260
3261 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3262 }
3263
3264 // Create actual address geps that use the pointer phi as base and a
3265 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3266 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3267 Value *StartOffsetScalar = State.Builder.CreateMul(
3268 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3269 Value *StartOffset =
3270 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3271 // Create a vector of consecutive numbers from zero to VF.
3272 StartOffset = State.Builder.CreateAdd(
3273 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3274
3275 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3276 "scalar step must be the same across all parts");
3277 Value *GEP = State.Builder.CreateGEP(
3278 State.Builder.getInt8Ty(), NewPointerPhi,
3279 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3280 State.VF, ScalarStepValue)),
3281 "vector.gep");
3282 State.set(this, GEP);
3283}
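// Illustrative sketch (editor's addition, not from the LLVM sources): for part
// P and lane L the generated "vector.gep" addresses PointerPhi + (P*VF + L) *
// Step bytes (the GEP above is i8-typed), while the phi itself advances by
// VF * UF * Step bytes per vector iteration. Plain C++ emulation with assumed
// example values:
#include <cstdio>

int main() {
  const long VF = 4, UF = 2, Step = 8;   // Step interpreted in bytes here
  long PointerPhi = 0;                   // running value of "pointer.phi"
  for (int Iter = 0; Iter < 2; ++Iter) {
    for (long P = 0; P < UF; ++P)
      for (long L = 0; L < VF; ++L)
        printf("iter %d part %ld lane %ld -> byte offset %ld\n", Iter, P, L,
               PointerPhi + (P * VF + L) * Step);
    PointerPhi += VF * UF * Step;        // ptr.ind = pointer.phi + VF*UF*Step
  }
}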
3284
3285#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3286void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
3287 VPSlotTracker &SlotTracker) const {
3288 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3289 "unexpected number of operands");
3290 O << Indent << "EMIT ";
3292 O << " = WIDEN-POINTER-INDUCTION ";
3294 O << ", ";
3296 if (getNumOperands() == 4) {
3297 O << ", ";
3299 O << ", ";
3301 }
3302}
3303#endif
3304
3305void VPExpandSCEVRecipe::execute(VPTransformState &State) {
3306 assert(!State.Lane && "cannot be used in per-lane");
3307 if (State.ExpandedSCEVs.contains(Expr)) {
3308 // SCEV Expr has already been expanded, result must already be set. At the
3309 // moment we have to execute the entry block twice (once before skeleton
3310 // creation to get expanded SCEVs used by the skeleton and once during
3311 // regular VPlan execution).
3313 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3314 "Results must match");
3315 return;
3316 }
3317
3318 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3319 SCEVExpander Exp(SE, DL, "induction");
3320
3321 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3322 &*State.Builder.GetInsertPoint());
3323 State.ExpandedSCEVs[Expr] = Res;
3324 State.set(this, Res, VPLane(0));
3325}
3326
3327#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3328void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
3329 VPSlotTracker &SlotTracker) const {
3330 O << Indent << "EMIT ";
3332 O << " = EXPAND SCEV " << *Expr;
3333}
3334#endif
3335
3336void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
3337 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3338 Type *STy = CanonicalIV->getType();
3339 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3340 ElementCount VF = State.VF;
3341 Value *VStart = VF.isScalar()
3342 ? CanonicalIV
3343 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3344 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3345 if (VF.isVector()) {
3346 VStep = Builder.CreateVectorSplat(VF, VStep);
3347 VStep =
3348 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3349 }
3350 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3351 State.set(this, CanonicalVectorIV);
3352}
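// Illustrative sketch (editor's addition, not from the LLVM sources): the
// widened canonical IV for unroll part P is splat(CanonicalIV) + (P*VF +
// <0, 1, ..., VF-1>), so each lane holds its own scalar iteration index:
#include <cstdio>

int main() {
  const int VF = 4, Part = 1;
  const int CanonicalIV = 8;                     // scalar canonical IV this iteration
  for (int L = 0; L < VF; ++L)
    printf("%d ", CanonicalIV + Part * VF + L);  // 12 13 14 15
  printf("\n");
}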
3353
3354#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3355void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
3356 VPSlotTracker &SlotTracker) const {
3357 O << Indent << "EMIT ";
3359 O << " = WIDEN-CANONICAL-INDUCTION ";
3361}
3362#endif
3363
3364void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
3365 auto &Builder = State.Builder;
3366 // Create a vector from the initial value.
3367 auto *VectorInit = getStartValue()->getLiveInIRValue();
3368
3369 Type *VecTy = State.VF.isScalar()
3370 ? VectorInit->getType()
3371 : VectorType::get(VectorInit->getType(), State.VF);
3372
3373 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3374 if (State.VF.isVector()) {
3375 auto *IdxTy = Builder.getInt32Ty();
3376 auto *One = ConstantInt::get(IdxTy, 1);
3377 IRBuilder<>::InsertPointGuard Guard(Builder);
3378 Builder.SetInsertPoint(VectorPH->getTerminator());
3379 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3380 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3381 VectorInit = Builder.CreateInsertElement(
3382 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3383 }
3384
3385 // Create a phi node for the new recurrence.
3386 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3387 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3388 Phi->addIncoming(VectorInit, VectorPH);
3389 State.set(this, Phi);
3390}
3391
3392InstructionCost
3393VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
3394 VPCostContext &Ctx) const {
3396 if (VF.isScalar())
3397 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3398
3399 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3400 return InstructionCost::getInvalid();
3401
3402 SmallVector<int> Mask(VF.getKnownMinValue());
3403 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3404 Type *VectorTy =
3405 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3406
3407 return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
3408 cast<VectorType>(VectorTy), Mask, CostKind,
3409 VF.getKnownMinValue() - 1);
3410}
3411
3412#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3413void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
3414 VPSlotTracker &SlotTracker) const {
3415 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3417 O << " = phi ";
3419}
3420#endif
3421
3422void VPReductionPHIRecipe::execute(VPTransformState &State) {
3423 auto &Builder = State.Builder;
3424
3425 // If this phi is fed by a scaled reduction then it should output a
3426 // vector with fewer elements than the VF.
3427 ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3428
3429 // Reductions do not have to start at zero. They can start with
3430 // any loop invariant values.
3431 VPValue *StartVPV = getStartValue();
3432 Value *StartV = StartVPV->getLiveInIRValue();
3433
3434 // In order to support recurrences we need to be able to vectorize Phi nodes.
3435 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3436 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3437 // this value when we vectorize all of the instructions that use the PHI.
3438 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3439 Type *VecTy =
3440 ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3441
3442 BasicBlock *HeaderBB = State.CFG.PrevBB;
3443 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3444 "recipe must be in the vector loop header");
3445 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3446 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3447 State.set(this, Phi, IsInLoop);
3448
3449 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3450
3451 Value *Iden = nullptr;
3452 RecurKind RK = RdxDesc.getRecurrenceKind();
3453 unsigned CurrentPart = getUnrollPart(*this);
3454
3455 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
3456 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
3457 // MinMax and AnyOf reductions have the start value as their identity.
3458 if (ScalarPHI) {
3459 Iden = StartV;
3460 } else {
3461 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3462 Builder.SetInsertPoint(VectorPH->getTerminator());
3463 StartV = Iden = State.get(StartVPV);
3464 }
3465 } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3466 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3467 // phi or the resume value from the main vector loop when vectorizing the
3468 // epilogue loop. In the exit block, ComputeReductionResult will generate
3469 // checks to verify if the reduction result is the sentinel value. If the
3470 // result is the sentinel value, it will be corrected back to the start
3471 // value.
3472 // TODO: The sentinel value is not always necessary. When the start value is
3473 // a constant, and smaller than the start value of the induction variable,
3474 // the start value can be directly used to initialize the reduction phi.
3475 Iden = StartV;
3476 if (!ScalarPHI) {
3477 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3478 Builder.SetInsertPoint(VectorPH->getTerminator());
3479 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3480 }
3481 } else {
3482 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3483 RdxDesc.getFastMathFlags());
3484
3485 if (!ScalarPHI) {
3486 if (CurrentPart == 0) {
3487 // Create start and identity vector values for the reduction in the
3488 // preheader.
3489 // TODO: Introduce recipes in VPlan preheader to create initial values.
3490 Iden = Builder.CreateVectorSplat(VF, Iden);
3491 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3492 Builder.SetInsertPoint(VectorPH->getTerminator());
3493 Constant *Zero = Builder.getInt32(0);
3494 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3495 } else {
3496 Iden = Builder.CreateVectorSplat(VF, Iden);
3497 }
3498 }
3499 }
3500
3501 Phi = cast<PHINode>(State.get(this, IsInLoop));
3502 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3503 Phi->addIncoming(StartVal, VectorPH);
3504}
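// Illustrative sketch (editor's addition, not from the LLVM sources): for an
// ordinary sum-like vector reduction phi (not min/max, any-of, or FindLastIV),
// part 0 starts as <StartV, Iden, ..., Iden> and every other part starts as a
// pure identity splat, so summing all starting lanes reproduces StartV:
#include <cstdio>

int main() {
  const int VF = 4, UF = 2;
  const int StartV = 7, Iden = 0;        // identity for integer add
  int Sum = 0;
  for (int Part = 0; Part < UF; ++Part)
    for (int L = 0; L < VF; ++L)
      Sum += (Part == 0 && L == 0) ? StartV : Iden;
  printf("%d\n", Sum);                   // prints 7
}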
3505
3506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3507void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3508 VPSlotTracker &SlotTracker) const {
3509 O << Indent << "WIDEN-REDUCTION-PHI ";
3510
3512 O << " = phi ";
3514 if (VFScaleFactor != 1)
3515 O << " (VF scaled by 1/" << VFScaleFactor << ")";
3516}
3517#endif
3518
3521 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3522
3523 Value *Op0 = State.get(getOperand(0));
3524 Type *VecTy = Op0->getType();
3525 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3526 State.set(this, VecPhi);
3527}
3528
3529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3530 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3531 VPSlotTracker &SlotTracker) const {
3532 O << Indent << "WIDEN-PHI ";
3533
3534 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3535 // Unless all incoming values are modeled in VPlan print the original PHI
3536 // directly.
3537 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3538 // values as VPValues.
3539 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3540 O << VPlanIngredient(OriginalPhi);
3541 return;
3542 }
3543
3544 printAsOperand(O, SlotTracker);
3545 O << " = phi ";
3546 printOperands(O, SlotTracker);
3547 }
3548#endif
3549
3550// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3551// remove VPActiveLaneMaskPHIRecipe.
3552 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
3553 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3554 Value *StartMask = State.get(getOperand(0));
3555 PHINode *Phi =
3556 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3557 Phi->addIncoming(StartMask, VectorPH);
3558 Phi->setDebugLoc(getDebugLoc());
3559 State.set(this, Phi);
3560}
3561
3562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3563 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3564 VPSlotTracker &SlotTracker) const {
3565 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3566
3567 printAsOperand(O, SlotTracker);
3568 O << " = phi ";
3569 printOperands(O, SlotTracker);
3570 }
3571#endif
3572
3573#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3574 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3575 VPSlotTracker &SlotTracker) const {
3576 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3577
3578 printAsOperand(O, SlotTracker);
3579 O << " = phi ";
3580 printOperands(O, SlotTracker);
3581 }
3582#endif
3583
3584 void VPScalarPHIRecipe::execute(VPTransformState &State) {
3585 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3586 Value *Start = State.get(getStartValue(), VPLane(0));
3587 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3588 Phi->addIncoming(Start, VectorPH);
3589 Phi->setDebugLoc(getDebugLoc());
3590 State.set(this, Phi, /*IsScalar=*/true);
3591}
3592
3593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3594 void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3595 VPSlotTracker &SlotTracker) const {
3596 O << Indent << "SCALAR-PHI ";
3597 printAsOperand(O, SlotTracker);
3598 O << " = phi ";
3599 printOperands(O, SlotTracker);
3600 }
3601#endif
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
Hexagon Common GEP
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
cl::opt< unsigned > ForceTargetInstructionCost
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:662
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:595
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2510
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2498
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1814
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2050
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1125
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1153
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1043
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2044
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2573
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1986
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2185
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2092
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1109
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2397
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1873
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1732
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2233
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2403
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2434
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1756
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2269
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1386
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1163
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:958
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2032
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2532
LLVMContext & getContext() const
Definition: IRBuilder.h:195
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1369
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2448
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2018
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1670
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1687
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1833
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2379
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1613
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2704
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isUnaryOp() const
Definition: Instruction.h:278
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
bool isReverse() const
Definition: VectorUtils.h:503
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:505
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3529
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3579
iterator end()
Definition: VPlan.h:3563
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3592
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2516
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2521
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2511
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2507
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
VPRegionBlock * getParent()
Definition: VPlan.h:489
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
VPlan * getPlan()
Definition: VPlan.cpp:153
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2883
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:114
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:415
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:410
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:388
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:400
unsigned getVPDefID() const
Definition: VPlanValue.h:420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3459
VPValue * getStartValue() const
Definition: VPlan.h:3458
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2063
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1804
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1210
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1198
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1213
@ CalculateTripCountMinusVF
Definition: VPlan.h:1211
bool hasResult() const
Definition: VPlan.h:1333
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1310
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2595
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2601
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2608
Instruction * getInsertPos() const
Definition: VPlan.h:2643
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2632
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
static VPLane getFirstLane()
Definition: VPlan.h:178
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2476
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:925
ExactFlagsTy ExactFlags
Definition: VPlan.h:975
FastMathFlagsTy FMFs
Definition: VPlan.h:978
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:977
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1145
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1106
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1148
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:974
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:976
WrapFlagsTy WrapFlags
Definition: VPlan.h:973
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1152
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1139
bool hasNoSignedWrap() const
Definition: VPlan.h:1158
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2756
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2714
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2718
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2708
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2720
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2712
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2716
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3700
const VPBlockBase * getEntry() const
Definition: VPlan.h:3733
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2843
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3516
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:911
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:441
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1456
operand_range operands()
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:272
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1452
friend class VPInstruction
Definition: VPlanValue.h:47
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:138
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
user_iterator user_begin()
Definition: VPlanValue.h:128
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1752
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1756
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1575
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1503
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:2119
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2116
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2122
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2194
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2203
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1695
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2954
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2951
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2990
Instruction & Ingredient
Definition: VPlan.h:2945
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2948
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3004
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2997
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2994
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2248
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1469
unsigned getUF() const
Definition: VPlan.h:4002
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:41
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:39
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:255
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the generated scalar value.
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
void reset(VPValue *Def, Value *V)
Reset the existing vector value for Def to V.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3074
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1848
VPValue * getCond() const
Definition: VPlan.h:1844
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3153
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3156
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3118
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.