VPlanRecipes.cpp
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "VPlanPatternMatch.h"
17#include "VPlanUtils.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Twine.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/Instruction.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/Type.h"
28#include "llvm/IR/Value.h"
32#include "llvm/Support/Debug.h"
37#include <cassert>
38
39using namespace llvm;
40
42
43namespace llvm {
45}
47
48#define LV_NAME "loop-vectorize"
49#define DEBUG_TYPE LV_NAME
50
51bool VPRecipeBase::mayWriteToMemory() const {
52  switch (getVPDefID()) {
53 case VPInstructionSC:
54 if (Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode()))
55 return false;
56 switch (cast<VPInstruction>(this)->getOpcode()) {
57 case Instruction::Or:
58 case Instruction::ICmp:
59 case Instruction::Select:
68 return false;
69 default:
70 return true;
71 }
72 case VPInterleaveSC:
73 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
74 case VPWidenStoreEVLSC:
75 case VPWidenStoreSC:
76 return true;
77 case VPReplicateSC:
78 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
79 ->mayWriteToMemory();
80 case VPWidenCallSC:
81 return !cast<VPWidenCallRecipe>(this)
82 ->getCalledScalarFunction()
83 ->onlyReadsMemory();
84 case VPWidenIntrinsicSC:
85 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
86 case VPBranchOnMaskSC:
87 case VPScalarIVStepsSC:
88 case VPPredInstPHISC:
89 return false;
90 case VPBlendSC:
91 case VPReductionEVLSC:
92 case VPReductionSC:
93 case VPVectorPointerSC:
94 case VPWidenCanonicalIVSC:
95 case VPWidenCastSC:
96 case VPWidenGEPSC:
97 case VPWidenIntOrFpInductionSC:
98 case VPWidenLoadEVLSC:
99 case VPWidenLoadSC:
100 case VPWidenPHISC:
101 case VPWidenSC:
102 case VPWidenEVLSC:
103 case VPWidenSelectSC: {
104 const Instruction *I =
105 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
106 (void)I;
107 assert((!I || !I->mayWriteToMemory()) &&
108 "underlying instruction may write to memory");
109 return false;
110 }
111 default:
112 return true;
113 }
114}
115
116bool VPRecipeBase::mayReadFromMemory() const {
117  switch (getVPDefID()) {
118 case VPWidenLoadEVLSC:
119 case VPWidenLoadSC:
120 return true;
121 case VPReplicateSC:
122 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
123 ->mayReadFromMemory();
124 case VPWidenCallSC:
125 return !cast<VPWidenCallRecipe>(this)
126 ->getCalledScalarFunction()
127 ->onlyWritesMemory();
128 case VPWidenIntrinsicSC:
129 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
130 case VPBranchOnMaskSC:
131 case VPPredInstPHISC:
132 case VPScalarIVStepsSC:
133 case VPWidenStoreEVLSC:
134 case VPWidenStoreSC:
135 return false;
136 case VPBlendSC:
137 case VPReductionEVLSC:
138 case VPReductionSC:
139 case VPVectorPointerSC:
140 case VPWidenCanonicalIVSC:
141 case VPWidenCastSC:
142 case VPWidenGEPSC:
143 case VPWidenIntOrFpInductionSC:
144 case VPWidenPHISC:
145 case VPWidenSC:
146 case VPWidenEVLSC:
147 case VPWidenSelectSC: {
148 const Instruction *I =
149 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
150 (void)I;
151 assert((!I || !I->mayReadFromMemory()) &&
152 "underlying instruction may read from memory");
153 return false;
154 }
155 default:
156 return true;
157 }
158}
159
160bool VPRecipeBase::mayHaveSideEffects() const {
161  switch (getVPDefID()) {
162 case VPDerivedIVSC:
163 case VPPredInstPHISC:
164 case VPScalarCastSC:
165 case VPReverseVectorPointerSC:
166 return false;
167 case VPInstructionSC:
168 return mayWriteToMemory();
169 case VPWidenCallSC: {
170 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
171 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
172 }
173 case VPWidenIntrinsicSC:
174 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
175 case VPBlendSC:
176 case VPReductionEVLSC:
177 case VPReductionSC:
178 case VPScalarIVStepsSC:
179 case VPVectorPointerSC:
180 case VPWidenCanonicalIVSC:
181 case VPWidenCastSC:
182 case VPWidenGEPSC:
183 case VPWidenIntOrFpInductionSC:
184 case VPWidenPHISC:
185 case VPWidenPointerInductionSC:
186 case VPWidenSC:
187 case VPWidenEVLSC:
188 case VPWidenSelectSC: {
189 const Instruction *I =
190 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
191 (void)I;
192 assert((!I || !I->mayHaveSideEffects()) &&
193 "underlying instruction has side-effects");
194 return false;
195 }
196 case VPInterleaveSC:
197 return mayWriteToMemory();
198 case VPWidenLoadEVLSC:
199 case VPWidenLoadSC:
200 case VPWidenStoreEVLSC:
201 case VPWidenStoreSC:
202    assert(
203        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
204            mayWriteToMemory() &&
205        "mayHaveSideEffects result for ingredient differs from this "
206        "implementation");
207 return mayWriteToMemory();
208 case VPReplicateSC: {
209 auto *R = cast<VPReplicateRecipe>(this);
210 return R->getUnderlyingInstr()->mayHaveSideEffects();
211 }
212 default:
213 return true;
214 }
215}
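// Note that all three queries above fall back to returning true for recipe
// kinds they do not explicitly handle, so unknown recipes are conservatively
// treated as reading from and writing to memory and as having side effects.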
216
218 assert(!Parent && "Recipe already in some VPBasicBlock");
219 assert(InsertPos->getParent() &&
220 "Insertion position not in any VPBasicBlock");
221 InsertPos->getParent()->insert(this, InsertPos->getIterator());
222}
223
226 assert(!Parent && "Recipe already in some VPBasicBlock");
227 assert(I == BB.end() || I->getParent() == &BB);
228 BB.insert(this, I);
229}
230
232 assert(!Parent && "Recipe already in some VPBasicBlock");
233 assert(InsertPos->getParent() &&
234 "Insertion position not in any VPBasicBlock");
235 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
236}
237
239 assert(getParent() && "Recipe not in any VPBasicBlock");
241 Parent = nullptr;
242}
243
245 assert(getParent() && "Recipe not in any VPBasicBlock");
247}
248
251 insertAfter(InsertPos);
252}
253
257 insertBefore(BB, I);
258}
259
261 // Get the underlying instruction for the recipe, if there is one. It is used
262 // to
263 // * decide if cost computation should be skipped for this recipe,
264 // * apply forced target instruction cost.
265 Instruction *UI = nullptr;
266 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
267 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
268 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
269 UI = IG->getInsertPos();
270 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
271 UI = &WidenMem->getIngredient();
272
273 InstructionCost RecipeCost;
274 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
275 RecipeCost = 0;
276 } else {
277 RecipeCost = computeCost(VF, Ctx);
278 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
279 RecipeCost.isValid())
281 }
282
283 LLVM_DEBUG({
284 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
285 dump();
286 });
287 return RecipeCost;
288}
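// Note: cost() above is the entry point used by the VPlan-based cost model;
// individual recipes are expected to override computeCost(), and the base
// implementation below is deliberately unreachable.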
289
291 VPCostContext &Ctx) const {
292 llvm_unreachable("subclasses should implement computeCost");
293}
294
297 VPCostContext &Ctx) const {
298 std::optional<unsigned> Opcode = std::nullopt;
300 if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
301 Opcode = std::make_optional(WidenR->getOpcode());
302
303 VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
304 VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
305
306 auto GetExtendKind = [](VPRecipeBase *R) {
307 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
308 if (!WidenCastR)
310 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
312 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
315 };
316
317 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
318 auto *ExtTy = Ctx.Types.inferScalarType(ExtAR->getOperand(0));
319
320 return Ctx.TTI.getPartialReductionCost(getOpcode(), ExtTy, PhiType, VF,
321 GetExtendKind(ExtAR),
322 GetExtendKind(ExtBR), Opcode);
323}
324
327 auto &Builder = State.Builder;
328
329 assert(getOpcode() == Instruction::Add &&
330 "Unhandled partial reduction opcode");
331
332 Value *BinOpVal = State.get(getOperand(0));
333 Value *PhiVal = State.get(getOperand(1));
334 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
335
336 Type *RetTy = PhiVal->getType();
337
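  // The partial-reduce intrinsic folds the wider BinOp vector into the
  // narrower accumulator type of the reduction phi; how input lanes are
  // grouped onto accumulator lanes is left unspecified (see the LangRef
  // entry for llvm.experimental.vector.partial.reduce.add).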
338 CallInst *V = Builder.CreateIntrinsic(
339 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
340 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
341
342 State.set(this, V);
343}
344
345#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
347 VPSlotTracker &SlotTracker) const {
348 O << Indent << "PARTIAL-REDUCE ";
350 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
352}
353#endif
354
356 assert(OpType == OperationType::FPMathOp &&
357 "recipe doesn't have fast math flags");
358 FastMathFlags Res;
359 Res.setAllowReassoc(FMFs.AllowReassoc);
360 Res.setNoNaNs(FMFs.NoNaNs);
361 Res.setNoInfs(FMFs.NoInfs);
362 Res.setNoSignedZeros(FMFs.NoSignedZeros);
363 Res.setAllowReciprocal(FMFs.AllowReciprocal);
364 Res.setAllowContract(FMFs.AllowContract);
365 Res.setApproxFunc(FMFs.ApproxFunc);
366 return Res;
367}
368
369#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
371#endif
372
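// Helpers for the optional trailing unroll-part operand: VPlan unrolling may
// append a constant operand recording which unrolled part a recipe belongs
// to; recipes without that extra operand are treated as part 0.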
373template <unsigned PartOpIdx>
374VPValue *
376 if (U.getNumOperands() == PartOpIdx + 1)
377 return U.getOperand(PartOpIdx);
378 return nullptr;
379}
380
381template <unsigned PartOpIdx>
383 if (auto *UnrollPartOp = getUnrollPartOperand(U))
384 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
385 return 0;
386}
387
390 const Twine &Name)
391 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
392 Pred, DL),
393 Opcode(Opcode), Name(Name.str()) {
394 assert(Opcode == Instruction::ICmp &&
395 "only ICmp predicates supported at the moment");
396}
397
399 std::initializer_list<VPValue *> Operands,
400 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
401 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
402 Opcode(Opcode), Name(Name.str()) {
403 // Make sure the VPInstruction is a floating-point operation.
404 assert(isFPMathOp() && "this op can't take fast-math flags");
405}
406
407bool VPInstruction::doesGeneratePerAllLanes() const {
408 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
409}
410
411bool VPInstruction::canGenerateScalarForFirstLane() const {
413 return true;
415 return true;
416 switch (Opcode) {
417 case Instruction::ICmp:
418 case Instruction::Select:
426 return true;
427 default:
428 return false;
429 }
430}
431
432Value *VPInstruction::generatePerLane(VPTransformState &State,
433 const VPLane &Lane) {
434 IRBuilderBase &Builder = State.Builder;
435
437 "only PtrAdd opcodes are supported for now");
438 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
439 State.get(getOperand(1), Lane), Name);
440}
441
442Value *VPInstruction::generate(VPTransformState &State) {
443 IRBuilderBase &Builder = State.Builder;
444
446 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
447 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
448 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
449 auto *Res =
450 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
451 if (auto *I = dyn_cast<Instruction>(Res))
452 setFlags(I);
453 return Res;
454 }
455
456 switch (getOpcode()) {
457 case VPInstruction::Not: {
458 Value *A = State.get(getOperand(0));
459 return Builder.CreateNot(A, Name);
460 }
461 case Instruction::ICmp: {
462 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
463 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
464 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
465 return Builder.CreateCmp(getPredicate(), A, B, Name);
466 }
467 case Instruction::Select: {
468 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
469 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
470 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
471 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
472 return Builder.CreateSelect(Cond, Op1, Op2, Name);
473 }
475 // Get first lane of vector induction variable.
476 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
477 // Get the original loop tripcount.
478 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
479
480 // If this part of the active lane mask is scalar, generate the CMP directly
481 // to avoid unnecessary extracts.
482 if (State.VF.isScalar())
483 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
484 Name);
485
486 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
487 auto *PredTy = VectorType::get(Int1Ty, State.VF);
488 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
489 {PredTy, ScalarTC->getType()},
490 {VIVElem0, ScalarTC}, nullptr, Name);
491 }
493 // Generate code to combine the previous and current values in vector v3.
494 //
495 // vector.ph:
496 // v_init = vector(..., ..., ..., a[-1])
497 // br vector.body
498 //
499 // vector.body
500 // i = phi [0, vector.ph], [i+4, vector.body]
501 // v1 = phi [v_init, vector.ph], [v2, vector.body]
502 // v2 = a[i, i+1, i+2, i+3];
503 // v3 = vector(v1(3), v2(0, 1, 2))
504
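    // CreateVectorSplice with an offset of -1 below realizes v3: the last
    // lane of the previous value (v1) followed by the first VF-1 lanes of
    // the current value (v2).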
505 auto *V1 = State.get(getOperand(0));
506 if (!V1->getType()->isVectorTy())
507 return V1;
508 Value *V2 = State.get(getOperand(1));
509 return Builder.CreateVectorSplice(V1, V2, -1, Name);
510 }
512 unsigned UF = getParent()->getPlan()->getUF();
513 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
514 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
515 Value *Sub = Builder.CreateSub(ScalarTC, Step);
516 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
517 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
518 return Builder.CreateSelect(Cmp, Sub, Zero);
519 }
521 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
522 // be outside of the main loop.
523 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
524 // Compute EVL
525 assert(AVL->getType()->isIntegerTy() &&
526 "Requested vector length should be an integer.");
527
528 assert(State.VF.isScalable() && "Expected scalable vector factor.");
529 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
530
531 Value *EVL = State.Builder.CreateIntrinsic(
532 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
533 {AVL, VFArg, State.Builder.getTrue()});
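    // Per the semantics of llvm.experimental.get.vector.length, the returned
    // EVL is at most AVL and at most VF * vscale; the trailing 'true'
    // argument marks the requested VF as scalable.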
534 return EVL;
535 }
537 unsigned Part = getUnrollPart(*this);
538 auto *IV = State.get(getOperand(0), VPLane(0));
539 assert(Part != 0 && "Must have a positive part");
540 // The canonical IV is incremented by the vectorization factor (num of
541 // SIMD elements) times the unroll part.
542 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
543 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
545 }
547 Value *Cond = State.get(getOperand(0), VPLane(0));
548 // Replace the temporary unreachable terminator with a new conditional
549 // branch, hooking it up to backward destination for exiting blocks now and
550 // to forward destination(s) later when they are created.
551 BranchInst *CondBr =
552 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
553 CondBr->setSuccessor(0, nullptr);
555
556 if (!getParent()->isExiting())
557 return CondBr;
558
559 VPRegionBlock *ParentRegion = getParent()->getParent();
560 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
561 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
562 return CondBr;
563 }
565 // First create the compare.
566 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
567 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
568 Value *Cond = Builder.CreateICmpEQ(IV, TC);
569
570 // Now create the branch.
571 auto *Plan = getParent()->getPlan();
572 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
573 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
574
575 // Replace the temporary unreachable terminator with a new conditional
576 // branch, hooking it up to backward destination (the header) now and to the
577 // forward destination (the exit/middle block) later when it is created.
578 // Note that CreateCondBr expects a valid BB as first argument, so we need
579 // to set it to nullptr later.
580 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
581 State.CFG.VPBB2IRBB[Header]);
582 CondBr->setSuccessor(0, nullptr);
584 return CondBr;
585 }
587 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
588 // and will be removed by breaking up the recipe further.
589 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
590 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
591 // Get its reduction variable descriptor.
592 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
593
594 RecurKind RK = RdxDesc.getRecurrenceKind();
595
596 Type *PhiTy = OrigPhi->getType();
597 // The recipe's operands are the reduction phi, followed by one operand for
598 // each part of the reduction.
599 unsigned UF = getNumOperands() - 1;
600 VectorParts RdxParts(UF);
601 for (unsigned Part = 0; Part < UF; ++Part)
602 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
603
604 // If the vector reduction can be performed in a smaller type, we truncate
605 // then extend the loop exit value to enable InstCombine to evaluate the
606 // entire expression in the smaller type.
607 // TODO: Handle this in truncateToMinBW.
608 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
609 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
610 for (unsigned Part = 0; Part < UF; ++Part)
611 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
612 }
613 // Reduce all of the unrolled parts into a single vector.
614 Value *ReducedPartRdx = RdxParts[0];
615 unsigned Op = RdxDesc.getOpcode();
617 Op = Instruction::Or;
618
619 if (PhiR->isOrdered()) {
620 ReducedPartRdx = RdxParts[UF - 1];
621 } else {
622 // Floating-point operations should have some FMF to enable the reduction.
624 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
625 for (unsigned Part = 1; Part < UF; ++Part) {
626 Value *RdxPart = RdxParts[Part];
627 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
628 ReducedPartRdx = Builder.CreateBinOp(
629 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
631 ReducedPartRdx =
632 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
633 else
634 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
635 }
636 }
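    // At this point the UF per-part values have been folded into a single
    // vector (or, for ordered reductions, the last part was taken directly);
    // a final horizontal reduction may still be needed below.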
637
638 // Create the reduction after the loop. Note that inloop reductions create
639 // the target reduction in the loop using a Reduction recipe.
640 if ((State.VF.isVector() ||
643 !PhiR->isInLoop()) {
644 ReducedPartRdx =
645 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
646 // If the reduction can be performed in a smaller type, we need to extend
647 // the reduction to the wider type before we branch to the original loop.
648 if (PhiTy != RdxDesc.getRecurrenceType())
649 ReducedPartRdx = RdxDesc.isSigned()
650 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
651 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
652 }
653
654 return ReducedPartRdx;
655 }
657 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
658 unsigned Offset = CI->getZExtValue();
659 assert(Offset > 0 && "Offset from end must be positive");
660 Value *Res;
661 if (State.VF.isVector()) {
662 assert(Offset <= State.VF.getKnownMinValue() &&
663 "invalid offset to extract from");
664 // Extract lane VF - Offset from the operand.
665 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
666 } else {
667 assert(Offset <= 1 && "invalid offset to extract from");
668 Res = State.get(getOperand(0));
669 }
670 if (isa<ExtractElementInst>(Res))
671 Res->setName(Name);
672 return Res;
673 }
675 Value *A = State.get(getOperand(0));
676 Value *B = State.get(getOperand(1));
677 return Builder.CreateLogicalAnd(A, B, Name);
678 }
681 "can only generate first lane for PtrAdd");
682 Value *Ptr = State.get(getOperand(0), VPLane(0));
683 Value *Addend = State.get(getOperand(1), VPLane(0));
684 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
685 }
687 Value *IncomingFromVPlanPred =
688 State.get(getOperand(0), /* IsScalar */ true);
689 Value *IncomingFromOtherPreds =
690 State.get(getOperand(1), /* IsScalar */ true);
691 auto *NewPhi =
692 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
693 BasicBlock *VPlanPred =
694 State.CFG
695 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
696 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
697 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
698 if (OtherPred == VPlanPred)
699 continue;
700 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
701 }
702 return NewPhi;
703 }
705 Value *A = State.get(getOperand(0));
706 return Builder.CreateOrReduce(A);
707 }
708
709 default:
710 llvm_unreachable("Unsupported opcode for instruction");
711 }
712}
713
718}
719
722}
723
724#if !defined(NDEBUG)
725bool VPInstruction::isFPMathOp() const {
726 // Inspired by FPMathOperator::classof. Notable differences are that we don't
727 // support Call, PHI and Select opcodes here yet.
728 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
729 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
730 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
731 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
732}
733#endif
734
736  assert(!State.Lane && "VPInstruction executing a Lane");
738 assert((hasFastMathFlags() == isFPMathOp() ||
739 getOpcode() == Instruction::Select) &&
740 "Recipe not a FPMathOp but has fast-math flags?");
741 if (hasFastMathFlags())
744 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
747 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
748 if (GeneratesPerAllLanes) {
749 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
750 Lane != NumLanes; ++Lane) {
751 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
752 assert(GeneratedValue && "generatePerLane must produce a value");
753 State.set(this, GeneratedValue, VPLane(Lane));
754 }
755 return;
756 }
757
758 Value *GeneratedValue = generate(State);
759 if (!hasResult())
760 return;
761 assert(GeneratedValue && "generate must produce a value");
762 assert(
763 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
764 State.VF.isScalar()) &&
765 "scalar value but not only first lane defined");
766 State.set(this, GeneratedValue,
767 /*IsScalar*/ GeneratesPerFirstLaneOnly);
768}
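// In summary, execute() has three emission modes: one scalar value per lane
// (currently only PtrAdd whose users need all lanes), a single scalar for the
// first lane only, or one widened value covering the whole vectorization
// factor.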
769
771 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
773 return vputils::onlyFirstLaneUsed(this);
774
775 switch (getOpcode()) {
776 default:
777 return false;
778 case Instruction::ICmp:
779 case Instruction::Select:
780 case Instruction::Or:
782 // TODO: Cover additional opcodes.
783 return vputils::onlyFirstLaneUsed(this);
791 return true;
792 };
793 llvm_unreachable("switch should return");
794}
795
797 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
799 return vputils::onlyFirstPartUsed(this);
800
801 switch (getOpcode()) {
802 default:
803 return false;
804 case Instruction::ICmp:
805 case Instruction::Select:
806 return vputils::onlyFirstPartUsed(this);
810 return true;
811 };
812 llvm_unreachable("switch should return");
813}
814
815#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
817 VPSlotTracker SlotTracker(getParent()->getPlan());
818 print(dbgs(), "", SlotTracker);
819}
820
822 VPSlotTracker &SlotTracker) const {
823 O << Indent << "EMIT ";
824
825 if (hasResult()) {
827 O << " = ";
828 }
829
830 switch (getOpcode()) {
832 O << "not";
833 break;
835 O << "combined load";
836 break;
838 O << "combined store";
839 break;
841 O << "active lane mask";
842 break;
844 O << "resume-phi";
845 break;
847 O << "EXPLICIT-VECTOR-LENGTH";
848 break;
850 O << "first-order splice";
851 break;
853 O << "branch-on-cond";
854 break;
856 O << "TC > VF ? TC - VF : 0";
857 break;
859 O << "VF * Part +";
860 break;
862 O << "branch-on-count";
863 break;
865 O << "extract-from-end";
866 break;
868 O << "compute-reduction-result";
869 break;
871 O << "logical-and";
872 break;
874 O << "ptradd";
875 break;
877 O << "any-of";
878 break;
879 default:
881 }
882
883 printFlags(O);
885
886 if (auto DL = getDebugLoc()) {
887 O << ", !dbg ";
888 DL.print(O);
889 }
890}
891#endif
892
894 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
895 "Only PHINodes can have extra operands");
896 for (const auto &[Idx, Op] : enumerate(operands())) {
897 VPValue *ExitValue = Op;
898 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
902 auto *PredVPBB = Pred->getExitingBasicBlock();
903 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
904 // Set insertion point in PredBB in case an extract needs to be generated.
905 // TODO: Model extracts explicitly.
906 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
907 Value *V = State.get(ExitValue, VPLane(Lane));
908 auto *Phi = cast<PHINode>(&I);
909 // If there is no existing block for PredBB in the phi, add a new incoming
910 // value. Otherwise update the existing incoming value for PredBB.
911 if (Phi->getBasicBlockIndex(PredBB) == -1)
912 Phi->addIncoming(V, PredBB);
913 else
914 Phi->setIncomingValueForBlock(PredBB, V);
915 }
916
917 // Advance the insert point after the wrapped IR instruction. This allows
918 // interleaving VPIRInstructions and other recipes.
919 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
920}
921
923 VPCostContext &Ctx) const {
924 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
925 // hence it does not contribute to the cost-modeling for the VPlan.
926 return 0;
927}
928
929#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
931 VPSlotTracker &SlotTracker) const {
932 O << Indent << "IR " << I;
933
934 if (getNumOperands() != 0) {
935 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
937 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
938 Op.value()->printAsOperand(O, SlotTracker);
939 O << " from ";
940 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
941 });
942 O << ")";
943 }
944}
945#endif
946
948 assert(State.VF.isVector() && "not widening");
950
951 FunctionType *VFTy = Variant->getFunctionType();
952 // Add return type if intrinsic is overloaded on it.
954 for (const auto &I : enumerate(arg_operands())) {
955 Value *Arg;
956 // Some vectorized function variants may also take a scalar argument,
957 // e.g. linear parameters for pointers. This needs to be the scalar value
958 // from the start of the respective part when interleaving.
959 if (!VFTy->getParamType(I.index())->isVectorTy())
960 Arg = State.get(I.value(), VPLane(0));
961 else
962 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
963 Args.push_back(Arg);
964 }
965
966 assert(Variant != nullptr && "Can't create vector function.");
967
968 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
970 if (CI)
971 CI->getOperandBundlesAsDefs(OpBundles);
972
973 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
974 setFlags(V);
975
976 if (!V->getType()->isVoidTy())
977 State.set(this, V);
978 State.addMetadata(V, CI);
979}
980
982 VPCostContext &Ctx) const {
984 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
985 Variant->getFunctionType()->params(),
986 CostKind);
987}
988
989#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
991 VPSlotTracker &SlotTracker) const {
992 O << Indent << "WIDEN-CALL ";
993
994 Function *CalledFn = getCalledScalarFunction();
995 if (CalledFn->getReturnType()->isVoidTy())
996 O << "void ";
997 else {
999 O << " = ";
1000 }
1001
1002 O << "call";
1003 printFlags(O);
1004 O << " @" << CalledFn->getName() << "(";
1006 Op->printAsOperand(O, SlotTracker);
1007 });
1008 O << ")";
1009
1010 O << " (using library function";
1011 if (Variant->hasName())
1012 O << ": " << Variant->getName();
1013 O << ")";
1014}
1015#endif
1016
1018 assert(State.VF.isVector() && "not widening");
1020
1021 SmallVector<Type *, 2> TysForDecl;
1022 // Add return type if intrinsic is overloaded on it.
1023 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1024 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1026 for (const auto &I : enumerate(operands())) {
1027 // Some intrinsics have a scalar argument - don't replace it with a
1028 // vector.
1029 Value *Arg;
1030 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1031 State.TTI))
1032 Arg = State.get(I.value(), VPLane(0));
1033 else
1034 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1035 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1036 State.TTI))
1037 TysForDecl.push_back(Arg->getType());
1038 Args.push_back(Arg);
1039 }
1040
1041 // Use vector version of the intrinsic.
1042 Module *M = State.Builder.GetInsertBlock()->getModule();
1043 Function *VectorF =
1044 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1045 assert(VectorF &&
1046 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1047
1048 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1050 if (CI)
1051 CI->getOperandBundlesAsDefs(OpBundles);
1052
1053 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1054
1055 setFlags(V);
1056
1057 if (!V->getType()->isVoidTy())
1058 State.set(this, V);
1059 State.addMetadata(V, CI);
1060}
1061
1063 VPCostContext &Ctx) const {
1065
1066 // Some backends analyze intrinsic arguments to determine cost. Use the
1067 // underlying value for the operand if it has one. Otherwise try to use the
1068 // operand of the underlying call instruction, if there is one. Otherwise
1069 // clear Arguments.
1070 // TODO: Rework TTI interface to be independent of concrete IR values.
1072 for (const auto &[Idx, Op] : enumerate(operands())) {
1073 auto *V = Op->getUnderlyingValue();
1074 if (!V) {
1075      // Push all of the VP intrinsic's operands into Arguments, even if an
1076      // operand is nullptr. Some VP intrinsic cost computations assert on the
1077      // number of parameters. This mainly happens in two scenarios:
1078      // 1. The EVL operand is nullptr.
1079      // 2. An argument of the VP intrinsic is itself a VP intrinsic.
1080 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1081 Arguments.push_back(V);
1082 continue;
1083 }
1084 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1085 Arguments.push_back(UI->getArgOperand(Idx));
1086 continue;
1087 }
1088 Arguments.clear();
1089 break;
1090 }
1091 Arguments.push_back(V);
1092 }
1093
1094 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1095 SmallVector<Type *> ParamTys;
1096 for (unsigned I = 0; I != getNumOperands(); ++I)
1097 ParamTys.push_back(
1099
1100 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1102 IntrinsicCostAttributes CostAttrs(
1103 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1104 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1105 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1106}
1107
1109 return Intrinsic::getBaseName(VectorIntrinsicID);
1110}
1111
1113 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1114  // Vector predication intrinsics only demand the first lane of the last
1115  // operand (the EVL operand).
1116 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1117 Op == getOperand(getNumOperands() - 1);
1118}
1119
1120#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1122 VPSlotTracker &SlotTracker) const {
1123 O << Indent << "WIDEN-INTRINSIC ";
1124 if (ResultTy->isVoidTy()) {
1125 O << "void ";
1126 } else {
1128 O << " = ";
1129 }
1130
1131 O << "call";
1132 printFlags(O);
1133 O << getIntrinsicName() << "(";
1134
1136 Op->printAsOperand(O, SlotTracker);
1137 });
1138 O << ")";
1139}
1140#endif
1141
1144 IRBuilderBase &Builder = State.Builder;
1145
1146 Value *Address = State.get(getOperand(0));
1147 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1148 VectorType *VTy = cast<VectorType>(Address->getType());
1149
1150 // The histogram intrinsic requires a mask even if the recipe doesn't;
1151 // if the mask operand was omitted then all lanes should be executed and
1152 // we just need to synthesize an all-true mask.
1153 Value *Mask = nullptr;
1154 if (VPValue *VPMask = getMask())
1155 Mask = State.get(VPMask);
1156 else
1157 Mask =
1158 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1159
1160 // If this is a subtract, we want to invert the increment amount. We may
1161 // add a separate intrinsic in future, but for now we'll try this.
1162 if (Opcode == Instruction::Sub)
1163 IncAmt = Builder.CreateNeg(IncAmt);
1164 else
1165 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1166
1167 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1168 {VTy, IncAmt->getType()},
1169 {Address, IncAmt, Mask});
1170}
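// Illustrative example: a scalar loop performing buckets[idx[i]] += inc is
// emitted as a single llvm.experimental.vector.histogram.add call per vector
// iteration, taking a vector of bucket addresses, the scalar increment and a
// mask.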
1171
1173 VPCostContext &Ctx) const {
1174 // FIXME: Take the gather and scatter into account as well. For now we're
1175 // generating the same cost as the fallback path, but we'll likely
1176 // need to create a new TTI method for determining the cost, including
1177 // whether we can use base + vec-of-smaller-indices or just
1178 // vec-of-pointers.
1179 assert(VF.isVector() && "Invalid VF for histogram cost");
1180 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1181 VPValue *IncAmt = getOperand(1);
1182 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1183 VectorType *VTy = VectorType::get(IncTy, VF);
1184
1185 // Assume that a non-constant update value (or a constant != 1) requires
1186 // a multiply, and add that into the cost.
1188 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1189 if (IncAmt->isLiveIn()) {
1190 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1191
1192 if (CI && CI->getZExtValue() == 1)
1193 MulCost = TTI::TCC_Free;
1194 }
1195
1196 // Find the cost of the histogram operation itself.
1197 Type *PtrTy = VectorType::get(AddressTy, VF);
1198 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1199 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1201 {PtrTy, IncTy, MaskTy});
1202
1203 // Add the costs together with the add/sub operation.
1204 return Ctx.TTI.getIntrinsicInstrCost(
1206 MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1207}
1208
1209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1211 VPSlotTracker &SlotTracker) const {
1212 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1214
1215 if (Opcode == Instruction::Sub)
1216 O << ", dec: ";
1217 else {
1218 assert(Opcode == Instruction::Add);
1219 O << ", inc: ";
1220 }
1222
1223 if (VPValue *Mask = getMask()) {
1224 O << ", mask: ";
1225 Mask->printAsOperand(O, SlotTracker);
1226 }
1227}
1228
1230 VPSlotTracker &SlotTracker) const {
1231 O << Indent << "WIDEN-SELECT ";
1233 O << " = select ";
1235 O << ", ";
1237 O << ", ";
1239 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1240}
1241#endif
1242
1245
1246 // The condition can be loop invariant but still defined inside the
1247 // loop. This means that we can't just use the original 'cond' value.
1248 // We have to take the 'vectorized' value and pick the first lane.
1249 // Instcombine will make this a no-op.
1250 auto *InvarCond =
1251 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1252
1253 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1254 Value *Op0 = State.get(getOperand(1));
1255 Value *Op1 = State.get(getOperand(2));
1256 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1257 State.set(this, Sel);
1258 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1259}
1260
1262 VPCostContext &Ctx) const {
1263 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1264 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1265 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1266 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1268
1269 VPValue *Op0, *Op1;
1270 using namespace llvm::VPlanPatternMatch;
1271 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1272 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1273 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1274 // select x, y, false --> x & y
1275 // select x, true, y --> x | y
1276 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1277 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1278
1280 if (all_of(operands(),
1281 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1282 Operands.append(SI->op_begin(), SI->op_end());
1283 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1284 return Ctx.TTI.getArithmeticInstrCost(
1285 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1286 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1287 }
1288
1289 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1290 if (!ScalarCond)
1291 CondTy = VectorType::get(CondTy, VF);
1292
1294 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1295 Pred = Cmp->getPredicate();
1296 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1299}
1300
1301VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1302 const FastMathFlags &FMF) {
1303 AllowReassoc = FMF.allowReassoc();
1304 NoNaNs = FMF.noNaNs();
1305 NoInfs = FMF.noInfs();
1306 NoSignedZeros = FMF.noSignedZeros();
1307 AllowReciprocal = FMF.allowReciprocal();
1308 AllowContract = FMF.allowContract();
1309 ApproxFunc = FMF.approxFunc();
1310}
1311
1312#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1314 switch (OpType) {
1315 case OperationType::Cmp:
1317 break;
1318 case OperationType::DisjointOp:
1320 O << " disjoint";
1321 break;
1322 case OperationType::PossiblyExactOp:
1323 if (ExactFlags.IsExact)
1324 O << " exact";
1325 break;
1326 case OperationType::OverflowingBinOp:
1327 if (WrapFlags.HasNUW)
1328 O << " nuw";
1329 if (WrapFlags.HasNSW)
1330 O << " nsw";
1331 break;
1332 case OperationType::FPMathOp:
1334 break;
1335 case OperationType::GEPOp:
1336 if (GEPFlags.isInBounds())
1337 O << " inbounds";
1339 O << " nusw";
1341 O << " nuw";
1342 break;
1343 case OperationType::NonNegOp:
1344 if (NonNegFlags.NonNeg)
1345 O << " nneg";
1346 break;
1347 case OperationType::Other:
1348 break;
1349 }
1350 if (getNumOperands() > 0)
1351 O << " ";
1352}
1353#endif
1354
1357 auto &Builder = State.Builder;
1358 switch (Opcode) {
1359 case Instruction::Call:
1360 case Instruction::Br:
1361 case Instruction::PHI:
1362 case Instruction::GetElementPtr:
1363 case Instruction::Select:
1364 llvm_unreachable("This instruction is handled by a different recipe.");
1365 case Instruction::UDiv:
1366 case Instruction::SDiv:
1367 case Instruction::SRem:
1368 case Instruction::URem:
1369 case Instruction::Add:
1370 case Instruction::FAdd:
1371 case Instruction::Sub:
1372 case Instruction::FSub:
1373 case Instruction::FNeg:
1374 case Instruction::Mul:
1375 case Instruction::FMul:
1376 case Instruction::FDiv:
1377 case Instruction::FRem:
1378 case Instruction::Shl:
1379 case Instruction::LShr:
1380 case Instruction::AShr:
1381 case Instruction::And:
1382 case Instruction::Or:
1383 case Instruction::Xor: {
1384 // Just widen unops and binops.
1386 for (VPValue *VPOp : operands())
1387 Ops.push_back(State.get(VPOp));
1388
1389 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1390
1391 if (auto *VecOp = dyn_cast<Instruction>(V))
1392 setFlags(VecOp);
1393
1394 // Use this vector value for all users of the original instruction.
1395 State.set(this, V);
1396 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1397 break;
1398 }
1399 case Instruction::Freeze: {
1400 Value *Op = State.get(getOperand(0));
1401
1402 Value *Freeze = Builder.CreateFreeze(Op);
1403 State.set(this, Freeze);
1404 break;
1405 }
1406 case Instruction::ICmp:
1407 case Instruction::FCmp: {
1408 // Widen compares. Generate vector compares.
1409 bool FCmp = Opcode == Instruction::FCmp;
1410 Value *A = State.get(getOperand(0));
1411 Value *B = State.get(getOperand(1));
1412 Value *C = nullptr;
1413 if (FCmp) {
1414 // Propagate fast math flags.
1415 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1416 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
1417 Builder.setFastMathFlags(I->getFastMathFlags());
1418 C = Builder.CreateFCmp(getPredicate(), A, B);
1419 } else {
1420 C = Builder.CreateICmp(getPredicate(), A, B);
1421 }
1422 State.set(this, C);
1423 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1424 break;
1425 }
1426 default:
1427 // This instruction is not vectorized by simple widening.
1428 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1429 << Instruction::getOpcodeName(Opcode));
1430 llvm_unreachable("Unhandled instruction!");
1431 } // end of switch.
1432
1433#if !defined(NDEBUG)
1434 // Verify that VPlan type inference results agree with the type of the
1435 // generated values.
1437 State.get(this)->getType() &&
1438 "inferred type and type from generated instructions do not match");
1439#endif
1440}
1441
1443 VPCostContext &Ctx) const {
1445 switch (Opcode) {
1446 case Instruction::FNeg: {
1447 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1448 return Ctx.TTI.getArithmeticInstrCost(
1449 Opcode, VectorTy, CostKind,
1452 }
1453
1454 case Instruction::UDiv:
1455 case Instruction::SDiv:
1456 case Instruction::SRem:
1457 case Instruction::URem:
1458 // More complex computation, let the legacy cost-model handle this for now.
1459 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1460 case Instruction::Add:
1461 case Instruction::FAdd:
1462 case Instruction::Sub:
1463 case Instruction::FSub:
1464 case Instruction::Mul:
1465 case Instruction::FMul:
1466 case Instruction::FDiv:
1467 case Instruction::FRem:
1468 case Instruction::Shl:
1469 case Instruction::LShr:
1470 case Instruction::AShr:
1471 case Instruction::And:
1472 case Instruction::Or:
1473 case Instruction::Xor: {
1474 VPValue *RHS = getOperand(1);
1475 // Certain instructions can be cheaper to vectorize if they have a constant
1476 // second vector operand. One example of this are shifts on x86.
1479 if (RHS->isLiveIn())
1480 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1481
1482 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1485 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1486 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1487
1489 if (CtxI)
1490 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1491 return Ctx.TTI.getArithmeticInstrCost(
1492 Opcode, VectorTy, CostKind,
1494 RHSInfo, Operands, CtxI, &Ctx.TLI);
1495 }
1496 case Instruction::Freeze: {
1497    // There is no dedicated cost-model entry for this opcode; assume it
        // costs the same as a 'mul'.
1498 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1499 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1500 }
1501 case Instruction::ICmp:
1502 case Instruction::FCmp: {
1503 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1504 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1505 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1506 CostKind,
1509 }
1510 default:
1511 llvm_unreachable("Unsupported opcode for instruction");
1512 }
1513}
1514
1516 unsigned Opcode = getOpcode();
1517 // TODO: Support other opcodes
1518 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1519 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1520
1522
1523 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1524 "VPWidenEVLRecipe should not be used for scalars");
1525
1526 VPValue *EVL = getEVL();
1527 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1528 IRBuilderBase &BuilderIR = State.Builder;
1529 VectorBuilder Builder(BuilderIR);
1530 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1531
1533 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1534 VPValue *VPOp = getOperand(I);
1535 Ops.push_back(State.get(VPOp));
1536 }
1537
1538 Builder.setMask(Mask).setEVL(EVLArg);
1539 Value *VPInst =
1540 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1541 // Currently vp-intrinsics only accept FMF flags.
1542 // TODO: Enable other flags when support is added.
1543 if (isa<FPMathOperator>(VPInst))
1544 setFlags(cast<Instruction>(VPInst));
1545
1546 State.set(this, VPInst);
1547 State.addMetadata(VPInst,
1548 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1549}
1550
1551#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1553 VPSlotTracker &SlotTracker) const {
1554 O << Indent << "WIDEN ";
1556 O << " = " << Instruction::getOpcodeName(Opcode);
1557 printFlags(O);
1559}
1560
1562 VPSlotTracker &SlotTracker) const {
1563 O << Indent << "WIDEN ";
1565 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1566 printFlags(O);
1568}
1569#endif
1570
1573 auto &Builder = State.Builder;
1574 /// Vectorize casts.
1575 assert(State.VF.isVector() && "Not vectorizing?");
1576 Type *DestTy = VectorType::get(getResultType(), State.VF);
1577 VPValue *Op = getOperand(0);
1578 Value *A = State.get(Op);
1579 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1580 State.set(this, Cast);
1581 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1582 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1583 setFlags(CastOp);
1584}
1585
1587 VPCostContext &Ctx) const {
1588 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1589 // the legacy cost model, including truncates/extends when evaluating a
1590 // reduction in a smaller type.
1591 if (!getUnderlyingValue())
1592 return 0;
1594  // Computes the CastContextHint from a recipe that may access memory.
1594 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1595 if (VF.isScalar())
1597 if (isa<VPInterleaveRecipe>(R))
1599 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1600 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1602 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1603 if (WidenMemoryRecipe == nullptr)
1605 if (!WidenMemoryRecipe->isConsecutive())
1607 if (WidenMemoryRecipe->isReverse())
1609 if (WidenMemoryRecipe->isMasked())
1612 };
1613
1614 VPValue *Operand = getOperand(0);
1616 // For Trunc/FPTrunc, get the context from the only user.
1617 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1619 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1620 CCH = ComputeCCH(StoreRecipe);
1621 }
1622 // For Z/Sext, get the context from the operand.
1623 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1624 Opcode == Instruction::FPExt) {
1625 if (Operand->isLiveIn())
1627 else if (Operand->getDefiningRecipe())
1628 CCH = ComputeCCH(Operand->getDefiningRecipe());
1629 }
1630
1631 auto *SrcTy =
1632 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1633 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1634 // Arm TTI will use the underlying instruction to determine the cost.
1635 return Ctx.TTI.getCastInstrCost(
1636 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1637 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1638}
1639
1640#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1642 VPSlotTracker &SlotTracker) const {
1643 O << Indent << "WIDEN-CAST ";
1645 O << " = " << Instruction::getOpcodeName(Opcode);
1646 printFlags(O);
1648 O << " to " << *getResultType();
1649}
1650#endif
1651
1653 VPCostContext &Ctx) const {
1654 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1655}
1656
1657/// This function adds
1658/// (0 * Step, 1 * Step, 2 * Step, ...)
1659/// to each vector element of Val.
1660/// \p BinOp is the binary opcode used for FP induction variables.
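/// For example, for a fixed VF of 4 and Val = <a, a, a, a>, this returns
/// <a, a + Step, a + 2 * Step, a + 3 * Step>.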
1661static Value *getStepVector(Value *Val, Value *Step,
1663 IRBuilderBase &Builder) {
1664 assert(VF.isVector() && "only vector VFs are supported");
1665
1666 // Create and check the types.
1667 auto *ValVTy = cast<VectorType>(Val->getType());
1668 ElementCount VLen = ValVTy->getElementCount();
1669
1670 Type *STy = Val->getType()->getScalarType();
1671 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1672 "Induction Step must be an integer or FP");
1673 assert(Step->getType() == STy && "Step has wrong type");
1674
1676
1677 // Create a vector of consecutive numbers from zero to VF.
1678 VectorType *InitVecValVTy = ValVTy;
1679 if (STy->isFloatingPointTy()) {
1680 Type *InitVecValSTy =
1682 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1683 }
1684 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1685
1686 if (STy->isIntegerTy()) {
1687 Step = Builder.CreateVectorSplat(VLen, Step);
1688 assert(Step->getType() == Val->getType() && "Invalid step vec");
1689 // FIXME: The newly created binary instructions should contain nsw/nuw
1690 // flags, which can be found from the original scalar operations.
1691 Step = Builder.CreateMul(InitVec, Step);
1692 return Builder.CreateAdd(Val, Step, "induction");
1693 }
1694
1695 // Floating point induction.
1696 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1697 "Binary Opcode should be specified for FP induction");
1698 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1699
1700 Step = Builder.CreateVectorSplat(VLen, Step);
1701 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1702 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1703}
1704
1705/// A helper function that returns an integer or floating-point constant with
1706/// value C.
1708 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1709 : ConstantFP::get(Ty, C);
1710}
1711
1713 assert(!State.Lane && "Int or FP induction being replicated.");
1714
1715 Value *Start = getStartValue()->getLiveInIRValue();
1717 TruncInst *Trunc = getTruncInst();
1718 IRBuilderBase &Builder = State.Builder;
1719 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1720 "Types must match");
1721 assert(State.VF.isVector() && "must have vector VF");
1722
1723 // The value from the original loop to which we are mapping the new induction
1724 // variable.
1725 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1726
1727 // Fast-math-flags propagate from the original induction instruction.
1728 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1729 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1730 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1731
1732 // Now do the actual transformations, and start with fetching the step value.
1733 Value *Step = State.get(getStepValue(), VPLane(0));
1734
1735 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1736 "Expected either an induction phi-node or a truncate of it!");
1737
1738 // Construct the initial value of the vector IV in the vector loop preheader
1739 auto CurrIP = Builder.saveIP();
1740 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1741 Builder.SetInsertPoint(VectorPH->getTerminator());
1742 if (isa<TruncInst>(EntryVal)) {
1743 assert(Start->getType()->isIntegerTy() &&
1744 "Truncation requires an integer type");
1745 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1746 Step = Builder.CreateTrunc(Step, TruncType);
1747 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1748 }
1749
1750 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1751 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1752 State.VF, State.Builder);
1753
1754 // We create vector phi nodes for both integer and floating-point induction
1755 // variables. Here, we determine the kind of arithmetic we will perform.
1758 if (Step->getType()->isIntegerTy()) {
1759 AddOp = Instruction::Add;
1760 MulOp = Instruction::Mul;
1761 } else {
1762 AddOp = ID.getInductionOpcode();
1763 MulOp = Instruction::FMul;
1764 }
1765
1766 Value *SplatVF;
1767 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1768 // The recipe has been unrolled. In that case, fetch the splat value for the
1769 // induction increment.
1770 SplatVF = State.get(SplatVFOperand);
1771 } else {
1772 // Multiply the vectorization factor by the step using integer or
1773 // floating-point arithmetic as appropriate.
1774 Type *StepType = Step->getType();
1775 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1776 if (Step->getType()->isFloatingPointTy())
1777 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1778 else
1779 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1780 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1781
1782 // Create a vector splat to use in the induction update.
1783 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1784 }
1785
1786 Builder.restoreIP(CurrIP);
1787
1788 // We may need to add the step a number of times, depending on the unroll
1789 // factor. The last of those goes into the PHI.
1790 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1791 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1792 VecInd->setDebugLoc(getDebugLoc());
1793 State.set(this, VecInd);
1794
1795 Instruction *LastInduction = cast<Instruction>(
1796 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1797 if (isa<TruncInst>(EntryVal))
1798 State.addMetadata(LastInduction, EntryVal);
1799 LastInduction->setDebugLoc(getDebugLoc());
1800
1801 VecInd->addIncoming(SteppedStart, VectorPH);
1802 // Add induction update using an incorrect block temporarily. The phi node
1803 // will be fixed after VPlan execution. Note that at this point the latch
1804 // block cannot be used, as it does not exist yet.
1805 // TODO: Model increment value in VPlan, by turning the recipe into a
1806 // multi-def and a subclass of VPHeaderPHIRecipe.
1807 VecInd->addIncoming(LastInduction, VectorPH);
1808}
1809
1810#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1812 VPSlotTracker &SlotTracker) const {
1813 O << Indent;
1815 O << " = WIDEN-INDUCTION ";
1817
1818 if (auto *TI = getTruncInst())
1819 O << " (truncated to " << *TI->getType() << ")";
1820}
1821#endif
1822
1824 // The step may be defined by a recipe in the preheader (e.g. if it requires
1825 // SCEV expansion), but for the canonical induction the step is required to be
1826 // 1, which is represented as live-in.
1828 return false;
1829 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1830 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1831 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1832 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1833 getScalarType() == CanIV->getScalarType();
1834}
1835
1836#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1838 VPSlotTracker &SlotTracker) const {
1839 O << Indent;
1841 O << " = DERIVED-IV ";
1843 O << " + ";
1845 O << " * ";
1847}
1848#endif
1849
1851 // Fast-math-flags propagate from the original induction instruction.
1853 if (hasFastMathFlags())
1855
1856 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1857 /// variable on which to base the steps, \p Step is the size of the step.
1858
1859 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1860 Value *Step = State.get(getStepValue(), VPLane(0));
1861 IRBuilderBase &Builder = State.Builder;
1862
1863 // Ensure step has the same type as that of scalar IV.
1864 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1865 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1866
1867 // We build scalar steps for both integer and floating-point induction
1868 // variables. Here, we determine the kind of arithmetic we will perform.
1871 if (BaseIVTy->isIntegerTy()) {
1872 AddOp = Instruction::Add;
1873 MulOp = Instruction::Mul;
1874 } else {
1875 AddOp = InductionOpcode;
1876 MulOp = Instruction::FMul;
1877 }
1878
1879 // Determine the number of scalars we need to generate for each unroll
1880 // iteration.
1881 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1882 // Compute the scalar steps and save the results in State.
1883 Type *IntStepTy =
1884 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1885 Type *VecIVTy = nullptr;
1886 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1887 if (!FirstLaneOnly && State.VF.isScalable()) {
1888 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1889 UnitStepVec =
1890 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1891 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1892 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1893 }
1894
1895 unsigned StartLane = 0;
1896 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1897 if (State.Lane) {
1898 StartLane = State.Lane->getKnownLane();
1899 EndLane = StartLane + 1;
1900 }
1901 Value *StartIdx0 =
1902 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1903
1904 if (!FirstLaneOnly && State.VF.isScalable()) {
1905 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1906 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1907 if (BaseIVTy->isFloatingPointTy())
1908 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1909 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1910 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1911 State.set(this, Add);
1912     // It's also useful to record the per-lane values for the known minimum
1913     // number of elements, so we do that below. This improves code quality
1914     // when extracting the first element, for example.
1915 }
1916
1917 if (BaseIVTy->isFloatingPointTy())
1918 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1919
1920 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1921 Value *StartIdx = Builder.CreateBinOp(
1922 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1923 // The step returned by `createStepForVF` is a runtime-evaluated value
1924 // when VF is scalable. Otherwise, it should be folded into a Constant.
1925 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1926 "Expected StartIdx to be folded to a constant when VF is not "
1927 "scalable");
1928 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1929 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1930 State.set(this, Add, VPLane(Lane));
1931 }
1932}
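// For illustration only (a sketch under assumed values): with an i64 base IV
// %base.iv, step %s, fixed VF = 4 and unroll part 1 (so StartIdx0 = 4), the
// per-lane loop above roughly computes, before constant folding:
//   %idx.l0 = add i64 4, 0          ; StartIdx for lane 0
//   %mul.l0 = mul i64 %idx.l0, %s
//   %add.l0 = add i64 %base.iv, %mul.l0
// and likewise for lanes 1..3 with StartIdx 5, 6 and 7.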
1933
1934#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1936 VPSlotTracker &SlotTracker) const {
1937 O << Indent;
1939 O << " = SCALAR-STEPS ";
1941}
1942#endif
1943
1945 assert(State.VF.isVector() && "not widening");
1946 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1947 // Construct a vector GEP by widening the operands of the scalar GEP as
1948 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1949 // results in a vector of pointers when at least one operand of the GEP
1950 // is vector-typed. Thus, to keep the representation compact, we only use
1951 // vector-typed operands for loop-varying values.
1952
1953 if (areAllOperandsInvariant()) {
1954 // If we are vectorizing, but the GEP has only loop-invariant operands,
1955 // the GEP we build (by only using vector-typed operands for
1956 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1957 // produce a vector of pointers, we need to either arbitrarily pick an
1958 // operand to broadcast, or broadcast a clone of the original GEP.
1959 // Here, we broadcast a clone of the original.
1960 //
1961 // TODO: If at some point we decide to scalarize instructions having
1962 // loop-invariant operands, this special case will no longer be
1963 // required. We would add the scalarization decision to
1964 // collectLoopScalars() and teach getVectorValue() to broadcast
1965 // the lane-zero scalar value.
1967 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1968 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1969
1970 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1971 ArrayRef(Ops).drop_front(), "",
1973 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1974 State.set(this, Splat);
1975 State.addMetadata(Splat, GEP);
1976 } else {
1977 // If the GEP has at least one loop-varying operand, we are sure to
1978 // produce a vector of pointers unless VF is scalar.
1979 // The pointer operand of the new GEP. If it's loop-invariant, we
1980 // won't broadcast it.
1981 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1982 : State.get(getOperand(0));
1983
1984 // Collect all the indices for the new GEP. If any index is
1985 // loop-invariant, we won't broadcast it.
1987 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1988 VPValue *Operand = getOperand(I);
1989 if (isIndexLoopInvariant(I - 1))
1990 Indices.push_back(State.get(Operand, VPLane(0)));
1991 else
1992 Indices.push_back(State.get(Operand));
1993 }
1994
1995 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1996 // but it should be a vector, otherwise.
1997 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1998 Indices, "", getGEPNoWrapFlags());
1999 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2000 "NewGEP is not a pointer vector");
2001 State.set(this, NewGEP);
2002 State.addMetadata(NewGEP, GEP);
2003 }
2004}
2005
2006#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2008 VPSlotTracker &SlotTracker) const {
2009 O << Indent << "WIDEN-GEP ";
2010 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2011 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2012 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2013
2014 O << " ";
2016 O << " = getelementptr";
2017 printFlags(O);
2019}
2020#endif
2021
2022static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2023 unsigned CurrentPart, IRBuilderBase &Builder) {
2024 // Use i32 for the gep index type when the value is constant,
2025 // or query DataLayout for a more suitable index type otherwise.
2026 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2027 return IsScalable && (IsReverse || CurrentPart > 0)
2028 ? DL.getIndexType(Builder.getPtrTy(0))
2029 : Builder.getInt32Ty();
2030}
2031
2033 auto &Builder = State.Builder;
2035 unsigned CurrentPart = getUnrollPart(*this);
2036 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2037 CurrentPart, Builder);
2038
2039 // The wide store needs to start at the last vector element.
2040 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2041 if (IndexTy != RunTimeVF->getType())
2042 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2043 // NumElt = -CurrentPart * RunTimeVF
2044 Value *NumElt = Builder.CreateMul(
2045 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
2046 // LastLane = 1 - RunTimeVF
2047 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2048 Value *Ptr = State.get(getOperand(0), VPLane(0));
2049 Value *ResultPtr =
2050 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2051 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2053
2054 State.set(this, ResultPtr, /*IsScalar*/ true);
2055}
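// For illustration only (a sketch under assumed values): for unroll part 0 and
// a scalable runtime VF %vf (e.g. vscale * 4), the two GEPs above roughly
// compute:
//   %p0   = getelementptr T, ptr %ptr, i64 0            ; NumElt = -0 * %vf
//   %last = getelementptr T, ptr %p0, i64 (1 - %vf)     ; LastLane
// so the result points at the lowest-addressed element covered by the reversed
// access for this part.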
2056
2057#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2059 VPSlotTracker &SlotTracker) const {
2060 O << Indent;
2062 O << " = reverse-vector-pointer";
2063 printFlags(O);
2064 O << " ";
2066}
2067#endif
2068
2070 auto &Builder = State.Builder;
2072 unsigned CurrentPart = getUnrollPart(*this);
2073 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2074 CurrentPart, Builder);
2075 Value *Ptr = State.get(getOperand(0), VPLane(0));
2076
2077 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2078 Value *ResultPtr =
2079 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2080
2081 State.set(this, ResultPtr, /*IsScalar*/ true);
2082}
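// For illustration only (a sketch under assumed values): with a fixed VF of 4
// and unroll part 2, the GEP above is roughly
//   %part.ptr = getelementptr T, ptr %ptr, i32 8        ; Increment = 2 * VF
// i.e. each unrolled part advances the pointer by VF elements.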
2083
2084#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2086 VPSlotTracker &SlotTracker) const {
2087 O << Indent;
2089 O << " = vector-pointer ";
2090
2092}
2093#endif
2094
2096 assert(isNormalized() && "Expected blend to be normalized!");
2098 // We know that all PHIs in non-header blocks are converted into
2099 // selects, so we don't have to worry about the insertion order and we
2100 // can just use the builder.
2101 // At this point we generate the predication tree. There may be
2102 // duplications since this is a simple recursive scan, but future
2103 // optimizations will clean it up.
2104
2105 unsigned NumIncoming = getNumIncomingValues();
2106
2107 // Generate a sequence of selects of the form:
2108 // SELECT(Mask3, In3,
2109 // SELECT(Mask2, In2,
2110 // SELECT(Mask1, In1,
2111 // In0)))
2112  // Note that Mask0 is never used: lanes for which no path reaches this phi,
2113  // and which are therefore essentially undef, take their value from In0.
2114 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2115 Value *Result = nullptr;
2116 for (unsigned In = 0; In < NumIncoming; ++In) {
2117 // We might have single edge PHIs (blocks) - use an identity
2118 // 'select' for the first PHI operand.
2119 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2120 if (In == 0)
2121 Result = In0; // Initialize with the first incoming value.
2122 else {
2123 // Select between the current value and the previous incoming edge
2124 // based on the incoming mask.
2125 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2126 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2127 }
2128 }
2129 State.set(this, Result, OnlyFirstLaneUsed);
2130}
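// For illustration only (a sketch under assumed values): for a normalized blend
// of three <4 x i32> incoming values %in0..%in2 with edge masks %m1 and %m2,
// the loop above roughly emits
//   %predphi  = select <4 x i1> %m1, <4 x i32> %in1, <4 x i32> %in0
//   %predphi1 = select <4 x i1> %m2, <4 x i32> %in2, <4 x i32> %predphi
// i.e. the nested-select form sketched in the comment above, built inside out.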
2131
2133 VPCostContext &Ctx) const {
2135
2136 // Handle cases where only the first lane is used the same way as the legacy
2137 // cost model.
2139 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2140
2141 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2142 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2143 return (getNumIncomingValues() - 1) *
2144 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2146}
2147
2148#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2150 VPSlotTracker &SlotTracker) const {
2151 O << Indent << "BLEND ";
2153 O << " =";
2154 if (getNumIncomingValues() == 1) {
2155 // Not a User of any mask: not really blending, this is a
2156 // single-predecessor phi.
2157 O << " ";
2159 } else {
2160 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2161 O << " ";
2163 if (I == 0)
2164 continue;
2165 O << "/";
2167 }
2168 }
2169}
2170#endif
2171
2173 assert(!State.Lane && "Reduction being replicated.");
2174 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2175 RecurKind Kind = RdxDesc.getRecurrenceKind();
2176 // Propagate the fast-math flags carried by the underlying instruction.
2178 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2179 Value *NewVecOp = State.get(getVecOp());
2180 if (VPValue *Cond = getCondOp()) {
2181 Value *NewCond = State.get(Cond, State.VF.isScalar());
2182 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2183 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2184
2185 Value *Start;
2187 Start = RdxDesc.getRecurrenceStartValue();
2188 else
2189 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2190 RdxDesc.getFastMathFlags());
2191 if (State.VF.isVector())
2192 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2193
2194 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2195 NewVecOp = Select;
2196 }
2197 Value *NewRed;
2198 Value *NextInChain;
2199 if (IsOrdered) {
2200 if (State.VF.isVector())
2201 NewRed =
2202 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2203 else
2204 NewRed = State.Builder.CreateBinOp(
2205 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2206 PrevInChain = NewRed;
2207 NextInChain = NewRed;
2208 } else {
2209 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2210 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2212 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2213 NewRed, PrevInChain);
2214 else
2215 NextInChain = State.Builder.CreateBinOp(
2216 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2217 }
2218 State.set(this, NextInChain, /*IsScalar*/ true);
2219}
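// For illustration only (a sketch under assumed values): for an unordered,
// conditional integer add reduction over <4 x i32> with chain value %chain,
// the code above roughly produces
//   %sel  = select <4 x i1> %cond, <4 x i32> %vec.op, <4 x i32> zeroinitializer
//   %rdx  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %next = add i32 %rdx, %chain
// where the zero splat is the identity chosen for masked-out lanes.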
2220
2222 assert(!State.Lane && "Reduction being replicated.");
2223
2224 auto &Builder = State.Builder;
2225 // Propagate the fast-math flags carried by the underlying instruction.
2226 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2228 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2229
2230 RecurKind Kind = RdxDesc.getRecurrenceKind();
2231 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2232 Value *VecOp = State.get(getVecOp());
2233 Value *EVL = State.get(getEVL(), VPLane(0));
2234
2235 VectorBuilder VBuilder(Builder);
2236 VBuilder.setEVL(EVL);
2237 Value *Mask;
2238 // TODO: move the all-true mask generation into VectorBuilder.
2239 if (VPValue *CondOp = getCondOp())
2240 Mask = State.get(CondOp);
2241 else
2242 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2243 VBuilder.setMask(Mask);
2244
2245 Value *NewRed;
2246 if (isOrdered()) {
2247 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2248 } else {
2249 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2251 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2252 else
2253 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2254 NewRed, Prev);
2255 }
2256 State.set(this, NewRed, /*IsScalar*/ true);
2257}
2258
2260 VPCostContext &Ctx) const {
2261 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2262 Type *ElementTy = Ctx.Types.inferScalarType(this);
2263 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2265 unsigned Opcode = RdxDesc.getOpcode();
2266
2267 // TODO: Support any-of and in-loop reductions.
2268 assert(
2270 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2271 "Any-of reduction not implemented in VPlan-based cost model currently.");
2272 assert(
2273 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2274 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2275 "In-loop reduction not implemented in VPlan-based cost model currently.");
2276
2277 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2278 "Inferred type and recurrence type mismatch.");
2279
2280 // Cost = Reduction cost + BinOp cost
2282 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2285 return Cost + Ctx.TTI.getMinMaxReductionCost(
2286 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2287 }
2288
2289 return Cost + Ctx.TTI.getArithmeticReductionCost(
2290 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2291}
2292
2293#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2295 VPSlotTracker &SlotTracker) const {
2296 O << Indent << "REDUCE ";
2298 O << " = ";
2300 O << " +";
2301 if (isa<FPMathOperator>(getUnderlyingInstr()))
2303 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2305 if (isConditional()) {
2306 O << ", ";
2308 }
2309 O << ")";
2310 if (RdxDesc.IntermediateStore)
2311 O << " (with final reduction value stored in invariant address sank "
2312 "outside of loop)";
2313}
2314
2316 VPSlotTracker &SlotTracker) const {
2318 O << Indent << "REDUCE ";
2320 O << " = ";
2322 O << " +";
2323 if (isa<FPMathOperator>(getUnderlyingInstr()))
2325 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2327 O << ", ";
2329 if (isConditional()) {
2330 O << ", ";
2332 }
2333 O << ")";
2334 if (RdxDesc.IntermediateStore)
2335 O << " (with final reduction value stored in invariant address sank "
2336 "outside of loop)";
2337}
2338#endif
2339
2341 // Find if the recipe is used by a widened recipe via an intervening
2342 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2343 return any_of(users(), [](const VPUser *U) {
2344 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2345 return any_of(PredR->users(), [PredR](const VPUser *U) {
2346 return !U->usesScalars(PredR);
2347 });
2348 return false;
2349 });
2350}
2351
2353 VPCostContext &Ctx) const {
2354 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2355 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2356 // transform, avoid computing their cost multiple times for now.
2357 Ctx.SkipCostComputation.insert(UI);
2358 return Ctx.getLegacyCost(UI, VF);
2359}
2360
2361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2363 VPSlotTracker &SlotTracker) const {
2364 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2365
2366 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2368 O << " = ";
2369 }
2370 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2371 O << "call";
2372 printFlags(O);
2373 O << "@" << CB->getCalledFunction()->getName() << "(";
2375 O, [&O, &SlotTracker](VPValue *Op) {
2376 Op->printAsOperand(O, SlotTracker);
2377 });
2378 O << ")";
2379 } else {
2381 printFlags(O);
2383 }
2384
2385 if (shouldPack())
2386 O << " (S->V)";
2387}
2388#endif
2389
2390Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2392 "Codegen only implemented for first lane.");
2393 switch (Opcode) {
2394 case Instruction::SExt:
2395 case Instruction::ZExt:
2396 case Instruction::Trunc: {
2397 // Note: SExt/ZExt not used yet.
2398 Value *Op = State.get(getOperand(0), VPLane(0));
2399 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2400 }
2401 default:
2402 llvm_unreachable("opcode not implemented yet");
2403 }
2404}
2405
2406void VPScalarCastRecipe ::execute(VPTransformState &State) {
2407 State.set(this, generate(State), VPLane(0));
2408}
2409
2410#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2411void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const {
2413 O << Indent << "SCALAR-CAST ";
2414 printAsOperand(O, SlotTracker);
2415 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2416 printOperands(O, SlotTracker);
2417 O << " to " << *ResultTy;
2418}
2419#endif
2420
2422 assert(State.Lane && "Branch on Mask works only on single instance.");
2423
2424 unsigned Lane = State.Lane->getKnownLane();
2425
2426 Value *ConditionBit = nullptr;
2427 VPValue *BlockInMask = getMask();
2428 if (BlockInMask) {
2429 ConditionBit = State.get(BlockInMask);
2430 if (ConditionBit->getType()->isVectorTy())
2431 ConditionBit = State.Builder.CreateExtractElement(
2432 ConditionBit, State.Builder.getInt32(Lane));
2433 } else // Block in mask is all-one.
2434 ConditionBit = State.Builder.getTrue();
2435
2436 // Replace the temporary unreachable terminator with a new conditional branch,
2437 // whose two destinations will be set later when they are created.
2438 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2439 assert(isa<UnreachableInst>(CurrentTerminator) &&
2440 "Expected to replace unreachable terminator with conditional branch.");
2441 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2442 CondBr->setSuccessor(0, nullptr);
2443 ReplaceInstWithInst(CurrentTerminator, CondBr);
2444}
2445
2447 VPCostContext &Ctx) const {
2448 // The legacy cost model doesn't assign costs to branches for individual
2449 // replicate regions. Match the current behavior in the VPlan cost model for
2450 // now.
2451 return 0;
2452}
2453
2456 assert(State.Lane && "Predicated instruction PHI works per instance.");
2457 Instruction *ScalarPredInst =
2458 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2459 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2460 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2461 assert(PredicatingBB && "Predicated block has no single predecessor.");
2462 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2463 "operand must be VPReplicateRecipe");
2464
2465 // By current pack/unpack logic we need to generate only a single phi node: if
2466 // a vector value for the predicated instruction exists at this point it means
2467 // the instruction has vector users only, and a phi for the vector value is
2468 // needed. In this case the recipe of the predicated instruction is marked to
2469 // also do that packing, thereby "hoisting" the insert-element sequence.
2470 // Otherwise, a phi node for the scalar value is needed.
2471 if (State.hasVectorValue(getOperand(0))) {
2472 Value *VectorValue = State.get(getOperand(0));
2473 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2474 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2475 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2476 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2477 if (State.hasVectorValue(this))
2478 State.reset(this, VPhi);
2479 else
2480 State.set(this, VPhi);
2481 // NOTE: Currently we need to update the value of the operand, so the next
2482 // predicated iteration inserts its generated value in the correct vector.
2483 State.reset(getOperand(0), VPhi);
2484 } else {
2485 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2486 return;
2487
2488 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2489 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2490 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2491 PredicatingBB);
2492 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2493 if (State.hasScalarValue(this, *State.Lane))
2494 State.reset(this, Phi, *State.Lane);
2495 else
2496 State.set(this, Phi, *State.Lane);
2497 // NOTE: Currently we need to update the value of the operand, so the next
2498 // predicated iteration inserts its generated value in the correct vector.
2499 State.reset(getOperand(0), Phi, *State.Lane);
2500 }
2501}
2502
2503#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2505 VPSlotTracker &SlotTracker) const {
2506 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2508 O << " = ";
2510}
2511#endif
2512
2514 VPCostContext &Ctx) const {
2516 const Align Alignment =
2518 unsigned AS =
2521
2522 if (!Consecutive) {
2523 // TODO: Using the original IR may not be accurate.
2524 // Currently, ARM will use the underlying IR to calculate gather/scatter
2525 // instruction cost.
2527 assert(!Reverse &&
2528 "Inconsecutive memory access should not have the order.");
2529 return Ctx.TTI.getAddressComputationCost(Ty) +
2531 IsMasked, Alignment, CostKind,
2532 &Ingredient);
2533 }
2534
2536 if (IsMasked) {
2537 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2538 AS, CostKind);
2539 } else {
2540 TTI::OperandValueInfo OpInfo =
2542 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2543 CostKind, OpInfo, &Ingredient);
2544 }
2545 if (!Reverse)
2546 return Cost;
2547
2549 cast<VectorType>(Ty), {}, CostKind, 0);
2550}
2551
2553 auto *LI = cast<LoadInst>(&Ingredient);
2554
2555 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2556 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2557 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2558 bool CreateGather = !isConsecutive();
2559
2560 auto &Builder = State.Builder;
2562 Value *Mask = nullptr;
2563 if (auto *VPMask = getMask()) {
2564     // Mask reversal is only needed for an actual (non-all-one) mask; an all-one
2565     // mask is represented by a null value, and its reverse is still all-one.
2566 Mask = State.get(VPMask);
2567 if (isReverse())
2568 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2569 }
2570
2571 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2572 Value *NewLI;
2573 if (CreateGather) {
2574 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2575 "wide.masked.gather");
2576 } else if (Mask) {
2577 NewLI =
2578 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2579 PoisonValue::get(DataTy), "wide.masked.load");
2580 } else {
2581 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2582 }
2583 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2584 State.addMetadata(NewLI, LI);
2585 if (Reverse)
2586 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2587 State.set(this, NewLI);
2588}
2589
2590#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2592 VPSlotTracker &SlotTracker) const {
2593 O << Indent << "WIDEN ";
2595 O << " = load ";
2597}
2598#endif
2599
2600/// Use all-true mask for reverse rather than actual mask, as it avoids a
2601/// dependence w/o affecting the result.
2603 Value *EVL, const Twine &Name) {
2604 VectorType *ValTy = cast<VectorType>(Operand->getType());
2605 Value *AllTrueMask =
2606 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2607 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2608 {Operand, AllTrueMask, EVL}, nullptr, Name);
2609}
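// For illustration only (a sketch under assumed types): for a
// <vscale x 4 x i32> operand %v, the helper above roughly emits
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//              <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)
// using an all-true mask so the reverse itself adds no dependence on the real
// mask value.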
2610
2612 auto *LI = cast<LoadInst>(&Ingredient);
2613
2614 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2615 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2616 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2617 bool CreateGather = !isConsecutive();
2618
2619 auto &Builder = State.Builder;
2621 CallInst *NewLI;
2622 Value *EVL = State.get(getEVL(), VPLane(0));
2623 Value *Addr = State.get(getAddr(), !CreateGather);
2624 Value *Mask = nullptr;
2625 if (VPValue *VPMask = getMask()) {
2626 Mask = State.get(VPMask);
2627 if (isReverse())
2628 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2629 } else {
2630 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2631 }
2632
2633 if (CreateGather) {
2634 NewLI =
2635 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2636 nullptr, "wide.masked.gather");
2637 } else {
2638 VectorBuilder VBuilder(Builder);
2639 VBuilder.setEVL(EVL).setMask(Mask);
2640 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2641 Instruction::Load, DataTy, Addr, "vp.op.load"));
2642 }
2643 NewLI->addParamAttr(
2644 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2645 State.addMetadata(NewLI, LI);
2646 Instruction *Res = NewLI;
2647 if (isReverse())
2648 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2649 State.set(this, Res);
2650}
2651
2653 VPCostContext &Ctx) const {
2654 if (!Consecutive || IsMasked)
2655 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2656
2657   // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2658   // here because the EVL recipes use EVL in place of the tail mask, whereas
2659   // the legacy model always accounts for the cost of the mask.
2660   // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2661   // no longer need to compare against the legacy cost model.
2663 const Align Alignment =
2665 unsigned AS =
2669 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2670 if (!Reverse)
2671 return Cost;
2672
2674 cast<VectorType>(Ty), {}, CostKind, 0);
2675}
2676
2677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2679 VPSlotTracker &SlotTracker) const {
2680 O << Indent << "WIDEN ";
2682 O << " = vp.load ";
2684}
2685#endif
2686
2688 auto *SI = cast<StoreInst>(&Ingredient);
2689
2690 VPValue *StoredVPValue = getStoredValue();
2691 bool CreateScatter = !isConsecutive();
2692 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2693
2694 auto &Builder = State.Builder;
2696
2697 Value *Mask = nullptr;
2698 if (auto *VPMask = getMask()) {
2699     // Mask reversal is only needed for an actual (non-all-one) mask; an all-one
2700     // mask is represented by a null value, and its reverse is still all-one.
2701 Mask = State.get(VPMask);
2702 if (isReverse())
2703 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2704 }
2705
2706 Value *StoredVal = State.get(StoredVPValue);
2707 if (isReverse()) {
2708 // If we store to reverse consecutive memory locations, then we need
2709 // to reverse the order of elements in the stored value.
2710 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2711 // We don't want to update the value in the map as it might be used in
2712 // another expression. So don't call resetVectorValue(StoredVal).
2713 }
2714 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2715 Instruction *NewSI = nullptr;
2716 if (CreateScatter)
2717 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2718 else if (Mask)
2719 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2720 else
2721 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2722 State.addMetadata(NewSI, SI);
2723}
2724
2725#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2727 VPSlotTracker &SlotTracker) const {
2728 O << Indent << "WIDEN store ";
2730}
2731#endif
2732
2734 auto *SI = cast<StoreInst>(&Ingredient);
2735
2736 VPValue *StoredValue = getStoredValue();
2737 bool CreateScatter = !isConsecutive();
2738 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2739
2740 auto &Builder = State.Builder;
2742
2743 CallInst *NewSI = nullptr;
2744 Value *StoredVal = State.get(StoredValue);
2745 Value *EVL = State.get(getEVL(), VPLane(0));
2746 if (isReverse())
2747 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2748 Value *Mask = nullptr;
2749 if (VPValue *VPMask = getMask()) {
2750 Mask = State.get(VPMask);
2751 if (isReverse())
2752 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2753 } else {
2754 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2755 }
2756 Value *Addr = State.get(getAddr(), !CreateScatter);
2757 if (CreateScatter) {
2758 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2759 Intrinsic::vp_scatter,
2760 {StoredVal, Addr, Mask, EVL});
2761 } else {
2762 VectorBuilder VBuilder(Builder);
2763 VBuilder.setEVL(EVL).setMask(Mask);
2764 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2765 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2766 {StoredVal, Addr}));
2767 }
2768 NewSI->addParamAttr(
2769 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2770 State.addMetadata(NewSI, SI);
2771}
2772
2774 VPCostContext &Ctx) const {
2775 if (!Consecutive || IsMasked)
2776 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2777
2778   // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
2779   // here because the EVL recipes use EVL in place of the tail mask, whereas
2780   // the legacy model always accounts for the cost of the mask.
2781   // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
2782   // no longer need to compare against the legacy cost model.
2784 const Align Alignment =
2786 unsigned AS =
2790 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2791 if (!Reverse)
2792 return Cost;
2793
2795 cast<VectorType>(Ty), {}, CostKind, 0);
2796}
2797
2798#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2800 VPSlotTracker &SlotTracker) const {
2801 O << Indent << "WIDEN vp.store ";
2803}
2804#endif
2805
2807 VectorType *DstVTy, const DataLayout &DL) {
2808 // Verify that V is a vector type with same number of elements as DstVTy.
2809 auto VF = DstVTy->getElementCount();
2810 auto *SrcVecTy = cast<VectorType>(V->getType());
2811 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2812 Type *SrcElemTy = SrcVecTy->getElementType();
2813 Type *DstElemTy = DstVTy->getElementType();
2814 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2815 "Vector elements must have same size");
2816
2817 // Do a direct cast if element types are castable.
2818 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2819 return Builder.CreateBitOrPointerCast(V, DstVTy);
2820 }
2821   // V cannot be cast directly to the desired vector type. This may happen
2822   // when V is a floating-point vector but DstVTy is a vector of pointers, or
2823   // vice versa. Handle this with a two-step bitcast through an intermediate
2824   // integer type, i.e. Ptr <-> Int <-> Float.
2825 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2826 "Only one type should be a pointer type");
2827 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2828 "Only one type should be a floating point type");
2829 Type *IntTy =
2830 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2831 auto *VecIntTy = VectorType::get(IntTy, VF);
2832 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2833 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2834}
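// For illustration only (a sketch under assumed types): for V of type
// <4 x double> and DstVTy of type <4 x ptr> on a target with 64-bit pointers,
// the two-step path above roughly emits
//   %as.int = bitcast <4 x double> %v to <4 x i64>
//   %as.ptr = inttoptr <4 x i64> %as.int to <4 x ptr>
// with <4 x i64> as the intermediate integer vector type.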
2835
2836/// Return a vector containing interleaved elements from multiple
2837/// smaller input vectors.
2839 const Twine &Name) {
2840 unsigned Factor = Vals.size();
2841 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2842
2843 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2844#ifndef NDEBUG
2845 for (Value *Val : Vals)
2846 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2847#endif
2848
2849 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2850 // must use intrinsics to interleave.
2851 if (VecTy->isScalableTy()) {
2853 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2854 Vals,
2855 /*FMFSource=*/nullptr, Name);
2856 }
2857
2858 // Fixed length. Start by concatenating all vectors into a wide vector.
2859 Value *WideVec = concatenateVectors(Builder, Vals);
2860
2861 // Interleave the elements into the wide vector.
2862 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2863 return Builder.CreateShuffleVector(
2864 WideVec, createInterleaveMask(NumElts, Factor), Name);
2865}
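// For illustration only (a sketch under assumed types): interleaving two fixed
// <4 x i32> inputs %a and %b goes through the fixed-length path above roughly as
//   %wide   = shufflevector <4 x i32> %a, <4 x i32> %b,
//               <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   %ileave = shufflevector <8 x i32> %wide, <8 x i32> poison,
//               <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
// giving the element order a0, b0, a1, b1, ...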
2866
2867// Try to vectorize the interleave group that \p Instr belongs to.
2868//
2869// E.g. Translate following interleaved load group (factor = 3):
2870// for (i = 0; i < N; i+=3) {
2871// R = Pic[i]; // Member of index 0
2872// G = Pic[i+1]; // Member of index 1
2873// B = Pic[i+2]; // Member of index 2
2874// ... // do something to R, G, B
2875// }
2876// To:
2877// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2878// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2879// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2880// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2881//
2882// Or translate following interleaved store group (factor = 3):
2883// for (i = 0; i < N; i+=3) {
2884// ... do something to R, G, B
2885// Pic[i] = R; // Member of index 0
2886// Pic[i+1] = G; // Member of index 1
2887// Pic[i+2] = B; // Member of index 2
2888// }
2889// To:
2890// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2891// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2892// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2893// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2894// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2896 assert(!State.Lane && "Interleave group being replicated.");
2897 const InterleaveGroup<Instruction> *Group = IG;
2898 Instruction *Instr = Group->getInsertPos();
2899
2900 // Prepare for the vector type of the interleaved load/store.
2901 Type *ScalarTy = getLoadStoreType(Instr);
2902 unsigned InterleaveFactor = Group->getFactor();
2903 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2904
2905 // TODO: extend the masked interleaved-group support to reversed access.
2906 VPValue *BlockInMask = getMask();
2907 assert((!BlockInMask || !Group->isReverse()) &&
2908 "Reversed masked interleave-group not supported.");
2909
2910 VPValue *Addr = getAddr();
2911 Value *ResAddr = State.get(Addr, VPLane(0));
2912 if (auto *I = dyn_cast<Instruction>(ResAddr))
2913 State.setDebugLocFrom(I->getDebugLoc());
2914
2915 // If the group is reverse, adjust the index to refer to the last vector lane
2916 // instead of the first. We adjust the index from the first vector lane,
2917 // rather than directly getting the pointer for lane VF - 1, because the
2918 // pointer operand of the interleaved access is supposed to be uniform.
2919 if (Group->isReverse()) {
2920 Value *RuntimeVF =
2921 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2922 Value *Index =
2923 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2924 Index = State.Builder.CreateMul(Index,
2925 State.Builder.getInt32(Group->getFactor()));
2926 Index = State.Builder.CreateNeg(Index);
2927
2928 bool InBounds = false;
2929 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2930 InBounds = Gep->isInBounds();
2931 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2932 }
2933
2934 State.setDebugLocFrom(Instr->getDebugLoc());
2935 Value *PoisonVec = PoisonValue::get(VecTy);
2936
2937 auto CreateGroupMask = [&BlockInMask, &State,
2938 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2939 if (State.VF.isScalable()) {
2940 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2941 assert(InterleaveFactor == 2 &&
2942 "Unsupported deinterleave factor for scalable vectors");
2943 auto *ResBlockInMask = State.get(BlockInMask);
2944 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2945 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2946 State.VF.getKnownMinValue() * 2, true);
2947 return State.Builder.CreateIntrinsic(
2948 MaskTy, Intrinsic::vector_interleave2, Ops,
2949 /*FMFSource=*/nullptr, "interleaved.mask");
2950 }
2951
2952 if (!BlockInMask)
2953 return MaskForGaps;
2954
2955 Value *ResBlockInMask = State.get(BlockInMask);
2956 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2957 ResBlockInMask,
2958 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2959 "interleaved.mask");
2960 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2961 ShuffledMask, MaskForGaps)
2962 : ShuffledMask;
2963 };
2964
2965 const DataLayout &DL = Instr->getDataLayout();
2966 // Vectorize the interleaved load group.
2967 if (isa<LoadInst>(Instr)) {
2968 Value *MaskForGaps = nullptr;
2969 if (NeedsMaskForGaps) {
2970 MaskForGaps = createBitMaskForGaps(State.Builder,
2971 State.VF.getKnownMinValue(), *Group);
2972 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2973 }
2974
2975 Instruction *NewLoad;
2976 if (BlockInMask || MaskForGaps) {
2977 Value *GroupMask = CreateGroupMask(MaskForGaps);
2978 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2979 Group->getAlign(), GroupMask,
2980 PoisonVec, "wide.masked.vec");
2981 } else
2982 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2983 Group->getAlign(), "wide.vec");
2984 Group->addMetadata(NewLoad);
2985
2987 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2988 if (VecTy->isScalableTy()) {
2989 assert(InterleaveFactor == 2 &&
2990 "Unsupported deinterleave factor for scalable vectors");
2991
2992 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2993 // so must use intrinsics to deinterleave.
2994 Value *DI = State.Builder.CreateIntrinsic(
2995 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2996 /*FMFSource=*/nullptr, "strided.vec");
2997 unsigned J = 0;
2998 for (unsigned I = 0; I < InterleaveFactor; ++I) {
2999 Instruction *Member = Group->getMember(I);
3000
3001 if (!Member)
3002 continue;
3003
3004 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3005 // If this member has different type, cast the result type.
3006 if (Member->getType() != ScalarTy) {
3007 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3008 StridedVec =
3009 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3010 }
3011
3012 if (Group->isReverse())
3013 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3014
3015 State.set(VPDefs[J], StridedVec);
3016 ++J;
3017 }
3018
3019 return;
3020 }
3021
3022 // For each member in the group, shuffle out the appropriate data from the
3023 // wide loads.
3024 unsigned J = 0;
3025 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3026 Instruction *Member = Group->getMember(I);
3027
3028 // Skip the gaps in the group.
3029 if (!Member)
3030 continue;
3031
3032 auto StrideMask =
3033 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3034 Value *StridedVec =
3035 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3036
3037 // If this member has different type, cast the result type.
3038 if (Member->getType() != ScalarTy) {
3039         assert(!State.VF.isScalable() && "VF is assumed to be non-scalable.");
3040 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3041 StridedVec =
3042 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3043 }
3044
3045 if (Group->isReverse())
3046 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3047
3048 State.set(VPDefs[J], StridedVec);
3049 ++J;
3050 }
3051 return;
3052 }
3053
3054 // The sub vector type for current instruction.
3055 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3056
3057 // Vectorize the interleaved store group.
3058 Value *MaskForGaps =
3059 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3060 assert((!MaskForGaps || !State.VF.isScalable()) &&
3061 "masking gaps for scalable vectors is not yet supported.");
3062 ArrayRef<VPValue *> StoredValues = getStoredValues();
3063 // Collect the stored vector from each member.
3064 SmallVector<Value *, 4> StoredVecs;
3065 unsigned StoredIdx = 0;
3066 for (unsigned i = 0; i < InterleaveFactor; i++) {
3067 assert((Group->getMember(i) || MaskForGaps) &&
3068 "Fail to get a member from an interleaved store group");
3069 Instruction *Member = Group->getMember(i);
3070
3071 // Skip the gaps in the group.
3072 if (!Member) {
3073 Value *Undef = PoisonValue::get(SubVT);
3074 StoredVecs.push_back(Undef);
3075 continue;
3076 }
3077
3078 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3079 ++StoredIdx;
3080
3081 if (Group->isReverse())
3082 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3083
3084 // If this member has different type, cast it to a unified type.
3085
3086 if (StoredVec->getType() != SubVT)
3087 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3088
3089 StoredVecs.push_back(StoredVec);
3090 }
3091
3092 // Interleave all the smaller vectors into one wider vector.
3093 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3094 Instruction *NewStoreInstr;
3095 if (BlockInMask || MaskForGaps) {
3096 Value *GroupMask = CreateGroupMask(MaskForGaps);
3097 NewStoreInstr = State.Builder.CreateMaskedStore(
3098 IVec, ResAddr, Group->getAlign(), GroupMask);
3099 } else
3100 NewStoreInstr =
3101 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3102
3103 Group->addMetadata(NewStoreInstr);
3104}
3105
3106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3108 VPSlotTracker &SlotTracker) const {
3109 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3110 IG->getInsertPos()->printAsOperand(O, false);
3111 O << ", ";
3113 VPValue *Mask = getMask();
3114 if (Mask) {
3115 O << ", ";
3116 Mask->printAsOperand(O, SlotTracker);
3117 }
3118
3119 unsigned OpIdx = 0;
3120 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3121 if (!IG->getMember(i))
3122 continue;
3123 if (getNumStoreOperands() > 0) {
3124 O << "\n" << Indent << " store ";
3125 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3126 O << " to index " << i;
3127 } else {
3128 O << "\n" << Indent << " ";
3130 O << " = load from index " << i;
3131 }
3132 ++OpIdx;
3133 }
3134}
3135#endif
3136
3138 VPCostContext &Ctx) const {
3139 Instruction *InsertPos = getInsertPos();
3140 // Find the VPValue index of the interleave group. We need to skip gaps.
3141 unsigned InsertPosIdx = 0;
3142   for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx)
3143 if (auto *Member = IG->getMember(Idx)) {
3144 if (Member == InsertPos)
3145 break;
3146 InsertPosIdx++;
3147 }
3148 Type *ValTy = Ctx.Types.inferScalarType(
3149 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3150 : getStoredValues()[InsertPosIdx]);
3151 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3152 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3154
3155 unsigned InterleaveFactor = IG->getFactor();
3156 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3157
3158 // Holds the indices of existing members in the interleaved group.
3160 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3161 if (IG->getMember(IF))
3162 Indices.push_back(IF);
3163
3164 // Calculate the cost of the whole interleaved group.
3166 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3167 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3168
3169 if (!IG->isReverse())
3170 return Cost;
3171
3172 return Cost + IG->getNumMembers() *
3174 VectorTy, std::nullopt, CostKind, 0);
3175}
3176
3177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3179 VPSlotTracker &SlotTracker) const {
3180 O << Indent << "EMIT ";
3182 O << " = CANONICAL-INDUCTION ";
3184}
3185#endif
3186
3188 return IsScalarAfterVectorization &&
3189 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3190}
3191
3193 assert(getInductionDescriptor().getKind() ==
3195 "Not a pointer induction according to InductionDescriptor!");
3196 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3197 "Unexpected type.");
3199 "Recipe should have been replaced");
3200
3201 unsigned CurrentPart = getUnrollPart(*this);
3202
3203 // Build a pointer phi
3204 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3205 Type *ScStValueType = ScalarStartValue->getType();
3206
3207 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3208 PHINode *NewPointerPhi = nullptr;
3209 if (CurrentPart == 0) {
3210 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3211 ->getPlan()
3212 ->getVectorLoopRegion()
3213 ->getEntryBasicBlock()
3214 ->front());
3215 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3216 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3217 CanonicalIV->getIterator());
3218 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3219 NewPointerPhi->setDebugLoc(getDebugLoc());
3220 } else {
3221 // The recipe has been unrolled. In that case, fetch the single pointer phi
3222 // shared among all unrolled parts of the recipe.
3223 auto *GEP =
3224 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3225 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3226 }
3227
3228 // A pointer induction, performed by using a gep
3229 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3230 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3231 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3232 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3233 // Add induction update using an incorrect block temporarily. The phi node
3234 // will be fixed after VPlan execution. Note that at this point the latch
3235 // block cannot be used, as it does not exist yet.
3236 // TODO: Model increment value in VPlan, by turning the recipe into a
3237 // multi-def and a subclass of VPHeaderPHIRecipe.
3238 if (CurrentPart == 0) {
3239 // The recipe represents the first part of the pointer induction. Create the
3240 // GEP to increment the phi across all unrolled parts.
3241 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3242 Value *NumUnrolledElems =
3243 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3244
3245 Value *InductionGEP = GetElementPtrInst::Create(
3246 State.Builder.getInt8Ty(), NewPointerPhi,
3247 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3248 InductionLoc);
3249
3250 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3251 }
3252
3253 // Create actual address geps that use the pointer phi as base and a
3254 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3255 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3256 Value *StartOffsetScalar = State.Builder.CreateMul(
3257 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3258 Value *StartOffset =
3259 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3260 // Create a vector of consecutive numbers from zero to VF.
3261 StartOffset = State.Builder.CreateAdd(
3262 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3263
3264 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3265 "scalar step must be the same across all parts");
3266 Value *GEP = State.Builder.CreateGEP(
3267 State.Builder.getInt8Ty(), NewPointerPhi,
3268 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3269 State.VF, ScalarStepValue)),
3270 "vector.gep");
3271 State.set(this, GEP);
3272}
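// For illustration only (a sketch under assumed values): for a pointer
// induction with byte step %step, fixed VF = 4, UF = 1 and part 0, the code
// above roughly produces
//   vector.body:
//     %pointer.phi = phi ptr [ %start, %vector.ph ], [ %ptr.ind, %vector.body ]
//     %vector.gep  = getelementptr i8, ptr %pointer.phi,
//                        <4 x i64> (<i64 0, i64 1, i64 2, i64 3> * %step)
//     %ptr.ind     = getelementptr i8, ptr %pointer.phi, i64 (4 * %step)
// where %vector.gep holds the per-lane addresses and %ptr.ind advances the phi
// by VF * UF * step bytes for the next iteration.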
3273
3274#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3276 VPSlotTracker &SlotTracker) const {
3277 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3278 "unexpected number of operands");
3279 O << Indent << "EMIT ";
3281 O << " = WIDEN-POINTER-INDUCTION ";
3283 O << ", ";
3285 if (getNumOperands() == 4) {
3286 O << ", ";
3288 O << ", ";
3290 }
3291}
3292#endif
3293
3295 assert(!State.Lane && "cannot be used in per-lane");
3296 if (State.ExpandedSCEVs.contains(Expr)) {
3297 // SCEV Expr has already been expanded, result must already be set. At the
3298 // moment we have to execute the entry block twice (once before skeleton
3299 // creation to get expanded SCEVs used by the skeleton and once during
3300 // regular VPlan execution).
3302 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3303 "Results must match");
3304 return;
3305 }
3306
3307 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3308 SCEVExpander Exp(SE, DL, "induction");
3309
3310 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3311 &*State.Builder.GetInsertPoint());
3312 State.ExpandedSCEVs[Expr] = Res;
3313 State.set(this, Res, VPLane(0));
3314}
3315
3316#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3318 VPSlotTracker &SlotTracker) const {
3319 O << Indent << "EMIT ";
3321 O << " = EXPAND SCEV " << *Expr;
3322}
3323#endif
3324
3326 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3327 Type *STy = CanonicalIV->getType();
3328 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3329 ElementCount VF = State.VF;
3330 Value *VStart = VF.isScalar()
3331 ? CanonicalIV
3332 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3333 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3334 if (VF.isVector()) {
3335 VStep = Builder.CreateVectorSplat(VF, VStep);
3336 VStep =
3337 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3338 }
3339 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3340 State.set(this, CanonicalVectorIV);
3341}
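// For illustration only (a sketch under assumed values): for an i64 canonical
// IV %index, fixed VF = 4 and unroll part 0, the code above roughly generates
//   %broadcast = ... splat of %index across all lanes
//   %vec.iv    = add <4 x i64> %broadcast, <i64 0, i64 1, i64 2, i64 3>
// so each lane holds the canonical IV plus its lane number, offset further by
// Part * VF for later unrolled parts.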
3342
3343#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3345 VPSlotTracker &SlotTracker) const {
3346 O << Indent << "EMIT ";
3348 O << " = WIDEN-CANONICAL-INDUCTION ";
3350}
3351#endif
3352
3354 auto &Builder = State.Builder;
3355 // Create a vector from the initial value.
3356 auto *VectorInit = getStartValue()->getLiveInIRValue();
3357
3358 Type *VecTy = State.VF.isScalar()
3359 ? VectorInit->getType()
3360 : VectorType::get(VectorInit->getType(), State.VF);
3361
3362 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3363 if (State.VF.isVector()) {
3364 auto *IdxTy = Builder.getInt32Ty();
3365 auto *One = ConstantInt::get(IdxTy, 1);
3366 IRBuilder<>::InsertPointGuard Guard(Builder);
3367 Builder.SetInsertPoint(VectorPH->getTerminator());
3368 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3369 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3370 VectorInit = Builder.CreateInsertElement(
3371 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3372 }
3373
3374 // Create a phi node for the new recurrence.
3375 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3376 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3377 Phi->addIncoming(VectorInit, VectorPH);
3378 State.set(this, Phi);
3379}
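// For illustration only (a sketch under assumed values): for an i32 recurrence
// with initial value %init and fixed VF = 4, the code above roughly emits
//   vector.ph:
//     %vector.recur.init = insertelement <4 x i32> poison, i32 %init, i32 3
//   vector.body:
//     %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ ... ]
// placing the scalar initial value in the last lane, where the splice feeding
// the first vector iteration expects it.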
3380
3383 VPCostContext &Ctx) const {
3385 if (VF.isScalar())
3386 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3387
3388 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3390
3392 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3393 Type *VectorTy =
3394 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3395
3397 cast<VectorType>(VectorTy), Mask, CostKind,
3398 VF.getKnownMinValue() - 1);
3399}
3400
3401#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3403 VPSlotTracker &SlotTracker) const {
3404 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3406 O << " = phi ";
3408}
3409#endif
3410
3412 auto &Builder = State.Builder;
3413
3414 // If this phi is fed by a scaled reduction then it should output a
3415 // vector with fewer elements than the VF.
3416 ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3417
3418 // Reductions do not have to start at zero. They can start with
3419 // any loop invariant values.
3420 VPValue *StartVPV = getStartValue();
3421 Value *StartV = StartVPV->getLiveInIRValue();
3422
3423 // In order to support recurrences we need to be able to vectorize Phi nodes.
3424 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3425 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3426 // this value when we vectorize all of the instructions that use the PHI.
3427 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3428 Type *VecTy =
3429 ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3430
3431 BasicBlock *HeaderBB = State.CFG.PrevBB;
3432 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
3433 "recipe must be in the vector loop header");
3434 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3435 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3436 State.set(this, Phi, IsInLoop);
3437
3438 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3439
3440 Value *Iden = nullptr;
3441 RecurKind RK = RdxDesc.getRecurrenceKind();
3442 unsigned CurrentPart = getUnrollPart(*this);
3443
3446 // MinMax and AnyOf reductions have the start value as their identity.
3447 if (ScalarPHI) {
3448 Iden = StartV;
3449 } else {
3450 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3451 Builder.SetInsertPoint(VectorPH->getTerminator());
3452 StartV = Iden = State.get(StartVPV);
3453 }
3455 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3456 // phi or the resume value from the main vector loop when vectorizing the
3457 // epilogue loop. In the exit block, ComputeReductionResult will generate
3458 // checks to verify if the reduction result is the sentinel value. If the
3459 // result is the sentinel value, it will be corrected back to the start
3460 // value.
3461 // TODO: The sentinel value is not always necessary. When the start value is
3462 // a constant, and smaller than the start value of the induction variable,
3463 // the start value can be directly used to initialize the reduction phi.
3464 Iden = StartV;
3465 if (!ScalarPHI) {
3466 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3467 Builder.SetInsertPoint(VectorPH->getTerminator());
3468 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3469 }
3470 } else {
3471 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3472 RdxDesc.getFastMathFlags());
3473
3474 if (!ScalarPHI) {
3475 if (CurrentPart == 0) {
3476 // Create start and identity vector values for the reduction in the
3477 // preheader.
3478 // TODO: Introduce recipes in VPlan preheader to create initial values.
3479 Iden = Builder.CreateVectorSplat(VF, Iden);
3480 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3481 Builder.SetInsertPoint(VectorPH->getTerminator());
3482 Constant *Zero = Builder.getInt32(0);
3483 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3484 } else {
3485 Iden = Builder.CreateVectorSplat(VF, Iden);
3486 }
3487 }
3488 }
3489
3490 Phi = cast<PHINode>(State.get(this, IsInLoop));
3491 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3492 Phi->addIncoming(StartVal, VectorPH);
3493}
3494
3495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3497 VPSlotTracker &SlotTracker) const {
3498 O << Indent << "WIDEN-REDUCTION-PHI ";
3499
3501 O << " = phi ";
3503 if (VFScaleFactor != 1)
3504 O << " (VF scaled by 1/" << VFScaleFactor << ")";
3505}
3506#endif
3507
3510 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3511
3512 Value *Op0 = State.get(getOperand(0));
3513 Type *VecTy = Op0->getType();
3514 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3515 State.set(this, VecPhi);
3516}
3517
3518#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3520 VPSlotTracker &SlotTracker) const {
3521 O << Indent << "WIDEN-PHI ";
3522
3523 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3524 // Unless all incoming values are modeled in VPlan print the original PHI
3525 // directly.
3526 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3527 // values as VPValues.
3528 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3529 O << VPlanIngredient(OriginalPhi);
3530 return;
3531 }
3532
3533 printAsOperand(O, SlotTracker);
3534 O << " = phi ";
3535 printOperands(O, SlotTracker);
3536}
3537#endif
3538
3539// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3540// remove VPActiveLaneMaskPHIRecipe.
3541 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
3542 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3543 Value *StartMask = State.get(getOperand(0));
3544 PHINode *Phi =
3545 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3546 Phi->addIncoming(StartMask, VectorPH);
3547 Phi->setDebugLoc(getDebugLoc());
3548 State.set(this, Phi);
3549}
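// Illustrative sketch, not part of VPlanRecipes.cpp: one way the start mask
// consumed by VPActiveLaneMaskPHIRecipe::execute above could be materialized
// in the preheader with an @llvm.get.active.lane.mask call. The helper name,
// the fixed mask width of 4 and the i64 counters are assumptions for the
// example; as in the recipe, only the preheader incoming value is set and the
// backedge value (the next iteration's mask) is filled in later.
static PHINode *buildActiveLaneMaskPhi(IRBuilderBase &Builder,
                                       BasicBlock *VectorPH,
                                       BasicBlock *HeaderBB, Value *Index,
                                       Value *TripCount) {
  auto *MaskTy =
      VectorType::get(Builder.getInt1Ty(), ElementCount::getFixed(4));
  // Mask for the first vector iteration: lanes where Index + {0..3} < TripCount.
  Builder.SetInsertPoint(VectorPH->getTerminator());
  Value *StartMask = Builder.CreateIntrinsic(
      Intrinsic::get_active_lane_mask, {MaskTy, TripCount->getType()},
      {Index, TripCount}, nullptr, "active.lane.mask.entry");
  // Header phi carrying the mask; the recipe above creates exactly this phi.
  Builder.SetInsertPoint(HeaderBB, HeaderBB->begin());
  PHINode *Phi = Builder.CreatePHI(MaskTy, 2, "active.lane.mask");
  Phi->addIncoming(StartMask, VectorPH);
  return Phi;
}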
3550
3551#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3552 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3553 VPSlotTracker &SlotTracker) const {
3554 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3555
3556 printAsOperand(O, SlotTracker);
3557 O << " = phi ";
3558 printOperands(O, SlotTracker);
3559}
3560#endif
3561
3562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3563 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3564 VPSlotTracker &SlotTracker) const {
3565 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3566
3567 printAsOperand(O, SlotTracker);
3568 O << " = phi ";
3569 printOperands(O, SlotTracker);
3570}
3571#endif
3572
3573 void VPScalarPHIRecipe::execute(VPTransformState &State) {
3574 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3575 Value *Start = State.get(getStartValue(), VPLane(0));
3576 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3577 Phi->addIncoming(Start, VectorPH);
3578 Phi->setDebugLoc(getDebugLoc());
3579 State.set(this, Phi, /*IsScalar=*/true);
3580}
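// Illustrative sketch, not part of VPlanRecipes.cpp: the scalar loop-control
// pattern a phi produced by the recipe above typically participates in, e.g.
// a single induction counter shared by all lanes regardless of VF. The helper
// name and the assumption that HeaderBB is still being filled (no terminator
// yet) are made only for this example.
static PHINode *buildScalarCounterPhi(IRBuilderBase &Builder,
                                      BasicBlock *VectorPH,
                                      BasicBlock *HeaderBB, Value *Start,
                                      Value *Step) {
  Builder.SetInsertPoint(HeaderBB, HeaderBB->begin());
  PHINode *IV = Builder.CreatePHI(Start->getType(), 2, "index");
  IV->addIncoming(Start, VectorPH);
  // Append the increment at the current end of the header and use it as the
  // backedge value (header and latch coincide in this sketch).
  Builder.SetInsertPoint(HeaderBB);
  Value *Next = Builder.CreateAdd(IV, Step, "index.next");
  IV->addIncoming(Next, HeaderBB);
  return IV;
}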
3581
3582#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3583 void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3584 VPSlotTracker &SlotTracker) const {
3585 O << Indent << "SCALAR-PHI ";
3586 printAsOperand(O, SlotTracker);
3587 O << " = phi ";
3588 printOperands(O, SlotTracker);
3589}
3590#endif
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
cl::opt< unsigned > ForceTargetInstructionCost
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:662
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:595
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:458
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2393
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2503
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:508
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2121
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2491
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1830
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2066
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1124
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1152
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2002
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2108
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1108
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1889
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1744
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:274
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2236
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1144
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:963
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2189
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1689
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1699
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:286
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1849
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1610
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1384
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isUnaryOp() const
Definition: Instruction.h:278
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
bool isReverse() const
Definition: VectorUtils.h:495
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:497
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputType, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3519
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3569
iterator end()
Definition: VPlan.h:3553
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3582
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2508
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2513
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2503
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2499
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:396
VPRegionBlock * getParent()
Definition: VPlan.h:488
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:519
VPlan * getPlan()
Definition: VPlan.cpp:153
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2873
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:114
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:415
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:410
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:388
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:400
unsigned getVPDefID() const
Definition: VPlanValue.h:420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3449
VPValue * getStartValue() const
Definition: VPlan.h:3448
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2064
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1805
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1216
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1204
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1219
@ CalculateTripCountMinusVF
Definition: VPlan.h:1217
bool hasResult() const
Definition: VPlan.h:1339
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1316
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2587
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2593
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2600
Instruction * getInsertPos() const
Definition: VPlan.h:2635
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2624
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
static VPLane getFirstLane()
Definition: VPlan.h:178
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2468
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:720
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:745
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:814
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:931
ExactFlagsTy ExactFlags
Definition: VPlan.h:981
FastMathFlagsTy FMFs
Definition: VPlan.h:984
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:983
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1151
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1112
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1154
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:980
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:982
WrapFlagsTy WrapFlags
Definition: VPlan.h:979
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1158
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1145
bool hasNoSignedWrap() const
Definition: VPlan.h:1164
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2746
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2704
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2708
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2698
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2710
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2702
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2706
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3706
const VPBlockBase * getEntry() const
Definition: VPlan.h:3745
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2833
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3506
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:917
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:441
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1453
operand_range operands()
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:272
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1412
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1449
friend class VPInstruction
Definition: VPlanValue.h:47
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:138
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
user_iterator user_begin()
Definition: VPlanValue.h:128
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1753
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1757
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1578
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1506
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:2111
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2108
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2114
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2186
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2195
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1696
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2944
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2941
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2980
Instruction & Ingredient
Definition: VPlan.h:2935
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2938
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2994
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2987
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2984
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2240
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1472
unsigned getUF() const
Definition: VPlan.h:4007
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:41
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:39
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:250
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:688
LLVMContext & LLVMCtx
Definition: VPlan.h:692
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1662
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:691
const TargetLibraryInfo & TLI
Definition: VPlan.h:690
const TargetTransformInfo & TTI
Definition: VPlan.h:689
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:694
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:343
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:351
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:352
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:268
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:266
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:388
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:391
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:365
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the generated scalar value.
Definition: VPlan.cpp:253
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:249
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:368
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:241
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:244
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:377
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:376
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:278
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3064
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1849
VPValue * getCond() const
Definition: VPlan.h:1845
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3143
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3146
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3108
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.