//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
extern cl::opt<unsigned> ForceTargetInstructionCost;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
53 case VPInstructionSC:
54 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
55 case VPInterleaveSC:
56 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
57 case VPWidenStoreEVLSC:
58 case VPWidenStoreSC:
59 return true;
60 case VPReplicateSC:
61 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
62 ->mayWriteToMemory();
63 case VPWidenCallSC:
64 return !cast<VPWidenCallRecipe>(this)
65 ->getCalledScalarFunction()
66 ->onlyReadsMemory();
67 case VPWidenIntrinsicSC:
68 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
69 case VPBranchOnMaskSC:
70 case VPScalarIVStepsSC:
71 case VPPredInstPHISC:
72 return false;
73 case VPBlendSC:
74 case VPReductionEVLSC:
75 case VPReductionSC:
76 case VPVectorPointerSC:
77 case VPWidenCanonicalIVSC:
78 case VPWidenCastSC:
79 case VPWidenGEPSC:
80 case VPWidenIntOrFpInductionSC:
81 case VPWidenLoadEVLSC:
82 case VPWidenLoadSC:
83 case VPWidenPHISC:
84 case VPWidenSC:
85 case VPWidenEVLSC:
86 case VPWidenSelectSC: {
87 const Instruction *I =
88 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
89 (void)I;
90 assert((!I || !I->mayWriteToMemory()) &&
91 "underlying instruction may write to memory");
92 return false;
93 }
94 default:
95 return true;
96 }
97}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
101 case VPInstructionSC:
102 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
103 case VPWidenLoadEVLSC:
104 case VPWidenLoadSC:
105 return true;
106 case VPReplicateSC:
107 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
108 ->mayReadFromMemory();
109 case VPWidenCallSC:
110 return !cast<VPWidenCallRecipe>(this)
111 ->getCalledScalarFunction()
112 ->onlyWritesMemory();
113 case VPWidenIntrinsicSC:
114 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
115 case VPBranchOnMaskSC:
116 case VPPredInstPHISC:
117 case VPScalarIVStepsSC:
118 case VPWidenStoreEVLSC:
119 case VPWidenStoreSC:
120 return false;
121 case VPBlendSC:
122 case VPReductionEVLSC:
123 case VPReductionSC:
124 case VPVectorPointerSC:
125 case VPWidenCanonicalIVSC:
126 case VPWidenCastSC:
127 case VPWidenGEPSC:
128 case VPWidenIntOrFpInductionSC:
129 case VPWidenPHISC:
130 case VPWidenSC:
131 case VPWidenEVLSC:
132 case VPWidenSelectSC: {
133 const Instruction *I =
134 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
135 (void)I;
136 assert((!I || !I->mayReadFromMemory()) &&
137 "underlying instruction may read from memory");
138 return false;
139 }
140 default:
141 return true;
142 }
143}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
147 case VPDerivedIVSC:
148 case VPPredInstPHISC:
149 case VPScalarCastSC:
150 case VPReverseVectorPointerSC:
151 return false;
152 case VPInstructionSC:
153 return mayWriteToMemory();
154 case VPWidenCallSC: {
155 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
156 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
157 }
158 case VPWidenIntrinsicSC:
159 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
160 case VPBlendSC:
161 case VPReductionEVLSC:
162 case VPReductionSC:
163 case VPScalarIVStepsSC:
164 case VPVectorPointerSC:
165 case VPWidenCanonicalIVSC:
166 case VPWidenCastSC:
167 case VPWidenGEPSC:
168 case VPWidenIntOrFpInductionSC:
169 case VPWidenPHISC:
170 case VPWidenPointerInductionSC:
171 case VPWidenSC:
172 case VPWidenEVLSC:
173 case VPWidenSelectSC: {
174 const Instruction *I =
175 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
176 (void)I;
177 assert((!I || !I->mayHaveSideEffects()) &&
178 "underlying instruction has side-effects");
179 return false;
180 }
181 case VPInterleaveSC:
182 return mayWriteToMemory();
183 case VPWidenLoadEVLSC:
184 case VPWidenLoadSC:
185 case VPWidenStoreEVLSC:
186 case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideffects result for ingredient differs from this "
        "implementation");
192 return mayWriteToMemory();
193 case VPReplicateSC: {
194 auto *R = cast<VPReplicateRecipe>(this);
195 return R->getUnderlyingInstr()->mayHaveSideEffects();
196 }
197 default:
198 return true;
199 }
200}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  // Get the underlying instruction for the recipe, if there is one. It is used
247 // to
248 // * decide if cost computation should be skipped for this recipe,
249 // * apply forced target instruction cost.
250 Instruction *UI = nullptr;
251 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
252 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
253 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
254 UI = IG->getInsertPos();
255 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
256 UI = &WidenMem->getIngredient();
257
258 InstructionCost RecipeCost;
259 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
260 RecipeCost = 0;
261 } else {
262 RecipeCost = computeCost(VF, Ctx);
    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
        RecipeCost.isValid())
      RecipeCost = InstructionCost(ForceTargetInstructionCost);
  }
267
268 LLVM_DEBUG({
269 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
270 dump();
271 });
272 return RecipeCost;
273}

InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
  llvm_unreachable("subclasses should implement computeCost");
}

InstructionCost
VPPartialReductionRecipe::computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const {
  std::optional<unsigned> Opcode = std::nullopt;
  VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe();
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
    Opcode = std::make_optional(WidenR->getOpcode());
287
288 VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
289 VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
290
291 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
292 auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
293 : BinOpR->getOperand(0));
294 auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
295 : BinOpR->getOperand(1));
296
  auto GetExtendKind = [](VPRecipeBase *R) {
    // The extend could come from outside the plan.
    if (!R)
      return TargetTransformInfo::PR_None;
    auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
    if (!WidenCastR)
      return TargetTransformInfo::PR_None;
    if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
      return TargetTransformInfo::PR_ZeroExtend;
    if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
      return TargetTransformInfo::PR_SignExtend;
    return TargetTransformInfo::PR_None;
  };
310
311 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
312 PhiType, VF, GetExtendKind(ExtAR),
313 GetExtendKind(ExtBR), Opcode);
314}

void VPPartialReductionRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
319
320 assert(getOpcode() == Instruction::Add &&
321 "Unhandled partial reduction opcode");
322
323 Value *BinOpVal = State.get(getOperand(0));
324 Value *PhiVal = State.get(getOperand(1));
325 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
326
327 Type *RetTy = PhiVal->getType();
328
329 CallInst *V = Builder.CreateIntrinsic(
330 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
331 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
332
333 State.set(this, V);
334}
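
// For illustration (hedged example, not taken from the original source): with
// VF = 16 and an i32 add reduction accumulated in a <4 x i32> phi, the call
// created above looks roughly like
//   %partial.reduce = call <4 x i32>
//       @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(
//           <4 x i32> %phi, <16 x i32> %bin.op)
// i.e. the wide bin-op vector is folded into the narrower accumulator.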
335
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "PARTIAL-REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
  printOperands(O, SlotTracker);
}
#endif

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
348 "recipe doesn't have fast math flags");
349 FastMathFlags Res;
350 Res.setAllowReassoc(FMFs.AllowReassoc);
351 Res.setNoNaNs(FMFs.NoNaNs);
352 Res.setNoInfs(FMFs.NoInfs);
353 Res.setNoSignedZeros(FMFs.NoSignedZeros);
354 Res.setAllowReciprocal(FMFs.AllowReciprocal);
355 Res.setAllowContract(FMFs.AllowContract);
356 Res.setApproxFunc(FMFs.ApproxFunc);
357 return Res;
358}
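
// Note: recipes keep fast-math flags in the compact FastMathFlagsTy mirror
// (see its constructor further down in this file) and rebuild an IR-level
// FastMathFlags object here whenever flags have to be re-applied to generated
// instructions.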
359
360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
362#endif
363
template <unsigned PartOpIdx>
VPValue *
VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {
  if (U.getNumOperands() == PartOpIdx + 1)
    return U.getOperand(PartOpIdx);
  return nullptr;
}

template <unsigned PartOpIdx>
unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
  if (auto *UnrollPartOp = getUnrollPartOperand(U))
    return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
  return 0;
}
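
// For example (assuming the usual unrolling scheme), a recipe cloned for
// unroll part 2 carries an extra constant live-in operand holding the value 2;
// getUnrollPart reads that operand back and returns 0 when no such operand is
// present, i.e. for the original part-0 recipe.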

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}
397
398bool VPInstruction::doesGeneratePerAllLanes() const {
399 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
400}
401
402bool VPInstruction::canGenerateScalarForFirstLane() const {
404 return true;
406 return true;
407 switch (Opcode) {
408 case Instruction::ICmp:
409 case Instruction::Select:
417 return true;
418 default:
419 return false;
420 }
421}
422
423Value *VPInstruction::generatePerLane(VPTransformState &State,
424 const VPLane &Lane) {
425 IRBuilderBase &Builder = State.Builder;
426
  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
429 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
430 State.get(getOperand(1), Lane), Name);
431}
432
433Value *VPInstruction::generate(VPTransformState &State) {
434 IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
438 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
439 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
440 auto *Res =
441 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
442 if (auto *I = dyn_cast<Instruction>(Res))
443 setFlags(I);
444 return Res;
445 }
446
447 switch (getOpcode()) {
448 case VPInstruction::Not: {
449 Value *A = State.get(getOperand(0));
450 return Builder.CreateNot(A, Name);
451 }
452 case Instruction::ICmp: {
453 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
454 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
455 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
456 return Builder.CreateCmp(getPredicate(), A, B, Name);
457 }
458 case Instruction::Select: {
459 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
460 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
461 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
462 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
463 return Builder.CreateSelect(Cond, Op1, Op2, Name);
464 }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
467 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
468 // Get the original loop tripcount.
469 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
470
471 // If this part of the active lane mask is scalar, generate the CMP directly
472 // to avoid unnecessary extracts.
473 if (State.VF.isScalar())
474 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
475 Name);
476
477 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
478 auto *PredTy = VectorType::get(Int1Ty, State.VF);
479 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
480 {PredTy, ScalarTC->getType()},
481 {VIVElem0, ScalarTC}, nullptr, Name);
482 }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
485 //
486 // vector.ph:
487 // v_init = vector(..., ..., ..., a[-1])
488 // br vector.body
489 //
490 // vector.body
491 // i = phi [0, vector.ph], [i+4, vector.body]
492 // v1 = phi [v_init, vector.ph], [v2, vector.body]
493 // v2 = a[i, i+1, i+2, i+3];
494 // v3 = vector(v1(3), v2(0, 1, 2))
495
496 auto *V1 = State.get(getOperand(0));
497 if (!V1->getType()->isVectorTy())
498 return V1;
499 Value *V2 = State.get(getOperand(1));
500 return Builder.CreateVectorSplice(V1, V2, -1, Name);
501 }
  case VPInstruction::CalculateTripCountMinusVF: {
    unsigned UF = getParent()->getPlan()->getUF();
504 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
505 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
506 Value *Sub = Builder.CreateSub(ScalarTC, Step);
507 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
508 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
509 return Builder.CreateSelect(Cmp, Sub, Zero);
510 }
  case VPInstruction::ExplicitVectorLength: {
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
513 // be outside of the main loop.
514 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
515 // Compute EVL
516 assert(AVL->getType()->isIntegerTy() &&
517 "Requested vector length should be an integer.");
518
519 assert(State.VF.isScalable() && "Expected scalable vector factor.");
520 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
521
522 Value *EVL = State.Builder.CreateIntrinsic(
523 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
524 {AVL, VFArg, State.Builder.getTrue()});
525 return EVL;
526 }
  case VPInstruction::CanonicalIVIncrementForPart: {
    unsigned Part = getUnrollPart(*this);
529 auto *IV = State.get(getOperand(0), VPLane(0));
530 assert(Part != 0 && "Must have a positive part");
531 // The canonical IV is incremented by the vectorization factor (num of
532 // SIMD elements) times the unroll part.
533 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    Value *Cond = State.get(getOperand(0), VPLane(0));
539 // Replace the temporary unreachable terminator with a new conditional
540 // branch, hooking it up to backward destination for exiting blocks now and
541 // to forward destination(s) later when they are created.
542 BranchInst *CondBr =
543 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

547 if (!getParent()->isExiting())
548 return CondBr;
549
550 VPRegionBlock *ParentRegion = getParent()->getParent();
551 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
552 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
553 return CondBr;
554 }
  case VPInstruction::BranchOnCount: {
    // First create the compare.
557 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
558 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
559 Value *Cond = Builder.CreateICmpEQ(IV, TC);
560
561 // Now create the branch.
562 auto *Plan = getParent()->getPlan();
563 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
564 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
565
566 // Replace the temporary unreachable terminator with a new conditional
567 // branch, hooking it up to backward destination (the header) now and to the
568 // forward destination (the exit/middle block) later when it is created.
569 // Note that CreateCondBr expects a valid BB as first argument, so we need
570 // to set it to nullptr later.
571 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
572 State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
576 }
  case VPInstruction::ComputeReductionResult: {
    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
579 // and will be removed by breaking up the recipe further.
580 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
581 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
582 // Get its reduction variable descriptor.
583 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
584
585 RecurKind RK = RdxDesc.getRecurrenceKind();
586
587 Type *PhiTy = OrigPhi->getType();
588 // The recipe's operands are the reduction phi, followed by one operand for
589 // each part of the reduction.
590 unsigned UF = getNumOperands() - 1;
591 VectorParts RdxParts(UF);
592 for (unsigned Part = 0; Part < UF; ++Part)
593 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
594
595 // If the vector reduction can be performed in a smaller type, we truncate
596 // then extend the loop exit value to enable InstCombine to evaluate the
597 // entire expression in the smaller type.
598 // TODO: Handle this in truncateToMinBW.
599 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
600 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
601 for (unsigned Part = 0; Part < UF; ++Part)
602 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
603 }
604 // Reduce all of the unrolled parts into a single vector.
605 Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RdxDesc.getOpcode();
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;
609
610 if (PhiR->isOrdered()) {
611 ReducedPartRdx = RdxParts[UF - 1];
612 } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
616 for (unsigned Part = 1; Part < UF; ++Part) {
617 Value *RdxPart = RdxParts[Part];
618 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
619 ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
          ReducedPartRdx =
              createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
624 else
625 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
626 }
627 }
628
629 // Create the reduction after the loop. Note that inloop reductions create
630 // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
635 ReducedPartRdx =
636 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
637 // If the reduction can be performed in a smaller type, we need to extend
638 // the reduction to the wider type before we branch to the original loop.
639 if (PhiTy != RdxDesc.getRecurrenceType())
640 ReducedPartRdx = RdxDesc.isSigned()
641 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
642 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
643 }
644
645 return ReducedPartRdx;
646 }
  case VPInstruction::ExtractFromEnd: {
    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
649 unsigned Offset = CI->getZExtValue();
650 assert(Offset > 0 && "Offset from end must be positive");
651 Value *Res;
652 if (State.VF.isVector()) {
653 assert(Offset <= State.VF.getKnownMinValue() &&
654 "invalid offset to extract from");
655 // Extract lane VF - Offset from the operand.
656 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
657 } else {
658 assert(Offset <= 1 && "invalid offset to extract from");
659 Res = State.get(getOperand(0));
660 }
661 if (isa<ExtractElementInst>(Res))
662 Res->setName(Name);
663 return Res;
664 }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0));
667 Value *B = State.get(getOperand(1));
668 return Builder.CreateLogicalAnd(A, B, Name);
669 }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
673 Value *Ptr = State.get(getOperand(0), VPLane(0));
674 Value *Addend = State.get(getOperand(1), VPLane(0));
675 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
676 }
  case VPInstruction::ResumePhi: {
    Value *IncomingFromVPlanPred =
679 State.get(getOperand(0), /* IsScalar */ true);
680 Value *IncomingFromOtherPreds =
681 State.get(getOperand(1), /* IsScalar */ true);
682 auto *NewPhi =
683 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
684 BasicBlock *VPlanPred =
685 State.CFG
686 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
687 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
688 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
689 if (OtherPred == VPlanPred)
690 continue;
691 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
692 }
693 return NewPhi;
694 }
  case VPInstruction::AnyOf: {
    Value *A = State.get(getOperand(0));
697 return Builder.CreateOrReduce(A);
698 }
699
700 default:
701 llvm_unreachable("Unsupported opcode for instruction");
702 }
703}
704
709}
710
713}
714
715#if !defined(NDEBUG)
716bool VPInstruction::isFPMathOp() const {
717 // Inspired by FPMathOperator::classof. Notable differences are that we don't
718 // support Call, PHI and Select opcodes here yet.
719 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
720 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
721 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
722 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
723}
724#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Lane && "VPInstruction executing an Lane");
729 assert((hasFastMathFlags() == isFPMathOp() ||
730 getOpcode() == Instruction::Select) &&
731 "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
                                   (vputils::onlyFirstLaneUsed(this) ||
                                    isVectorToScalar() || isSingleScalar());
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
739 if (GeneratesPerAllLanes) {
740 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
741 Lane != NumLanes; ++Lane) {
742 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
743 assert(GeneratedValue && "generatePerLane must produce a value");
744 State.set(this, GeneratedValue, VPLane(Lane));
745 }
746 return;
747 }
748
749 Value *GeneratedValue = generate(State);
750 if (!hasResult())
751 return;
752 assert(GeneratedValue && "generate must produce a value");
753 assert(
754 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
755 State.VF.isScalar()) &&
756 "scalar value but not only first lane defined");
757 State.set(this, GeneratedValue,
758 /*IsScalar*/ GeneratesPerFirstLaneOnly);
759}

bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return false;
  switch (getOpcode()) {
765 case Instruction::ICmp:
766 case Instruction::Select:
775 return false;
776 default:
777 return true;
778 }
779}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);
785
786 switch (getOpcode()) {
787 default:
788 return false;
789 case Instruction::ICmp:
790 case Instruction::Select:
791 case Instruction::Or:
793 // TODO: Cover additional opcodes.
794 return vputils::onlyFirstLaneUsed(this);
802 return true;
803 };
804 llvm_unreachable("switch should return");
805}

bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstPartUsed(this);
811
812 switch (getOpcode()) {
813 default:
814 return false;
815 case Instruction::ICmp:
816 case Instruction::Select:
817 return vputils::onlyFirstPartUsed(this);
821 return true;
822 };
823 llvm_unreachable("switch should return");
824}
825
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
834 O << Indent << "EMIT ";
835
836 if (hasResult()) {
838 O << " = ";
839 }
840
841 switch (getOpcode()) {
843 O << "not";
844 break;
846 O << "combined load";
847 break;
849 O << "combined store";
850 break;
852 O << "active lane mask";
853 break;
855 O << "resume-phi";
856 break;
858 O << "EXPLICIT-VECTOR-LENGTH";
859 break;
861 O << "first-order splice";
862 break;
864 O << "branch-on-cond";
865 break;
867 O << "TC > VF ? TC - VF : 0";
868 break;
870 O << "VF * Part +";
871 break;
873 O << "branch-on-count";
874 break;
876 O << "extract-from-end";
877 break;
879 O << "compute-reduction-result";
880 break;
882 O << "logical-and";
883 break;
885 O << "ptradd";
886 break;
888 O << "any-of";
889 break;
890 default:
892 }
893
894 printFlags(O);
896
897 if (auto DL = getDebugLoc()) {
898 O << ", !dbg ";
899 DL.print(O);
900 }
901}
902#endif

void VPIRInstruction::execute(VPTransformState &State) {
  assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
         "Only PHINodes can have extra operands");
  for (const auto &[Idx, Op] : enumerate(operands())) {
    VPValue *ExitValue = Op;
    auto Lane = vputils::isUniformAfterVectorization(ExitValue)
                    ? VPLane::getFirstLane()
                    : VPLane::getLastLaneForVF(State.VF);
    VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
    auto *PredVPBB = Pred->getExitingBasicBlock();
914 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
915 // Set insertion point in PredBB in case an extract needs to be generated.
916 // TODO: Model extracts explicitly.
917 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
918 Value *V = State.get(ExitValue, VPLane(Lane));
919 auto *Phi = cast<PHINode>(&I);
920 // If there is no existing block for PredBB in the phi, add a new incoming
921 // value. Otherwise update the existing incoming value for PredBB.
922 if (Phi->getBasicBlockIndex(PredBB) == -1)
923 Phi->addIncoming(V, PredBB);
924 else
925 Phi->setIncomingValueForBlock(PredBB, V);
926 }
927
928 // Advance the insert point after the wrapped IR instruction. This allows
929 // interleaving VPIRInstructions and other recipes.
930 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
931}

InstructionCost VPIRInstruction::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
935 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
936 // hence it does not contribute to the cost-modeling for the VPlan.
937 return 0;
938}
939
940#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
942 VPSlotTracker &SlotTracker) const {
943 O << Indent << "IR " << I;
944
945 if (getNumOperands() != 0) {
946 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
948 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
949 Op.value()->printAsOperand(O, SlotTracker);
950 O << " from ";
951 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
952 });
953 O << ")";
954 }
955}
956#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
961
962 FunctionType *VFTy = Variant->getFunctionType();
963 // Add return type if intrinsic is overloaded on it.
965 for (const auto &I : enumerate(arg_operands())) {
966 Value *Arg;
967 // Some vectorized function variants may also take a scalar argument,
968 // e.g. linear parameters for pointers. This needs to be the scalar value
969 // from the start of the respective part when interleaving.
970 if (!VFTy->getParamType(I.index())->isVectorTy())
971 Arg = State.get(I.value(), VPLane(0));
972 else
973 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
974 Args.push_back(Arg);
975 }
976
977 assert(Variant != nullptr && "Can't create vector function.");
978
  auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (CI)
982 CI->getOperandBundlesAsDefs(OpBundles);
983
984 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
985 setFlags(V);
986
987 if (!V->getType()->isVoidTy())
988 State.set(this, V);
989 State.addMetadata(V, CI);
990}

InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
996 Variant->getFunctionType()->params(),
997 CostKind);
998}
999
1000#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1002 VPSlotTracker &SlotTracker) const {
1003 O << Indent << "WIDEN-CALL ";
1004
1005 Function *CalledFn = getCalledScalarFunction();
1006 if (CalledFn->getReturnType()->isVoidTy())
1007 O << "void ";
1008 else {
1010 O << " = ";
1011 }
1012
1013 O << "call";
1014 printFlags(O);
1015 O << " @" << CalledFn->getName() << "(";
1017 Op->printAsOperand(O, SlotTracker);
1018 });
1019 O << ")";
1020
1021 O << " (using library function";
1022 if (Variant->hasName())
1023 O << ": " << Variant->getName();
1024 O << ")";
1025}
1026#endif

void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
1031
1032 SmallVector<Type *, 2> TysForDecl;
1033 // Add return type if intrinsic is overloaded on it.
1034 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1035 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1037 for (const auto &I : enumerate(operands())) {
1038 // Some intrinsics have a scalar argument - don't replace it with a
1039 // vector.
1040 Value *Arg;
1041 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1042 State.TTI))
1043 Arg = State.get(I.value(), VPLane(0));
1044 else
1045 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1046 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1047 State.TTI))
1048 TysForDecl.push_back(Arg->getType());
1049 Args.push_back(Arg);
1050 }
1051
1052 // Use vector version of the intrinsic.
1053 Module *M = State.Builder.GetInsertBlock()->getModule();
1054 Function *VectorF =
1055 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1056 assert(VectorF &&
1057 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1058
  auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (CI)
1062 CI->getOperandBundlesAsDefs(OpBundles);
1063
1064 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1065
1066 setFlags(V);
1067
1068 if (!V->getType()->isVoidTy())
1069 State.set(this, V);
1070 State.addMetadata(V, CI);
1071}

InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
                                                    VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

1077 // Some backends analyze intrinsic arguments to determine cost. Use the
1078 // underlying value for the operand if it has one. Otherwise try to use the
1079 // operand of the underlying call instruction, if there is one. Otherwise
1080 // clear Arguments.
1081 // TODO: Rework TTI interface to be independent of concrete IR values.
  SmallVector<const Value *> Arguments;
  for (const auto &[Idx, Op] : enumerate(operands())) {
1084 auto *V = Op->getUnderlyingValue();
1085 if (!V) {
1086 // Push all the VP Intrinsic's ops into the Argments even if is nullptr.
1087 // Some VP Intrinsic's cost will assert the number of parameters.
1088 // Mainly appears in the following two scenarios:
1089 // 1. EVL Op is nullptr
1090 // 2. The Argmunt of the VP Intrinsic is also the VP Intrinsic
1091 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1092 Arguments.push_back(V);
1093 continue;
1094 }
1095 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1096 Arguments.push_back(UI->getArgOperand(Idx));
1097 continue;
1098 }
1099 Arguments.clear();
1100 break;
1101 }
1102 Arguments.push_back(V);
1103 }
1104
1105 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1106 SmallVector<Type *> ParamTys;
1107 for (unsigned I = 0; I != getNumOperands(); ++I)
    ParamTys.push_back(
        toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));

  // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
  FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
  IntrinsicCostAttributes CostAttrs(
1114 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1115 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1116 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
1117}

StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
  return Intrinsic::getBaseName(VectorIntrinsicID);
1121}

bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  // Vector predication intrinsics only demand the first lane of the last
  // operand (the EVL operand).
1127 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1128 Op == getOperand(getNumOperands() - 1);
1129}
1130
1131#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1133 VPSlotTracker &SlotTracker) const {
1134 O << Indent << "WIDEN-INTRINSIC ";
1135 if (ResultTy->isVoidTy()) {
1136 O << "void ";
1137 } else {
1139 O << " = ";
1140 }
1141
1142 O << "call";
1143 printFlags(O);
1144 O << getIntrinsicName() << "(";
1145
1147 Op->printAsOperand(O, SlotTracker);
1148 });
1149 O << ")";
1150}
1151#endif

void VPHistogramRecipe::execute(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;
1156
1157 Value *Address = State.get(getOperand(0));
1158 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1159 VectorType *VTy = cast<VectorType>(Address->getType());
1160
1161 // The histogram intrinsic requires a mask even if the recipe doesn't;
1162 // if the mask operand was omitted then all lanes should be executed and
1163 // we just need to synthesize an all-true mask.
1164 Value *Mask = nullptr;
1165 if (VPValue *VPMask = getMask())
1166 Mask = State.get(VPMask);
1167 else
1168 Mask =
1169 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1170
1171 // If this is a subtract, we want to invert the increment amount. We may
1172 // add a separate intrinsic in future, but for now we'll try this.
1173 if (Opcode == Instruction::Sub)
1174 IncAmt = Builder.CreateNeg(IncAmt);
1175 else
1176 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1177
1178 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1179 {VTy, IncAmt->getType()},
                                {Address, IncAmt, Mask});
}

InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
1185 // FIXME: Take the gather and scatter into account as well. For now we're
1186 // generating the same cost as the fallback path, but we'll likely
1187 // need to create a new TTI method for determining the cost, including
1188 // whether we can use base + vec-of-smaller-indices or just
1189 // vec-of-pointers.
1190 assert(VF.isVector() && "Invalid VF for histogram cost");
1191 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1192 VPValue *IncAmt = getOperand(1);
1193 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1194 VectorType *VTy = VectorType::get(IncTy, VF);
1195
1196 // Assume that a non-constant update value (or a constant != 1) requires
1197 // a multiply, and add that into the cost.
1198 InstructionCost MulCost =
1199 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
1200 if (IncAmt->isLiveIn()) {
1201 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1202
1203 if (CI && CI->getZExtValue() == 1)
1204 MulCost = TTI::TCC_Free;
1205 }
1206
1207 // Find the cost of the histogram operation itself.
1208 Type *PtrTy = VectorType::get(AddressTy, VF);
1209 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1210 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1212 {PtrTy, IncTy, MaskTy});
1213
1214 // Add the costs together with the add/sub operation.
  return Ctx.TTI.getIntrinsicInstrCost(
             ICA, TargetTransformInfo::TCK_RecipThroughput) +
         MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
1218}
1219
1220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1222 VPSlotTracker &SlotTracker) const {
1223 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1225
1226 if (Opcode == Instruction::Sub)
1227 O << ", dec: ";
1228 else {
1229 assert(Opcode == Instruction::Add);
1230 O << ", inc: ";
1231 }
1233
1234 if (VPValue *Mask = getMask()) {
1235 O << ", mask: ";
1236 Mask->printAsOperand(O, SlotTracker);
1237 }
1238}
1239
1241 VPSlotTracker &SlotTracker) const {
1242 O << Indent << "WIDEN-SELECT ";
1244 O << " = select ";
1245 printFlags(O);
1247 O << ", ";
1249 O << ", ";
1251 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1252}
1253#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {

1258 // The condition can be loop invariant but still defined inside the
1259 // loop. This means that we can't just use the original 'cond' value.
1260 // We have to take the 'vectorized' value and pick the first lane.
1261 // Instcombine will make this a no-op.
1262 auto *InvarCond =
1263 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1264
1265 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1266 Value *Op0 = State.get(getOperand(1));
1267 Value *Op1 = State.get(getOperand(2));
1268 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1269 State.set(this, Sel);
1270 if (isa<FPMathOperator>(Sel))
1271 setFlags(cast<Instruction>(Sel));
1272 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1273}

InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
                                                 VPCostContext &Ctx) const {
1277 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1278 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1279 Type *ScalarTy = Ctx.Types.inferScalarType(this);
  Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

1283 VPValue *Op0, *Op1;
1284 using namespace llvm::VPlanPatternMatch;
1285 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1286 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1287 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1288 // select x, y, false --> x & y
1289 // select x, true, y --> x | y
1290 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1291 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1292
1294 if (all_of(operands(),
1295 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1296 Operands.append(SI->op_begin(), SI->op_end());
1297 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1298 return Ctx.TTI.getArithmeticInstrCost(
1299 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind,
1300 {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1301 }
1302
1303 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1304 if (!ScalarCond)
1305 CondTy = VectorType::get(CondTy, VF);

  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1309 Pred = Cmp->getPredicate();
1310 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred,
1313}
1314
1315VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1316 const FastMathFlags &FMF) {
1317 AllowReassoc = FMF.allowReassoc();
1318 NoNaNs = FMF.noNaNs();
1319 NoInfs = FMF.noInfs();
1320 NoSignedZeros = FMF.noSignedZeros();
1321 AllowReciprocal = FMF.allowReciprocal();
1322 AllowContract = FMF.allowContract();
1323 ApproxFunc = FMF.approxFunc();
1324}
1325
1326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1328 switch (OpType) {
1329 case OperationType::Cmp:
1331 break;
1332 case OperationType::DisjointOp:
1334 O << " disjoint";
1335 break;
1336 case OperationType::PossiblyExactOp:
1337 if (ExactFlags.IsExact)
1338 O << " exact";
1339 break;
1340 case OperationType::OverflowingBinOp:
1341 if (WrapFlags.HasNUW)
1342 O << " nuw";
1343 if (WrapFlags.HasNSW)
1344 O << " nsw";
1345 break;
1346 case OperationType::FPMathOp:
1348 break;
1349 case OperationType::GEPOp:
1350 if (GEPFlags.isInBounds())
1351 O << " inbounds";
1353 O << " nusw";
1355 O << " nuw";
1356 break;
1357 case OperationType::NonNegOp:
1358 if (NonNegFlags.NonNeg)
1359 O << " nneg";
1360 break;
1361 case OperationType::Other:
1362 break;
1363 }
1364 if (getNumOperands() > 0)
1365 O << " ";
1366}
1367#endif

void VPWidenRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
1372 switch (Opcode) {
1373 case Instruction::Call:
1374 case Instruction::Br:
1375 case Instruction::PHI:
1376 case Instruction::GetElementPtr:
1377 case Instruction::Select:
1378 llvm_unreachable("This instruction is handled by a different recipe.");
1379 case Instruction::UDiv:
1380 case Instruction::SDiv:
1381 case Instruction::SRem:
1382 case Instruction::URem:
1383 case Instruction::Add:
1384 case Instruction::FAdd:
1385 case Instruction::Sub:
1386 case Instruction::FSub:
1387 case Instruction::FNeg:
1388 case Instruction::Mul:
1389 case Instruction::FMul:
1390 case Instruction::FDiv:
1391 case Instruction::FRem:
1392 case Instruction::Shl:
1393 case Instruction::LShr:
1394 case Instruction::AShr:
1395 case Instruction::And:
1396 case Instruction::Or:
1397 case Instruction::Xor: {
1398 // Just widen unops and binops.
1400 for (VPValue *VPOp : operands())
1401 Ops.push_back(State.get(VPOp));
1402
1403 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1404
1405 if (auto *VecOp = dyn_cast<Instruction>(V))
1406 setFlags(VecOp);
1407
1408 // Use this vector value for all users of the original instruction.
1409 State.set(this, V);
1410 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1411 break;
1412 }
1413 case Instruction::Freeze: {
1414 Value *Op = State.get(getOperand(0));
1415
1416 Value *Freeze = Builder.CreateFreeze(Op);
1417 State.set(this, Freeze);
1418 break;
1419 }
1420 case Instruction::ICmp:
1421 case Instruction::FCmp: {
1422 // Widen compares. Generate vector compares.
1423 bool FCmp = Opcode == Instruction::FCmp;
1424 Value *A = State.get(getOperand(0));
1425 Value *B = State.get(getOperand(1));
1426 Value *C = nullptr;
1427 if (FCmp) {
1428 // Propagate fast math flags.
1429 C = Builder.CreateFCmpFMF(
1430 getPredicate(), A, B,
1431 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1432 } else {
1433 C = Builder.CreateICmp(getPredicate(), A, B);
1434 }
1435 State.set(this, C);
1436 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1437 break;
1438 }
1439 default:
1440 // This instruction is not vectorized by simple widening.
1441 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1442 << Instruction::getOpcodeName(Opcode));
1443 llvm_unreachable("Unhandled instruction!");
1444 } // end of switch.
1445
1446#if !defined(NDEBUG)
1447 // Verify that VPlan type inference results agree with the type of the
  // generated values.
  assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
             State.get(this)->getType() &&
1451 "inferred type and type from generated instructions do not match");
1452#endif
1453}

InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  switch (Opcode) {
1459 case Instruction::FNeg: {
1460 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1461 return Ctx.TTI.getArithmeticInstrCost(
1462 Opcode, VectorTy, CostKind,
1465 }
1466
1467 case Instruction::UDiv:
1468 case Instruction::SDiv:
1469 case Instruction::SRem:
1470 case Instruction::URem:
1471 // More complex computation, let the legacy cost-model handle this for now.
1472 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1473 case Instruction::Add:
1474 case Instruction::FAdd:
1475 case Instruction::Sub:
1476 case Instruction::FSub:
1477 case Instruction::Mul:
1478 case Instruction::FMul:
1479 case Instruction::FDiv:
1480 case Instruction::FRem:
1481 case Instruction::Shl:
1482 case Instruction::LShr:
1483 case Instruction::AShr:
1484 case Instruction::And:
1485 case Instruction::Or:
1486 case Instruction::Xor: {
1487 VPValue *RHS = getOperand(1);
1488 // Certain instructions can be cheaper to vectorize if they have a constant
1489 // second vector operand. One example of this are shifts on x86.
1492 if (RHS->isLiveIn())
1493 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1494
1495 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1498 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1499 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1500
1502 if (CtxI)
1503 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1504 return Ctx.TTI.getArithmeticInstrCost(
1505 Opcode, VectorTy, CostKind,
1507 RHSInfo, Operands, CtxI, &Ctx.TLI);
1508 }
1509 case Instruction::Freeze: {
1510 // This opcode is unknown. Assume that it is the same as 'mul'.
1511 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1512 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
1513 }
1514 case Instruction::ICmp:
1515 case Instruction::FCmp: {
1516 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1517 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1518 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1519 CostKind,
1522 }
1523 default:
1524 llvm_unreachable("Unsupported opcode for instruction");
1525 }
1526}

void VPWidenEVLRecipe::execute(VPTransformState &State) {
  unsigned Opcode = getOpcode();
1530 // TODO: Support other opcodes
1531 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1532 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1533
1535
1536 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1537 "VPWidenEVLRecipe should not be used for scalars");
1538
1539 VPValue *EVL = getEVL();
1540 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1541 IRBuilderBase &BuilderIR = State.Builder;
1542 VectorBuilder Builder(BuilderIR);
1543 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1544
1546 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1547 VPValue *VPOp = getOperand(I);
1548 Ops.push_back(State.get(VPOp));
1549 }
1550
1551 Builder.setMask(Mask).setEVL(EVLArg);
1552 Value *VPInst =
1553 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1554 // Currently vp-intrinsics only accept FMF flags.
1555 // TODO: Enable other flags when support is added.
1556 if (isa<FPMathOperator>(VPInst))
1557 setFlags(cast<Instruction>(VPInst));
1558
1559 State.set(this, VPInst);
1560 State.addMetadata(VPInst,
1561 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1562}
1563
1564#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1566 VPSlotTracker &SlotTracker) const {
1567 O << Indent << "WIDEN ";
1569 O << " = " << Instruction::getOpcodeName(Opcode);
1570 printFlags(O);
1572}
1573
1575 VPSlotTracker &SlotTracker) const {
1576 O << Indent << "WIDEN ";
1578 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1579 printFlags(O);
1581}
1582#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
1587 /// Vectorize casts.
1588 assert(State.VF.isVector() && "Not vectorizing?");
1589 Type *DestTy = VectorType::get(getResultType(), State.VF);
1590 VPValue *Op = getOperand(0);
1591 Value *A = State.get(Op);
1592 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1593 State.set(this, Cast);
1594 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1595 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1596 setFlags(CastOp);
1597}
1598
1600 VPCostContext &Ctx) const {
1601 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1602 // the legacy cost model, including truncates/extends when evaluating a
1603 // reduction in a smaller type.
1604 if (!getUnderlyingValue())
1605 return 0;
1606 // Computes the CastContextHint from a recipes that may access memory.
1607 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1608 if (VF.isScalar())
1610 if (isa<VPInterleaveRecipe>(R))
1612 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1613 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1615 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1616 if (WidenMemoryRecipe == nullptr)
1618 if (!WidenMemoryRecipe->isConsecutive())
1620 if (WidenMemoryRecipe->isReverse())
1622 if (WidenMemoryRecipe->isMasked())
1625 };
1626
  VPValue *Operand = getOperand(0);
  TTI::CastContextHint CCH = TTI::CastContextHint::None;
  // For Trunc/FPTrunc, get the context from the only user.
1630 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1632 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1633 CCH = ComputeCCH(StoreRecipe);
1634 }
1635 // For Z/Sext, get the context from the operand.
1636 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1637 Opcode == Instruction::FPExt) {
1638 if (Operand->isLiveIn())
1640 else if (Operand->getDefiningRecipe())
1641 CCH = ComputeCCH(Operand->getDefiningRecipe());
1642 }
1643
1644 auto *SrcTy =
1645 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1646 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1647 // Arm TTI will use the underlying instruction to determine the cost.
1648 return Ctx.TTI.getCastInstrCost(
1649 Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
1650 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1651}
1652
1653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1655 VPSlotTracker &SlotTracker) const {
1656 O << Indent << "WIDEN-CAST ";
1658 O << " = " << Instruction::getOpcodeName(Opcode);
1659 printFlags(O);
1661 O << " to " << *getResultType();
1662}
1663#endif
1664
1666 VPCostContext &Ctx) const {
1667 return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
1668}
1669
1670/// This function adds
1671/// (0 * Step, 1 * Step, 2 * Step, ...)
1672/// to each vector element of Val.
1673/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
1677 assert(VF.isVector() && "only vector VFs are supported");
1678
1679 // Create and check the types.
1680 auto *ValVTy = cast<VectorType>(Val->getType());
1681 ElementCount VLen = ValVTy->getElementCount();
1682
1683 Type *STy = Val->getType()->getScalarType();
1684 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1685 "Induction Step must be an integer or FP");
1686 assert(Step->getType() == STy && "Step has wrong type");
1687
1689
1690 // Create a vector of consecutive numbers from zero to VF.
1691 VectorType *InitVecValVTy = ValVTy;
1692 if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1696 }
1697 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1698
1699 if (STy->isIntegerTy()) {
1700 Step = Builder.CreateVectorSplat(VLen, Step);
1701 assert(Step->getType() == Val->getType() && "Invalid step vec");
1702 // FIXME: The newly created binary instructions should contain nsw/nuw
1703 // flags, which can be found from the original scalar operations.
1704 Step = Builder.CreateMul(InitVec, Step);
1705 return Builder.CreateAdd(Val, Step, "induction");
1706 }
1707
1708 // Floating point induction.
1709 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1710 "Binary Opcode should be specified for FP induction");
1711 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1712
1713 Step = Builder.CreateVectorSplat(VLen, Step);
1714 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1715 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1716}
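
// As a concrete example: for an integer IV with Val = <a, a, a, a> (the
// splatted start value) and Step = s, the code above produces
// <a, a + s, a + 2*s, a + 3*s>; for FP inductions the multiply/add above are
// replaced by fmul and the requested FP binary opcode.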
1717
1718/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1722 : ConstantFP::get(Ty, C);
1723}
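
// For instance, getSignedIntOrFpConstant(Int32Ty, -1) yields the i32 constant
// -1, while the same call with a floating-point type yields the FP constant
// -1.0.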

void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Int or FP induction being replicated.");
1727
  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
1731 IRBuilderBase &Builder = State.Builder;
1732 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1733 "Types must match");
1734 assert(State.VF.isVector() && "must have vector VF");
1735
1736 // The value from the original loop to which we are mapping the new induction
1737 // variable.
1738 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1739
1740 // Fast-math-flags propagate from the original induction instruction.
1741 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1742 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1743 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1744
1745 // Now do the actual transformations, and start with fetching the step value.
1746 Value *Step = State.get(getStepValue(), VPLane(0));
1747
1748 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1749 "Expected either an induction phi-node or a truncate of it!");
1750
1751 // Construct the initial value of the vector IV in the vector loop preheader
1752 auto CurrIP = Builder.saveIP();
1753 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1754 Builder.SetInsertPoint(VectorPH->getTerminator());
1755 if (isa<TruncInst>(EntryVal)) {
1756 assert(Start->getType()->isIntegerTy() &&
1757 "Truncation requires an integer type");
1758 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1759 Step = Builder.CreateTrunc(Step, TruncType);
1760 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1761 }
1762
1763 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1764 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1765 State.VF, State.Builder);
1766
1767 // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
1772 AddOp = Instruction::Add;
1773 MulOp = Instruction::Mul;
1774 } else {
1775 AddOp = ID.getInductionOpcode();
1776 MulOp = Instruction::FMul;
1777 }
1778
1779 Value *SplatVF;
1780 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1781 // The recipe has been unrolled. In that case, fetch the splat value for the
1782 // induction increment.
1783 SplatVF = State.get(SplatVFOperand);
1784 } else {
1785 // Multiply the vectorization factor by the step using integer or
1786 // floating-point arithmetic as appropriate.
1787 Type *StepType = Step->getType();
1788 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1789 if (Step->getType()->isFloatingPointTy())
1790 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1791 else
1792 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1793 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1794
1795 // Create a vector splat to use in the induction update.
1796 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1797 }
1798
1799 Builder.restoreIP(CurrIP);
1800
1801 // We may need to add the step a number of times, depending on the unroll
1802 // factor. The last of those goes into the PHI.
1803 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1804 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1805 VecInd->setDebugLoc(getDebugLoc());
1806 State.set(this, VecInd);
1807
1808 Instruction *LastInduction = cast<Instruction>(
1809 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1810 if (isa<TruncInst>(EntryVal))
1811 State.addMetadata(LastInduction, EntryVal);
1812 LastInduction->setDebugLoc(getDebugLoc());
1813
1814 VecInd->addIncoming(SteppedStart, VectorPH);
1815 // Add induction update using an incorrect block temporarily. The phi node
1816 // will be fixed after VPlan execution. Note that at this point the latch
1817 // block cannot be used, as it does not exist yet.
1818 // TODO: Model increment value in VPlan, by turning the recipe into a
1819 // multi-def and a subclass of VPHeaderPHIRecipe.
1820 VecInd->addIncoming(LastInduction, VectorPH);
1821}
1822
1823#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1825 VPSlotTracker &SlotTracker) const {
1826 O << Indent;
1828 O << " = WIDEN-INDUCTION ";
1830
1831 if (auto *TI = getTruncInst())
1832 O << " (truncated to " << *TI->getType() << ")";
1833}
1834#endif
1835
1837 // The step may be defined by a recipe in the preheader (e.g. if it requires
1838 // SCEV expansion), but for the canonical induction the step is required to be
1839 // 1, which is represented as live-in.
1841 return false;
1842 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1843 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1844 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1845 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1846 getScalarType() == CanIV->getScalarType();
1847}
1848
1849#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1851 VPSlotTracker &SlotTracker) const {
1852 O << Indent;
1854 O << " = DERIVED-IV ";
1856 O << " + ";
1858 O << " * ";
1860}
1861#endif

void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

1869 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1870 /// variable on which to base the steps, \p Step is the size of the step.
1871
1872 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1873 Value *Step = State.get(getStepValue(), VPLane(0));
1874 IRBuilderBase &Builder = State.Builder;
1875
1876 // Ensure step has the same type as that of scalar IV.
1877 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1878 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1879
1880 // We build scalar steps for both integer and floating-point induction
1881 // variables. Here, we determine the kind of arithmetic we will perform.
1882 Instruction::BinaryOps AddOp;
1883 Instruction::BinaryOps MulOp;
1884 if (BaseIVTy->isIntegerTy()) {
1885 AddOp = Instruction::Add;
1886 MulOp = Instruction::Mul;
1887 } else {
1888 AddOp = InductionOpcode;
1889 MulOp = Instruction::FMul;
1890 }
1891
1892 // Determine the number of scalars we need to generate for each unroll
1893 // iteration.
1894 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1895 // Compute the scalar steps and save the results in State.
1896 Type *IntStepTy =
1897 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1898 Type *VecIVTy = nullptr;
1899 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1900 if (!FirstLaneOnly && State.VF.isScalable()) {
1901 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1902 UnitStepVec =
1903 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1904 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1905 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1906 }
1907
1908 unsigned StartLane = 0;
1909 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1910 if (State.Lane) {
1911 StartLane = State.Lane->getKnownLane();
1912 EndLane = StartLane + 1;
1913 }
1914 Value *StartIdx0 =
1915 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1916
1917 if (!FirstLaneOnly && State.VF.isScalable()) {
1918 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1919 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1920 if (BaseIVTy->isFloatingPointTy())
1921 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1922 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1923 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1924 State.set(this, Add);
1925 // It's useful to record the lane values too for the known minimum number
1926 // of elements so we do those below. This improves the code quality when
1927 // trying to extract the first element, for example.
1928 }
1929
1930 if (BaseIVTy->isFloatingPointTy())
1931 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1932
1933 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1934 Value *StartIdx = Builder.CreateBinOp(
1935 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1936 // The step returned by `createStepForVF` is a runtime-evaluated value
1937 // when VF is scalable. Otherwise, it should be folded into a Constant.
1938 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1939 "Expected StartIdx to be folded to a constant when VF is not "
1940 "scalable");
1941 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1942 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1943 State.set(this, Add, VPLane(Lane));
1944 }
1945}
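// A hedged worked example (assuming an integer BaseIV %iv, Step %s, fixed
// VF = 4, unroll part 0 and all lanes used): the loop above records per-lane
// values
//   lane 0: %iv + (0 + 0) * %s
//   lane 1: %iv + (0 + 1) * %s
//   lane 2: %iv + (0 + 2) * %s
//   lane 3: %iv + (0 + 3) * %s
// where the leading 0 is StartIdx0 = UnrollPart * VF for part 0.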
1946
1947#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1949 VPSlotTracker &SlotTracker) const {
1950 O << Indent;
1952 O << " = SCALAR-STEPS ";
1954}
1955#endif
1956
1958 assert(State.VF.isVector() && "not widening");
1959 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1960 // Construct a vector GEP by widening the operands of the scalar GEP as
1961 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1962 // results in a vector of pointers when at least one operand of the GEP
1963 // is vector-typed. Thus, to keep the representation compact, we only use
1964 // vector-typed operands for loop-varying values.
1965
1966 if (areAllOperandsInvariant()) {
1967 // If we are vectorizing, but the GEP has only loop-invariant operands,
1968 // the GEP we build (by only using vector-typed operands for
1969 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1970 // produce a vector of pointers, we need to either arbitrarily pick an
1971 // operand to broadcast, or broadcast a clone of the original GEP.
1972 // Here, we broadcast a clone of the original.
1973 //
1974 // TODO: If at some point we decide to scalarize instructions having
1975 // loop-invariant operands, this special case will no longer be
1976 // required. We would add the scalarization decision to
1977 // collectLoopScalars() and teach getVectorValue() to broadcast
1978 // the lane-zero scalar value.
1980 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1981 Ops.push_back(State.get(getOperand(I), VPLane(0)));
1982
1983 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1984 ArrayRef(Ops).drop_front(), "",
1986 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1987 State.set(this, Splat);
1988 State.addMetadata(Splat, GEP);
1989 } else {
1990 // If the GEP has at least one loop-varying operand, we are sure to
1991 // produce a vector of pointers unless VF is scalar.
1992 // The pointer operand of the new GEP. If it's loop-invariant, we
1993 // won't broadcast it.
1994 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
1995 : State.get(getOperand(0));
1996
1997 // Collect all the indices for the new GEP. If any index is
1998 // loop-invariant, we won't broadcast it.
2000 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
2001 VPValue *Operand = getOperand(I);
2002 if (isIndexLoopInvariant(I - 1))
2003 Indices.push_back(State.get(Operand, VPLane(0)));
2004 else
2005 Indices.push_back(State.get(Operand));
2006 }
2007
2008 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
2009 // but it should be a vector, otherwise.
2010 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
2011 Indices, "", getGEPNoWrapFlags());
2012 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2013 "NewGEP is not a pointer vector");
2014 State.set(this, NewGEP);
2015 State.addMetadata(NewGEP, GEP);
2016 }
2017}
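// Illustrative sketch (hypothetical names): with a loop-invariant base pointer
// %p and one widened i64 index %vec.idx at VF = 4, the loop-varying branch
// above emits roughly
//   %vgep = getelementptr i32, ptr %p, <4 x i64> %vec.idx
// where the single vector-typed index makes the result a <4 x ptr> while the
// invariant operands stay scalar.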
2018
2019#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2021 VPSlotTracker &SlotTracker) const {
2022 O << Indent << "WIDEN-GEP ";
2023 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2024 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2025 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2026
2027 O << " ";
2029 O << " = getelementptr";
2030 printFlags(O);
2032}
2033#endif
2034
2035static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2036 unsigned CurrentPart, IRBuilderBase &Builder) {
2037 // Use i32 for the gep index type when the value is constant,
2038 // or query DataLayout for a more suitable index type otherwise.
2039 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2040 return IsScalable && (IsReverse || CurrentPart > 0)
2041 ? DL.getIndexType(Builder.getPtrTy(0))
2042 : Builder.getInt32Ty();
2043}
2044
2046 auto &Builder = State.Builder;
2048 unsigned CurrentPart = getUnrollPart(*this);
2049 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2050 CurrentPart, Builder);
2051
2052 // The wide store needs to start at the last vector element.
2053 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2054 if (IndexTy != RunTimeVF->getType())
2055 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2056 // NumElt = -CurrentPart * RunTimeVF
2057 Value *NumElt = Builder.CreateMul(
2058 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
2059 // LastLane = 1 - RunTimeVF
2060 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2061 Value *Ptr = State.get(getOperand(0), VPLane(0));
2062 Value *ResultPtr =
2063 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2064 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2066
2067 State.set(this, ResultPtr, /*IsScalar*/ true);
2068}
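// Worked example for the arithmetic above (assuming a runtime VF of 4 and
// CurrentPart = 1): NumElt = -1 * 4 = -4 and LastLane = 1 - 4 = -3, so the
// resulting pointer is Ptr - 7 elements, the lowest address touched by the
// second reversed vector access; for part 0 it would be Ptr - 3.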
2069
2070#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2072 VPSlotTracker &SlotTracker) const {
2073 O << Indent;
2075 O << " = reverse-vector-pointer";
2076 printFlags(O);
2078}
2079#endif
2080
2082 auto &Builder = State.Builder;
2084 unsigned CurrentPart = getUnrollPart(*this);
2085 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2086 CurrentPart, Builder);
2087 Value *Ptr = State.get(getOperand(0), VPLane(0));
2088
2089 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2090 Value *ResultPtr =
2091 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2092
2093 State.set(this, ResultPtr, /*IsScalar*/ true);
2094}
2095
2096#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2098 VPSlotTracker &SlotTracker) const {
2099 O << Indent;
2101 O << " = vector-pointer ";
2102
2104}
2105#endif
2106
2108 assert(isNormalized() && "Expected blend to be normalized!");
2110 // We know that all PHIs in non-header blocks are converted into
2111 // selects, so we don't have to worry about the insertion order and we
2112 // can just use the builder.
2113 // At this point we generate the predication tree. There may be
2114 // duplications since this is a simple recursive scan, but future
2115 // optimizations will clean it up.
2116
2117 unsigned NumIncoming = getNumIncomingValues();
2118
2119 // Generate a sequence of selects of the form:
2120 // SELECT(Mask3, In3,
2121 // SELECT(Mask2, In2,
2122 // SELECT(Mask1, In1,
2123 // In0)))
2124 // Note that Mask0 is never used: lanes for which no path reaches this phi and
2125 // are essentially undef are taken from In0.
2126 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2127 Value *Result = nullptr;
2128 for (unsigned In = 0; In < NumIncoming; ++In) {
2129 // We might have single edge PHIs (blocks) - use an identity
2130 // 'select' for the first PHI operand.
2131 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2132 if (In == 0)
2133 Result = In0; // Initialize with the first incoming value.
2134 else {
2135 // Select between the current value and the previous incoming edge
2136 // based on the incoming mask.
2137 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2138 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2139 }
2140 }
2141 State.set(this, Result, OnlyFirstLaneUsed);
2142}
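// Illustrative sketch of the select chain built above (hypothetical names,
// three incoming values In0..In2 with masks Mask1 and Mask2):
//   %predphi  = select %Mask1, %In1, %In0
//   %predphi1 = select %Mask2, %In2, %predphi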
2143
2145 VPCostContext &Ctx) const {
2147
2148 // Handle cases where only the first lane is used the same way as the legacy
2149 // cost model.
2150 if (vputils::onlyFirstLaneUsed(this))
2151 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
2152
2153 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2154 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2155 return (getNumIncomingValues() - 1) *
2156 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2158}
2159
2160#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2162 VPSlotTracker &SlotTracker) const {
2163 O << Indent << "BLEND ";
2165 O << " =";
2166 if (getNumIncomingValues() == 1) {
2167 // Not a User of any mask: not really blending, this is a
2168 // single-predecessor phi.
2169 O << " ";
2171 } else {
2172 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2173 O << " ";
2175 if (I == 0)
2176 continue;
2177 O << "/";
2179 }
2180 }
2181}
2182#endif
2183
2185 assert(!State.Lane && "Reduction being replicated.");
2186 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2187 RecurKind Kind = RdxDesc.getRecurrenceKind();
2188 // Propagate the fast-math flags carried by the underlying instruction.
2190 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2192 Value *NewVecOp = State.get(getVecOp());
2193 if (VPValue *Cond = getCondOp()) {
2194 Value *NewCond = State.get(Cond, State.VF.isScalar());
2195 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2196 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2197
2198 Value *Start;
2199 if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
2200 Start = RdxDesc.getRecurrenceStartValue();
2201 else
2202 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2203 RdxDesc.getFastMathFlags());
2204 if (State.VF.isVector())
2205 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2206
2207 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2208 NewVecOp = Select;
2209 }
2210 Value *NewRed;
2211 Value *NextInChain;
2212 if (IsOrdered) {
2213 if (State.VF.isVector())
2214 NewRed =
2215 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2216 else
2217 NewRed = State.Builder.CreateBinOp(
2218 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2219 PrevInChain = NewRed;
2220 NextInChain = NewRed;
2221 } else {
2222 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2223 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2224 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2225 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2226 NewRed, PrevInChain);
2227 else
2228 NextInChain = State.Builder.CreateBinOp(
2229 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2230 }
2231 State.set(this, NextInChain, /*IsScalar*/ true);
2232}
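// Illustrative sketch (hypothetical names, assuming an unordered, conditional
// integer add reduction with VF = 4): the select above substitutes the identity
// for masked-off lanes before reducing, roughly
//   %sel  = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer
//   %red  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
//   %next = add i32 %red, %prev   ; combine with the previous chain value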
2233
2235 assert(!State.Lane && "Reduction being replicated.");
2236
2237 auto &Builder = State.Builder;
2238 // Propagate the fast-math flags carried by the underlying instruction.
2239 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2241 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2242
2243 RecurKind Kind = RdxDesc.getRecurrenceKind();
2244 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2245 Value *VecOp = State.get(getVecOp());
2246 Value *EVL = State.get(getEVL(), VPLane(0));
2247
2248 VectorBuilder VBuilder(Builder);
2249 VBuilder.setEVL(EVL);
2250 Value *Mask;
2251 // TODO: move the all-true mask generation into VectorBuilder.
2252 if (VPValue *CondOp = getCondOp())
2253 Mask = State.get(CondOp);
2254 else
2255 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2256 VBuilder.setMask(Mask);
2257
2258 Value *NewRed;
2259 if (isOrdered()) {
2260 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2261 } else {
2262 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2263 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
2264 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2265 else
2266 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2267 NewRed, Prev);
2268 }
2269 State.set(this, NewRed, /*IsScalar*/ true);
2270}
2271
2273 VPCostContext &Ctx) const {
2274 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2275 Type *ElementTy = Ctx.Types.inferScalarType(this);
2276 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2278 unsigned Opcode = RdxDesc.getOpcode();
2279
2280 // TODO: Support any-of and in-loop reductions.
2281 assert(
2283 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2284 "Any-of reduction not implemented in VPlan-based cost model currently.");
2285 assert(
2286 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2287 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2288 "In-loop reduction not implemented in VPlan-based cost model currently.");
2289
2290 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2291 "Inferred type and recurrence type mismatch.");
2292
2293 // Cost = Reduction cost + BinOp cost
2295 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind);
2298 return Cost + Ctx.TTI.getMinMaxReductionCost(
2299 Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2300 }
2301
2302 return Cost + Ctx.TTI.getArithmeticReductionCost(
2303 Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind);
2304}
2305
2306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2308 VPSlotTracker &SlotTracker) const {
2309 O << Indent << "REDUCE ";
2311 O << " = ";
2313 O << " +";
2314 if (isa<FPMathOperator>(getUnderlyingInstr()))
2316 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2318 if (isConditional()) {
2319 O << ", ";
2321 }
2322 O << ")";
2323 if (RdxDesc.IntermediateStore)
2324 O << " (with final reduction value stored in invariant address sank "
2325 "outside of loop)";
2326}
2327
2329 VPSlotTracker &SlotTracker) const {
2331 O << Indent << "REDUCE ";
2333 O << " = ";
2335 O << " +";
2336 if (isa<FPMathOperator>(getUnderlyingInstr()))
2338 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2340 O << ", ";
2342 if (isConditional()) {
2343 O << ", ";
2345 }
2346 O << ")";
2347 if (RdxDesc.IntermediateStore)
2348 O << " (with final reduction value stored in invariant address sank "
2349 "outside of loop)";
2350}
2351#endif
2352
2354 // Find if the recipe is used by a widened recipe via an intervening
2355 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2356 return any_of(users(), [](const VPUser *U) {
2357 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2358 return any_of(PredR->users(), [PredR](const VPUser *U) {
2359 return !U->usesScalars(PredR);
2360 });
2361 return false;
2362 });
2363}
2364
2366 VPCostContext &Ctx) const {
2367 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2368 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2369 // transform, avoid computing their cost multiple times for now.
2370 Ctx.SkipCostComputation.insert(UI);
2371 return Ctx.getLegacyCost(UI, VF);
2372}
2373
2374#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2376 VPSlotTracker &SlotTracker) const {
2377 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2378
2379 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2381 O << " = ";
2382 }
2383 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2384 O << "call";
2385 printFlags(O);
2386 O << "@" << CB->getCalledFunction()->getName() << "(";
2388 O, [&O, &SlotTracker](VPValue *Op) {
2389 Op->printAsOperand(O, SlotTracker);
2390 });
2391 O << ")";
2392 } else {
2394 printFlags(O);
2396 }
2397
2398 if (shouldPack())
2399 O << " (S->V)";
2400}
2401#endif
2402
2403Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2406 "Codegen only implemented for first lane.");
2407 switch (Opcode) {
2408 case Instruction::SExt:
2409 case Instruction::ZExt:
2410 case Instruction::Trunc: {
2411 // Note: SExt/ZExt not used yet.
2412 Value *Op = State.get(getOperand(0), VPLane(0));
2413 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2414 }
2415 default:
2416 llvm_unreachable("opcode not implemented yet");
2417 }
2418}
2419
2420void VPScalarCastRecipe ::execute(VPTransformState &State) {
2421 State.set(this, generate(State), VPLane(0));
2422}
2423
2424#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2425void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2426 VPSlotTracker &SlotTracker) const {
2427 O << Indent << "SCALAR-CAST ";
2428 printAsOperand(O, SlotTracker);
2429 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2430 printOperands(O, SlotTracker);
2431 O << " to " << *ResultTy;
2432}
2433#endif
2434
2436 assert(State.Lane && "Branch on Mask works only on single instance.");
2437
2438 unsigned Lane = State.Lane->getKnownLane();
2439
2440 Value *ConditionBit = nullptr;
2441 VPValue *BlockInMask = getMask();
2442 if (BlockInMask) {
2443 ConditionBit = State.get(BlockInMask);
2444 if (ConditionBit->getType()->isVectorTy())
2445 ConditionBit = State.Builder.CreateExtractElement(
2446 ConditionBit, State.Builder.getInt32(Lane));
2447 } else // Block in mask is all-one.
2448 ConditionBit = State.Builder.getTrue();
2449
2450 // Replace the temporary unreachable terminator with a new conditional branch,
2451 // whose two destinations will be set later when they are created.
2452 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2453 assert(isa<UnreachableInst>(CurrentTerminator) &&
2454 "Expected to replace unreachable terminator with conditional branch.");
2455 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2456 CondBr->setSuccessor(0, nullptr);
2457 ReplaceInstWithInst(CurrentTerminator, CondBr);
2458}
2459
2461 VPCostContext &Ctx) const {
2462 // The legacy cost model doesn't assign costs to branches for individual
2463 // replicate regions. Match the current behavior in the VPlan cost model for
2464 // now.
2465 return 0;
2466}
2467
2470 assert(State.Lane && "Predicated instruction PHI works per instance.");
2471 Instruction *ScalarPredInst =
2472 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2473 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2474 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2475 assert(PredicatingBB && "Predicated block has no single predecessor.");
2476 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2477 "operand must be VPReplicateRecipe");
2478
2479 // By current pack/unpack logic we need to generate only a single phi node: if
2480 // a vector value for the predicated instruction exists at this point it means
2481 // the instruction has vector users only, and a phi for the vector value is
2482 // needed. In this case the recipe of the predicated instruction is marked to
2483 // also do that packing, thereby "hoisting" the insert-element sequence.
2484 // Otherwise, a phi node for the scalar value is needed.
2485 if (State.hasVectorValue(getOperand(0))) {
2486 Value *VectorValue = State.get(getOperand(0));
2487 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2488 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2489 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2490 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2491 if (State.hasVectorValue(this))
2492 State.reset(this, VPhi);
2493 else
2494 State.set(this, VPhi);
2495 // NOTE: Currently we need to update the value of the operand, so the next
2496 // predicated iteration inserts its generated value in the correct vector.
2497 State.reset(getOperand(0), VPhi);
2498 } else {
2499 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2500 return;
2501
2502 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2503 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2504 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2505 PredicatingBB);
2506 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2507 if (State.hasScalarValue(this, *State.Lane))
2508 State.reset(this, Phi, *State.Lane);
2509 else
2510 State.set(this, Phi, *State.Lane);
2511 // NOTE: Currently we need to update the value of the operand, so the next
2512 // predicated iteration inserts its generated value in the correct vector.
2513 State.reset(getOperand(0), Phi, *State.Lane);
2514 }
2515}
2516
2517#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2519 VPSlotTracker &SlotTracker) const {
2520 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2522 O << " = ";
2524}
2525#endif
2526
2528 VPCostContext &Ctx) const {
2530 const Align Alignment =
2532 unsigned AS =
2535
2536 if (!Consecutive) {
2537 // TODO: Using the original IR may not be accurate.
2538 // Currently, ARM will use the underlying IR to calculate gather/scatter
2539 // instruction cost.
2541 assert(!Reverse &&
2542 "Inconsecutive memory access should not have the order.");
2543 return Ctx.TTI.getAddressComputationCost(Ty) +
2545 IsMasked, Alignment, CostKind,
2546 &Ingredient);
2547 }
2548
2550 if (IsMasked) {
2551 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2552 AS, CostKind);
2553 } else {
2554 TTI::OperandValueInfo OpInfo =
2556 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2557 CostKind, OpInfo, &Ingredient);
2558 }
2559 if (!Reverse)
2560 return Cost;
2561
2563 cast<VectorType>(Ty), {}, CostKind, 0);
2564}
2565
2567 auto *LI = cast<LoadInst>(&Ingredient);
2568
2569 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2570 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2571 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2572 bool CreateGather = !isConsecutive();
2573
2574 auto &Builder = State.Builder;
2576 Value *Mask = nullptr;
2577 if (auto *VPMask = getMask()) {
2578 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2579 // of a null all-one mask is a null mask.
2580 Mask = State.get(VPMask);
2581 if (isReverse())
2582 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2583 }
2584
2585 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2586 Value *NewLI;
2587 if (CreateGather) {
2588 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2589 "wide.masked.gather");
2590 } else if (Mask) {
2591 NewLI =
2592 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2593 PoisonValue::get(DataTy), "wide.masked.load");
2594 } else {
2595 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2596 }
2597 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2598 State.addMetadata(NewLI, LI);
2599 if (Reverse)
2600 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2601 State.set(this, NewLI);
2602}
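// Illustrative sketch (hypothetical names, assuming a consecutive masked load
// of <4 x i32> with 4-byte alignment): the masked path above emits roughly
//   %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(
//       ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> poison)
// while the non-consecutive case becomes an @llvm.masked.gather over a vector
// of pointers instead.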
2603
2604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2606 VPSlotTracker &SlotTracker) const {
2607 O << Indent << "WIDEN ";
2609 O << " = load ";
2611}
2612#endif
2613
2614/// Use all-true mask for reverse rather than actual mask, as it avoids a
2615/// dependence w/o affecting the result.
2617 Value *EVL, const Twine &Name) {
2618 VectorType *ValTy = cast<VectorType>(Operand->getType());
2619 Value *AllTrueMask =
2620 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2621 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2622 {Operand, AllTrueMask, EVL}, nullptr, Name);
2623}
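// Illustrative sketch (hypothetical names): the helper above emits a call such
// as
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//              <vscale x 4 x i32> %v, <vscale x 4 x i1> %alltrue, i32 %evl)
// so only the first EVL lanes are reversed, under an all-true mask.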
2624
2626 auto *LI = cast<LoadInst>(&Ingredient);
2627
2628 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2629 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2630 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2631 bool CreateGather = !isConsecutive();
2632
2633 auto &Builder = State.Builder;
2635 CallInst *NewLI;
2636 Value *EVL = State.get(getEVL(), VPLane(0));
2637 Value *Addr = State.get(getAddr(), !CreateGather);
2638 Value *Mask = nullptr;
2639 if (VPValue *VPMask = getMask()) {
2640 Mask = State.get(VPMask);
2641 if (isReverse())
2642 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2643 } else {
2644 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2645 }
2646
2647 if (CreateGather) {
2648 NewLI =
2649 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2650 nullptr, "wide.masked.gather");
2651 } else {
2652 VectorBuilder VBuilder(Builder);
2653 VBuilder.setEVL(EVL).setMask(Mask);
2654 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2655 Instruction::Load, DataTy, Addr, "vp.op.load"));
2656 }
2657 NewLI->addParamAttr(
2658 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2659 State.addMetadata(NewLI, LI);
2660 Instruction *Res = NewLI;
2661 if (isReverse())
2662 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2663 State.set(this, Res);
2664}
2665
2667 VPCostContext &Ctx) const {
2668 if (!Consecutive || IsMasked)
2669 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2670
2671 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
2672 // because the EVL recipes use EVL to replace the tail mask, while the legacy
2673 // model always accounts for the cost of the mask.
2674 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
2675 // longer need to compare against the legacy cost model.
2677 const Align Alignment =
2679 unsigned AS =
2683 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2684 if (!Reverse)
2685 return Cost;
2686
2688 cast<VectorType>(Ty), {}, CostKind, 0);
2689}
2690
2691#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2693 VPSlotTracker &SlotTracker) const {
2694 O << Indent << "WIDEN ";
2696 O << " = vp.load ";
2698}
2699#endif
2700
2702 auto *SI = cast<StoreInst>(&Ingredient);
2703
2704 VPValue *StoredVPValue = getStoredValue();
2705 bool CreateScatter = !isConsecutive();
2706 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2707
2708 auto &Builder = State.Builder;
2710
2711 Value *Mask = nullptr;
2712 if (auto *VPMask = getMask()) {
2713 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2714 // of a null all-one mask is a null mask.
2715 Mask = State.get(VPMask);
2716 if (isReverse())
2717 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2718 }
2719
2720 Value *StoredVal = State.get(StoredVPValue);
2721 if (isReverse()) {
2722 // If we store to reverse consecutive memory locations, then we need
2723 // to reverse the order of elements in the stored value.
2724 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2725 // We don't want to update the value in the map as it might be used in
2726 // another expression. So don't call resetVectorValue(StoredVal).
2727 }
2728 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2729 Instruction *NewSI = nullptr;
2730 if (CreateScatter)
2731 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2732 else if (Mask)
2733 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2734 else
2735 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2736 State.addMetadata(NewSI, SI);
2737}
2738
2739#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2741 VPSlotTracker &SlotTracker) const {
2742 O << Indent << "WIDEN store ";
2744}
2745#endif
2746
2748 auto *SI = cast<StoreInst>(&Ingredient);
2749
2750 VPValue *StoredValue = getStoredValue();
2751 bool CreateScatter = !isConsecutive();
2752 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2753
2754 auto &Builder = State.Builder;
2756
2757 CallInst *NewSI = nullptr;
2758 Value *StoredVal = State.get(StoredValue);
2759 Value *EVL = State.get(getEVL(), VPLane(0));
2760 if (isReverse())
2761 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2762 Value *Mask = nullptr;
2763 if (VPValue *VPMask = getMask()) {
2764 Mask = State.get(VPMask);
2765 if (isReverse())
2766 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2767 } else {
2768 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2769 }
2770 Value *Addr = State.get(getAddr(), !CreateScatter);
2771 if (CreateScatter) {
2772 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2773 Intrinsic::vp_scatter,
2774 {StoredVal, Addr, Mask, EVL});
2775 } else {
2776 VectorBuilder VBuilder(Builder);
2777 VBuilder.setEVL(EVL).setMask(Mask);
2778 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2779 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2780 {StoredVal, Addr}));
2781 }
2782 NewSI->addParamAttr(
2783 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2784 State.addMetadata(NewSI, SI);
2785}
2786
2788 VPCostContext &Ctx) const {
2789 if (!Consecutive || IsMasked)
2790 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2791
2792 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
2793 // because the EVL recipes use EVL to replace the tail mask, while the legacy
2794 // model always accounts for the cost of the mask.
2795 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
2796 // longer need to compare against the legacy cost model.
2798 const Align Alignment =
2800 unsigned AS =
2804 Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
2805 if (!Reverse)
2806 return Cost;
2807
2809 cast<VectorType>(Ty), {}, CostKind, 0);
2810}
2811
2812#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2814 VPSlotTracker &SlotTracker) const {
2815 O << Indent << "WIDEN vp.store ";
2817}
2818#endif
2819
2821 VectorType *DstVTy, const DataLayout &DL) {
2822 // Verify that V is a vector type with same number of elements as DstVTy.
2823 auto VF = DstVTy->getElementCount();
2824 auto *SrcVecTy = cast<VectorType>(V->getType());
2825 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2826 Type *SrcElemTy = SrcVecTy->getElementType();
2827 Type *DstElemTy = DstVTy->getElementType();
2828 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2829 "Vector elements must have same size");
2830
2831 // Do a direct cast if element types are castable.
2832 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2833 return Builder.CreateBitOrPointerCast(V, DstVTy);
2834 }
2835 // V cannot be directly casted to desired vector type.
2836 // May happen when V is a floating point vector but DstVTy is a vector of
2837 // pointers or vice-versa. Handle this using a two-step bitcast using an
2838 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2839 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2840 "Only one type should be a pointer type");
2841 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2842 "Only one type should be a floating point type");
2843 Type *IntTy =
2844 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2845 auto *VecIntTy = VectorType::get(IntTy, VF);
2846 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2847 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2848}
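// Illustrative example (assuming 32-bit pointers and floats): to reinterpret
// <4 x float> as <4 x ptr> (or vice versa), the two-step path above goes
// through an integer vector of matching element width,
// <4 x float> -> <4 x i32> -> <4 x ptr>, since a direct bitcast between
// floating-point and pointer elements is not valid IR.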
2849
2850/// Return a vector containing interleaved elements from multiple
2851/// smaller input vectors.
2853 const Twine &Name) {
2854 unsigned Factor = Vals.size();
2855 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2856
2857 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2858#ifndef NDEBUG
2859 for (Value *Val : Vals)
2860 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2861#endif
2862
2863 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2864 // must use intrinsics to interleave.
2865 if (VecTy->isScalableTy()) {
2867 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2868 Vals,
2869 /*FMFSource=*/nullptr, Name);
2870 }
2871
2872 // Fixed length. Start by concatenating all vectors into a wide vector.
2873 Value *WideVec = concatenateVectors(Builder, Vals);
2874
2875 // Interleave the elements into the wide vector.
2876 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2877 return Builder.CreateShuffleVector(
2878 WideVec, createInterleaveMask(NumElts, Factor), Name);
2879}
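// Worked example (assuming two fixed <4 x i32> inputs R = <r0,r1,r2,r3> and
// G = <g0,g1,g2,g3>): the fixed-length path above concatenates them into an
// <8 x i32> wide vector and shuffles it with the interleave mask
// <0,4,1,5,2,6,3,7>, producing <r0,g0,r1,g1,r2,g2,r3,g3>.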
2880
2881// Try to vectorize the interleave group that \p Instr belongs to.
2882//
2883// E.g. Translate following interleaved load group (factor = 3):
2884// for (i = 0; i < N; i+=3) {
2885// R = Pic[i]; // Member of index 0
2886// G = Pic[i+1]; // Member of index 1
2887// B = Pic[i+2]; // Member of index 2
2888// ... // do something to R, G, B
2889// }
2890// To:
2891// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2892// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2893// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2894// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2895//
2896// Or translate following interleaved store group (factor = 3):
2897// for (i = 0; i < N; i+=3) {
2898// ... do something to R, G, B
2899// Pic[i] = R; // Member of index 0
2900// Pic[i+1] = G; // Member of index 1
2901// Pic[i+2] = B; // Member of index 2
2902// }
2903// To:
2904// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2905// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2906// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2907// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2908// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2910 assert(!State.Lane && "Interleave group being replicated.");
2911 const InterleaveGroup<Instruction> *Group = IG;
2912 Instruction *Instr = Group->getInsertPos();
2913
2914 // Prepare for the vector type of the interleaved load/store.
2915 Type *ScalarTy = getLoadStoreType(Instr);
2916 unsigned InterleaveFactor = Group->getFactor();
2917 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2918
2919 // TODO: extend the masked interleaved-group support to reversed access.
2920 VPValue *BlockInMask = getMask();
2921 assert((!BlockInMask || !Group->isReverse()) &&
2922 "Reversed masked interleave-group not supported.");
2923
2924 VPValue *Addr = getAddr();
2925 Value *ResAddr = State.get(Addr, VPLane(0));
2926 if (auto *I = dyn_cast<Instruction>(ResAddr))
2927 State.setDebugLocFrom(I->getDebugLoc());
2928
2929 // If the group is reverse, adjust the index to refer to the last vector lane
2930 // instead of the first. We adjust the index from the first vector lane,
2931 // rather than directly getting the pointer for lane VF - 1, because the
2932 // pointer operand of the interleaved access is supposed to be uniform.
2933 if (Group->isReverse()) {
2934 Value *RuntimeVF =
2935 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2936 Value *Index =
2937 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2938 Index = State.Builder.CreateMul(Index,
2939 State.Builder.getInt32(Group->getFactor()));
2940 Index = State.Builder.CreateNeg(Index);
2941
2942 bool InBounds = false;
2943 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2944 InBounds = Gep->isInBounds();
2945 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2946 }
2947
2948 State.setDebugLocFrom(Instr->getDebugLoc());
2949 Value *PoisonVec = PoisonValue::get(VecTy);
2950
2951 auto CreateGroupMask = [&BlockInMask, &State,
2952 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2953 if (State.VF.isScalable()) {
2954 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2955 assert(InterleaveFactor == 2 &&
2956 "Unsupported deinterleave factor for scalable vectors");
2957 auto *ResBlockInMask = State.get(BlockInMask);
2958 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2959 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2960 State.VF.getKnownMinValue() * 2, true);
2961 return State.Builder.CreateIntrinsic(
2962 MaskTy, Intrinsic::vector_interleave2, Ops,
2963 /*FMFSource=*/nullptr, "interleaved.mask");
2964 }
2965
2966 if (!BlockInMask)
2967 return MaskForGaps;
2968
2969 Value *ResBlockInMask = State.get(BlockInMask);
2970 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2971 ResBlockInMask,
2972 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2973 "interleaved.mask");
2974 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2975 ShuffledMask, MaskForGaps)
2976 : ShuffledMask;
2977 };
2978
2979 const DataLayout &DL = Instr->getDataLayout();
2980 // Vectorize the interleaved load group.
2981 if (isa<LoadInst>(Instr)) {
2982 Value *MaskForGaps = nullptr;
2983 if (NeedsMaskForGaps) {
2984 MaskForGaps = createBitMaskForGaps(State.Builder,
2985 State.VF.getKnownMinValue(), *Group);
2986 assert(MaskForGaps && "Mask for Gaps is required but it is null");
2987 }
2988
2989 Instruction *NewLoad;
2990 if (BlockInMask || MaskForGaps) {
2991 Value *GroupMask = CreateGroupMask(MaskForGaps);
2992 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
2993 Group->getAlign(), GroupMask,
2994 PoisonVec, "wide.masked.vec");
2995 } else
2996 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
2997 Group->getAlign(), "wide.vec");
2998 Group->addMetadata(NewLoad);
2999
3001 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3002 if (VecTy->isScalableTy()) {
3003 assert(InterleaveFactor == 2 &&
3004 "Unsupported deinterleave factor for scalable vectors");
3005
3006 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3007 // so must use intrinsics to deinterleave.
3008 Value *DI = State.Builder.CreateIntrinsic(
3009 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3010 /*FMFSource=*/nullptr, "strided.vec");
3011 unsigned J = 0;
3012 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3013 Instruction *Member = Group->getMember(I);
3014
3015 if (!Member)
3016 continue;
3017
3018 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3019 // If this member has different type, cast the result type.
3020 if (Member->getType() != ScalarTy) {
3021 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3022 StridedVec =
3023 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3024 }
3025
3026 if (Group->isReverse())
3027 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3028
3029 State.set(VPDefs[J], StridedVec);
3030 ++J;
3031 }
3032
3033 return;
3034 }
3035
3036 // For each member in the group, shuffle out the appropriate data from the
3037 // wide loads.
3038 unsigned J = 0;
3039 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3040 Instruction *Member = Group->getMember(I);
3041
3042 // Skip the gaps in the group.
3043 if (!Member)
3044 continue;
3045
3046 auto StrideMask =
3047 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3048 Value *StridedVec =
3049 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3050
3051 // If this member has different type, cast the result type.
3052 if (Member->getType() != ScalarTy) {
3053 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3054 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3055 StridedVec =
3056 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3057 }
3058
3059 if (Group->isReverse())
3060 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3061
3062 State.set(VPDefs[J], StridedVec);
3063 ++J;
3064 }
3065 return;
3066 }
3067
3068 // The sub vector type for current instruction.
3069 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3070
3071 // Vectorize the interleaved store group.
3072 Value *MaskForGaps =
3073 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3074 assert((!MaskForGaps || !State.VF.isScalable()) &&
3075 "masking gaps for scalable vectors is not yet supported.");
3076 ArrayRef<VPValue *> StoredValues = getStoredValues();
3077 // Collect the stored vector from each member.
3078 SmallVector<Value *, 4> StoredVecs;
3079 unsigned StoredIdx = 0;
3080 for (unsigned i = 0; i < InterleaveFactor; i++) {
3081 assert((Group->getMember(i) || MaskForGaps) &&
3082 "Fail to get a member from an interleaved store group");
3083 Instruction *Member = Group->getMember(i);
3084
3085 // Skip the gaps in the group.
3086 if (!Member) {
3087 Value *Undef = PoisonValue::get(SubVT);
3088 StoredVecs.push_back(Undef);
3089 continue;
3090 }
3091
3092 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3093 ++StoredIdx;
3094
3095 if (Group->isReverse())
3096 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3097
3098 // If this member has different type, cast it to a unified type.
3099
3100 if (StoredVec->getType() != SubVT)
3101 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3102
3103 StoredVecs.push_back(StoredVec);
3104 }
3105
3106 // Interleave all the smaller vectors into one wider vector.
3107 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3108 Instruction *NewStoreInstr;
3109 if (BlockInMask || MaskForGaps) {
3110 Value *GroupMask = CreateGroupMask(MaskForGaps);
3111 NewStoreInstr = State.Builder.CreateMaskedStore(
3112 IVec, ResAddr, Group->getAlign(), GroupMask);
3113 } else
3114 NewStoreInstr =
3115 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3116
3117 Group->addMetadata(NewStoreInstr);
3118}
3119
3120#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3122 VPSlotTracker &SlotTracker) const {
3123 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3124 IG->getInsertPos()->printAsOperand(O, false);
3125 O << ", ";
3127 VPValue *Mask = getMask();
3128 if (Mask) {
3129 O << ", ";
3130 Mask->printAsOperand(O, SlotTracker);
3131 }
3132
3133 unsigned OpIdx = 0;
3134 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3135 if (!IG->getMember(i))
3136 continue;
3137 if (getNumStoreOperands() > 0) {
3138 O << "\n" << Indent << " store ";
3139 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3140 O << " to index " << i;
3141 } else {
3142 O << "\n" << Indent << " ";
3144 O << " = load from index " << i;
3145 }
3146 ++OpIdx;
3147 }
3148}
3149#endif
3150
3152 VPCostContext &Ctx) const {
3153 Instruction *InsertPos = getInsertPos();
3154 // Find the VPValue index of the interleave group. We need to skip gaps.
3155 unsigned InsertPosIdx = 0;
3156 for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx)
3157 if (auto *Member = IG->getMember(Idx)) {
3158 if (Member == InsertPos)
3159 break;
3160 InsertPosIdx++;
3161 }
3162 Type *ValTy = Ctx.Types.inferScalarType(
3163 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3164 : getStoredValues()[InsertPosIdx]);
3165 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3166 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3168
3169 unsigned InterleaveFactor = IG->getFactor();
3170 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3171
3172 // Holds the indices of existing members in the interleaved group.
3174 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3175 if (IG->getMember(IF))
3176 Indices.push_back(IF);
3177
3178 // Calculate the cost of the whole interleaved group.
3180 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3181 IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
3182
3183 if (!IG->isReverse())
3184 return Cost;
3185
3186 return Cost + IG->getNumMembers() *
3188 VectorTy, std::nullopt, CostKind, 0);
3189}
3190
3191#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3193 VPSlotTracker &SlotTracker) const {
3194 O << Indent << "EMIT ";
3196 O << " = CANONICAL-INDUCTION ";
3198}
3199#endif
3200
3202 return IsScalarAfterVectorization &&
3203 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3204}
3205
3207 assert(getInductionDescriptor().getKind() ==
3209 "Not a pointer induction according to InductionDescriptor!");
3210 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3211 "Unexpected type.");
3213 "Recipe should have been replaced");
3214
3215 unsigned CurrentPart = getUnrollPart(*this);
3216
3217 // Build a pointer phi
3218 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3219 Type *ScStValueType = ScalarStartValue->getType();
3220
3221 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3222 PHINode *NewPointerPhi = nullptr;
3223 if (CurrentPart == 0) {
3224 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3225 ->getPlan()
3226 ->getVectorLoopRegion()
3227 ->getEntryBasicBlock()
3228 ->front());
3229 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3230 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3231 CanonicalIV->getIterator());
3232 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3233 NewPointerPhi->setDebugLoc(getDebugLoc());
3234 } else {
3235 // The recipe has been unrolled. In that case, fetch the single pointer phi
3236 // shared among all unrolled parts of the recipe.
3237 auto *GEP =
3238 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3239 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3240 }
3241
3242 // A pointer induction, performed by using a gep
3243 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3244 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3245 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3246 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3247 // Add induction update using an incorrect block temporarily. The phi node
3248 // will be fixed after VPlan execution. Note that at this point the latch
3249 // block cannot be used, as it does not exist yet.
3250 // TODO: Model increment value in VPlan, by turning the recipe into a
3251 // multi-def and a subclass of VPHeaderPHIRecipe.
3252 if (CurrentPart == 0) {
3253 // The recipe represents the first part of the pointer induction. Create the
3254 // GEP to increment the phi across all unrolled parts.
3255 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3256 Value *NumUnrolledElems =
3257 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3258
3259 Value *InductionGEP = GetElementPtrInst::Create(
3260 State.Builder.getInt8Ty(), NewPointerPhi,
3261 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3262 InductionLoc);
3263
3264 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3265 }
3266
3267 // Create actual address geps that use the pointer phi as base and a
3268 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3269 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3270 Value *StartOffsetScalar = State.Builder.CreateMul(
3271 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3272 Value *StartOffset =
3273 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3274 // Create a vector of consecutive numbers from zero to VF.
3275 StartOffset = State.Builder.CreateAdd(
3276 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3277
3278 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3279 "scalar step must be the same across all parts");
3280 Value *GEP = State.Builder.CreateGEP(
3281 State.Builder.getInt8Ty(), NewPointerPhi,
3282 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3283 State.VF, ScalarStepValue)),
3284 "vector.gep");
3285 State.set(this, GEP);
3286}
3287
3288#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3290 VPSlotTracker &SlotTracker) const {
3291 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3292 "unexpected number of operands");
3293 O << Indent << "EMIT ";
3295 O << " = WIDEN-POINTER-INDUCTION ";
3297 O << ", ";
3299 if (getNumOperands() == 4) {
3300 O << ", ";
3302 O << ", ";
3304 }
3305}
3306#endif
3307
3309 assert(!State.Lane && "cannot be used in per-lane");
3310 if (State.ExpandedSCEVs.contains(Expr)) {
3311 // SCEV Expr has already been expanded, result must already be set. At the
3312 // moment we have to execute the entry block twice (once before skeleton
3313 // creation to get expanded SCEVs used by the skeleton and once during
3314 // regular VPlan execution).
3316 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3317 "Results must match");
3318 return;
3319 }
3320
3321 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3322 SCEVExpander Exp(SE, DL, "induction");
3323
3324 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3325 &*State.Builder.GetInsertPoint());
3326 State.ExpandedSCEVs[Expr] = Res;
3327 State.set(this, Res, VPLane(0));
3328}
3329
3330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3332 VPSlotTracker &SlotTracker) const {
3333 O << Indent << "EMIT ";
3335 O << " = EXPAND SCEV " << *Expr;
3336}
3337#endif
3338
3340 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3341 Type *STy = CanonicalIV->getType();
3342 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3343 ElementCount VF = State.VF;
3344 Value *VStart = VF.isScalar()
3345 ? CanonicalIV
3346 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3347 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3348 if (VF.isVector()) {
3349 VStep = Builder.CreateVectorSplat(VF, VStep);
3350 VStep =
3351 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3352 }
3353 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3354 State.set(this, CanonicalVectorIV);
3355}
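// Illustrative sketch (assuming a scalar canonical IV %index, VF = 4 and unroll
// part 0): the code above computes
//   %vstart = broadcast %index                        ; <4 x i64>
//   %vstep  = <i64 0, i64 1, i64 2, i64 3>            ; part * VF + stepvector
//   %vec.iv = add <4 x i64> %vstart, %vstep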
3356
3357#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3359 VPSlotTracker &SlotTracker) const {
3360 O << Indent << "EMIT ";
3362 O << " = WIDEN-CANONICAL-INDUCTION ";
3364}
3365#endif
3366
3368 auto &Builder = State.Builder;
3369 // Create a vector from the initial value.
3370 auto *VectorInit = getStartValue()->getLiveInIRValue();
3371
3372 Type *VecTy = State.VF.isScalar()
3373 ? VectorInit->getType()
3374 : VectorType::get(VectorInit->getType(), State.VF);
3375
3376 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3377 if (State.VF.isVector()) {
3378 auto *IdxTy = Builder.getInt32Ty();
3379 auto *One = ConstantInt::get(IdxTy, 1);
3380 IRBuilder<>::InsertPointGuard Guard(Builder);
3381 Builder.SetInsertPoint(VectorPH->getTerminator());
3382 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3383 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3384 VectorInit = Builder.CreateInsertElement(
3385 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3386 }
3387
3388 // Create a phi node for the new recurrence.
3389 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3390 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3391 Phi->addIncoming(VectorInit, VectorPH);
3392 State.set(this, Phi);
3393}
3394
3397 VPCostContext &Ctx) const {
3399 if (VF.isScalar())
3400 return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3401
3402 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3404
3406 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3407 Type *VectorTy =
3408 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3409
3411 cast<VectorType>(VectorTy), Mask, CostKind,
3412 VF.getKnownMinValue() - 1);
3413}
3414
3415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3417 VPSlotTracker &SlotTracker) const {
3418 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3420 O << " = phi ";
3422}
3423#endif
3424
3426 auto &Builder = State.Builder;
3427
3428 // If this phi is fed by a scaled reduction then it should output a
3429 // vector with fewer elements than the VF.
3430 ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3431
3432 // Reductions do not have to start at zero. They can start with
3433 // any loop invariant values.
3434 VPValue *StartVPV = getStartValue();
3435 Value *StartV = StartVPV->getLiveInIRValue();
3436
3437 // In order to support recurrences we need to be able to vectorize Phi nodes.
3438 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3439 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3440 // this value when we vectorize all of the instructions that use the PHI.
3441 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3442 Type *VecTy =
3443 ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3444
3445 BasicBlock *HeaderBB = State.CFG.PrevBB;
3446 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3447 "recipe must be in the vector loop header");
3448 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3449 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3450 State.set(this, Phi, IsInLoop);
3451
3452 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3453
3454 Value *Iden = nullptr;
3455 RecurKind RK = RdxDesc.getRecurrenceKind();
3456 unsigned CurrentPart = getUnrollPart(*this);
3457
3460 // MinMax and AnyOf reductions have the start value as their identity.
3461 if (ScalarPHI) {
3462 Iden = StartV;
3463 } else {
3464 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3465 Builder.SetInsertPoint(VectorPH->getTerminator());
3466 StartV = Iden = State.get(StartVPV);
3467 }
3469 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3470 // phi or the resume value from the main vector loop when vectorizing the
3471 // epilogue loop. In the exit block, ComputeReductionResult will generate
3472 // checks to verify if the reduction result is the sentinel value. If the
3473 // result is the sentinel value, it will be corrected back to the start
3474 // value.
3475 // TODO: The sentinel value is not always necessary. When the start value is
3476 // a constant, and smaller than the start value of the induction variable,
3477 // the start value can be directly used to initialize the reduction phi.
3478 Iden = StartV;
3479 if (!ScalarPHI) {
3480 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3481 Builder.SetInsertPoint(VectorPH->getTerminator());
3482 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3483 }
3484 } else {
3485 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3486 RdxDesc.getFastMathFlags());
3487
3488 if (!ScalarPHI) {
3489 if (CurrentPart == 0) {
3490 // Create start and identity vector values for the reduction in the
3491 // preheader.
3492 // TODO: Introduce recipes in VPlan preheader to create initial values.
3493 Iden = Builder.CreateVectorSplat(VF, Iden);
3494 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3495 Builder.SetInsertPoint(VectorPH->getTerminator());
3496 Constant *Zero = Builder.getInt32(0);
3497 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3498 } else {
3499 Iden = Builder.CreateVectorSplat(VF, Iden);
3500 }
3501 }
3502 }
3503
3504 Phi = cast<PHINode>(State.get(this, IsInLoop));
3505 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3506 Phi->addIncoming(StartVal, VectorPH);
3507}
3508
3509#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3511 VPSlotTracker &SlotTracker) const {
3512 O << Indent << "WIDEN-REDUCTION-PHI ";
3513
3515 O << " = phi ";
3517 if (VFScaleFactor != 1)
3518 O << " (VF scaled by 1/" << VFScaleFactor << ")";
3519}
3520#endif
3521
3524 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3525
3526 Value *Op0 = State.get(getOperand(0));
3527 Type *VecTy = Op0->getType();
3528 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3529 State.set(this, VecPhi);
3530}
3531
3532#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 3533 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 3534 VPSlotTracker &SlotTracker) const {
3535 O << Indent << "WIDEN-PHI ";
3536
3537 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3538 // Unless all incoming values are modeled in VPlan print the original PHI
3539 // directly.
3540 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3541 // values as VPValues.
3542 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3543 O << VPlanIngredient(OriginalPhi);
3544 return;
3545 }
3546
 3547 printAsOperand(O, SlotTracker);
 3548 O << " = phi ";
 3549 printOperands(O, SlotTracker);
3550}
3551#endif
3552
3553// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3554// remove VPActiveLaneMaskPHIRecipe.
 3555 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
 3556 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3557 Value *StartMask = State.get(getOperand(0));
3558 PHINode *Phi =
3559 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3560 Phi->addIncoming(StartMask, VectorPH);
3561 Phi->setDebugLoc(getDebugLoc());
3562 State.set(this, Phi);
3563}
3564
3565#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 3566 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 3567 VPSlotTracker &SlotTracker) const {
3568 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3569
 3570 printAsOperand(O, SlotTracker);
 3571 O << " = phi ";
 3572 printOperands(O, SlotTracker);
3573}
3574#endif
3575
3576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 3577 void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 3578 VPSlotTracker &SlotTracker) const {
3579 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3580
 3581 printAsOperand(O, SlotTracker);
 3582 O << " = phi ";
 3583 printOperands(O, SlotTracker);
3584}
3585#endif
3586
 3587 void VPScalarPHIRecipe::execute(VPTransformState &State) {
 3588 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3589 Value *Start = State.get(getStartValue(), VPLane(0));
3590 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3591 Phi->addIncoming(Start, VectorPH);
3592 Phi->setDebugLoc(getDebugLoc());
3593 State.set(this, Phi, /*IsScalar=*/true);
3594}
3595
3596#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 3597 void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 3598 VPSlotTracker &SlotTracker) const {
3599 O << Indent << "SCALAR-PHI ";
 3600 printAsOperand(O, SlotTracker);
 3601 O << " = phi ";
 3602 printOperands(O, SlotTracker);
3603}
3604#endif
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
Hexagon Common GEP
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
cl::opt< unsigned > ForceTargetInstructionCost
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:662
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:595
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2051
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1135
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1163
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2186
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1119
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2234
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2404
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:968
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
LLVMContext & getContext() const
Definition: IRBuilder.h:195
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isUnaryOp() const
Definition: Instruction.h:278
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
bool isReverse() const
Definition: VectorUtils.h:503
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:505
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3528
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3581
iterator end()
Definition: VPlan.h:3565
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3594
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2515
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2520
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2510
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2506
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
VPRegionBlock * getParent()
Definition: VPlan.h:489
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
VPlan * getPlan()
Definition: VPlan.cpp:153
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2882
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:114
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:415
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:410
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:388
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:400
unsigned getVPDefID() const
Definition: VPlanValue.h:420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3458
VPValue * getStartValue() const
Definition: VPlan.h:3457
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2062
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1804
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1210
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1198
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1213
@ CalculateTripCountMinusVF
Definition: VPlan.h:1211
bool hasResult() const
Definition: VPlan.h:1333
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1310
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2594
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2600
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2607
Instruction * getInsertPos() const
Definition: VPlan.h:2642
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2631
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
static VPLane getFirstLane()
Definition: VPlan.h:178
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2475
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:925
ExactFlagsTy ExactFlags
Definition: VPlan.h:975
FastMathFlagsTy FMFs
Definition: VPlan.h:978
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:977
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1145
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1106
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1148
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:974
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:976
WrapFlagsTy WrapFlags
Definition: VPlan.h:973
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1152
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1139
bool hasNoSignedWrap() const
Definition: VPlan.h:1158
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2755
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2713
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2717
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2707
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2719
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2711
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2715
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3705
const VPBlockBase * getEntry() const
Definition: VPlan.h:3741
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2842
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3515
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:911
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:441
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1456
operand_range operands()
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:272
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1452
friend class VPInstruction
Definition: VPlanValue.h:47
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:138
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
user_iterator user_begin()
Definition: VPlanValue.h:128
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1752
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1756
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1575
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1503
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:2118
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2115
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2121
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2193
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2202
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1695
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2953
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2950
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2989
Instruction & Ingredient
Definition: VPlan.h:2944
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2947
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3003
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2996
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2993
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2247
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1469
unsigned getUF() const
Definition: VPlan.h:4010
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:41
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:39
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:255
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
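To illustrate how these accessors fit together, a simplified, hypothetical execute() body (the recipe class and the operations it emits are illustrative only, not copied from any real recipe):
  void VPWidenExampleRecipe::execute(VPTransformState &State) {
    State.setDebugLocFrom(getDebugLoc());
    Value *A = State.get(getOperand(0)); // generated vector for operand 0
    Value *B = State.get(getOperand(1)); // generated vector for operand 1
    Value *Sum = State.Builder.CreateAdd(A, B, "vec.add");
    State.set(this, Sum);                // record the widened result
  }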
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3073
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1847
VPValue * getCond() const
Definition: VPlan.h:1843
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3152
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3155
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3117
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.