1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
37using VectorParts = SmallVector<Value *, 2>;
38
39namespace llvm {
40extern cl::opt<bool> EnableVPlanNativePath;
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
46bool VPRecipeBase::mayWriteToMemory() const {
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenMemoryInstructionSC: {
51 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
52 }
53 case VPReplicateSC:
54 case VPWidenCallSC:
55 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
56 ->mayWriteToMemory();
57 case VPBranchOnMaskSC:
58 case VPScalarIVStepsSC:
59 case VPPredInstPHISC:
60 return false;
61 case VPBlendSC:
62 case VPReductionSC:
63 case VPWidenCanonicalIVSC:
64 case VPWidenCastSC:
65 case VPWidenGEPSC:
66 case VPWidenIntOrFpInductionSC:
67 case VPWidenPHISC:
68 case VPWidenSC:
69 case VPWidenSelectSC: {
70 const Instruction *I =
71 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
72 (void)I;
73 assert((!I || !I->mayWriteToMemory()) &&
74 "underlying instruction may write to memory");
75 return false;
76 }
77 default:
78 return true;
79 }
80}
81
82bool VPRecipeBase::mayReadFromMemory() const {
83 switch (getVPDefID()) {
84 case VPWidenMemoryInstructionSC: {
85 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
86 }
87 case VPReplicateSC:
88 case VPWidenCallSC:
89 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
90 ->mayReadFromMemory();
91 case VPBranchOnMaskSC:
92 case VPScalarIVStepsSC:
93 case VPPredInstPHISC:
94 return false;
95 case VPBlendSC:
96 case VPReductionSC:
97 case VPWidenCanonicalIVSC:
98 case VPWidenCastSC:
99 case VPWidenGEPSC:
100 case VPWidenIntOrFpInductionSC:
101 case VPWidenPHISC:
102 case VPWidenSC:
103 case VPWidenSelectSC: {
104 const Instruction *I =
105 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
106 (void)I;
107 assert((!I || !I->mayReadFromMemory()) &&
108 "underlying instruction may read from memory");
109 return false;
110 }
111 default:
112 return true;
113 }
114}
115
116bool VPRecipeBase::mayHaveSideEffects() const {
117 switch (getVPDefID()) {
118 case VPDerivedIVSC:
119 case VPPredInstPHISC:
120 case VPScalarCastSC:
121 return false;
122 case VPInstructionSC:
123 switch (cast<VPInstruction>(this)->getOpcode()) {
124 case Instruction::Or:
125 case Instruction::ICmp:
126 case Instruction::Select:
127 case VPInstruction::Not:
128 case VPInstruction::CalculateTripCountMinusVF:
129 case VPInstruction::CanonicalIVIncrementForPart:
130 return false;
131 default:
132 return true;
133 }
134 case VPWidenCallSC:
135 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
136 ->mayHaveSideEffects();
137 case VPBlendSC:
138 case VPReductionSC:
139 case VPScalarIVStepsSC:
140 case VPWidenCanonicalIVSC:
141 case VPWidenCastSC:
142 case VPWidenGEPSC:
143 case VPWidenIntOrFpInductionSC:
144 case VPWidenPHISC:
145 case VPWidenPointerInductionSC:
146 case VPWidenSC:
147 case VPWidenSelectSC: {
148 const Instruction *I =
149 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
150 (void)I;
151 assert((!I || !I->mayHaveSideEffects()) &&
152 "underlying instruction has side-effects");
153 return false;
154 }
155 case VPInterleaveSC:
156 return mayWriteToMemory();
157 case VPWidenMemoryInstructionSC:
158 assert(cast<VPWidenMemoryInstructionRecipe>(this)
159 ->getIngredient()
160 .mayHaveSideEffects() == mayWriteToMemory() &&
161 "mayHaveSideffects result for ingredient differs from this "
162 "implementation");
163 return mayWriteToMemory();
164 case VPReplicateSC: {
165 auto *R = cast<VPReplicateRecipe>(this);
166 return R->getUnderlyingInstr()->mayHaveSideEffects();
167 }
168 default:
169 return true;
170 }
171}
172
173void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
174 auto Lane = VPLane::getLastLaneForVF(State.VF);
175 VPValue *ExitValue = getOperand(0);
176 if (vputils::isUniformAfterVectorization(ExitValue))
177 Lane = VPLane::getFirstLane();
178 VPBasicBlock *MiddleVPBB =
179 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
180 assert(MiddleVPBB->getNumSuccessors() == 0 &&
181 "the middle block must not have any successors");
182 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
183 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
184 MiddleBB);
185}
186
187#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
189 O << "Live-out ";
191 O << " = ";
193 O << "\n";
194}
195#endif
196
197void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
198 assert(!Parent && "Recipe already in some VPBasicBlock");
199 assert(InsertPos->getParent() &&
200 "Insertion position not in any VPBasicBlock");
201 InsertPos->getParent()->insert(this, InsertPos->getIterator());
202}
203
204void VPRecipeBase::insertBefore(VPBasicBlock &BB,
205 iplist<VPRecipeBase>::iterator I) {
206 assert(!Parent && "Recipe already in some VPBasicBlock");
207 assert(I == BB.end() || I->getParent() == &BB);
208 BB.insert(this, I);
209}
210
211void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
212 assert(!Parent && "Recipe already in some VPBasicBlock");
213 assert(InsertPos->getParent() &&
214 "Insertion position not in any VPBasicBlock");
215 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
216}
217
218void VPRecipeBase::removeFromParent() {
219 assert(getParent() && "Recipe not in any VPBasicBlock");
220 getParent()->getRecipeList().remove(getIterator());
221 Parent = nullptr;
222}
223
224iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
225 assert(getParent() && "Recipe not in any VPBasicBlock");
226 return getParent()->getRecipeList().erase(getIterator());
227}
228
229void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
230 removeFromParent();
231 insertAfter(InsertPos);
232}
233
234void VPRecipeBase::moveBefore(VPBasicBlock &BB,
235 iplist<VPRecipeBase>::iterator I) {
236 removeFromParent();
237 insertBefore(BB, I);
238}
239
240FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
241 assert(OpType == OperationType::FPMathOp &&
242 "recipe doesn't have fast math flags");
243 FastMathFlags Res;
244 Res.setAllowReassoc(FMFs.AllowReassoc);
245 Res.setNoNaNs(FMFs.NoNaNs);
246 Res.setNoInfs(FMFs.NoInfs);
247 Res.setNoSignedZeros(FMFs.NoSignedZeros);
248 Res.setAllowReciprocal(FMFs.AllowReciprocal);
249 Res.setAllowContract(FMFs.AllowContract);
250 Res.setApproxFunc(FMFs.ApproxFunc);
251 return Res;
252}
253
254VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
255 VPValue *A, VPValue *B, DebugLoc DL,
256 const Twine &Name)
257 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
258 Pred, DL),
259 Opcode(Opcode), Name(Name.str()) {
260 assert(Opcode == Instruction::ICmp &&
261 "only ICmp predicates supported at the moment");
262}
263
264VPInstruction::VPInstruction(unsigned Opcode,
265 std::initializer_list<VPValue *> Operands,
266 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
267 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
268 Opcode(Opcode), Name(Name.str()) {
269 // Make sure the VPInstruction is a floating-point operation.
270 assert(isFPMathOp() && "this op can't take fast-math flags");
271}
272
273Value *VPInstruction::generateInstruction(VPTransformState &State,
274 unsigned Part) {
275 IRBuilderBase &Builder = State.Builder;
277
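 // Widened binary operations take the fast path below: both operands are
 // fetched for the current unroll part, a single IR instruction is created,
 // and this recipe's wrap/FP flags are transferred to it via setFlags().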
278 if (Instruction::isBinaryOp(getOpcode())) {
279 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
280 if (Part != 0 && vputils::onlyFirstPartUsed(this))
281 return State.get(this, 0, OnlyFirstLaneUsed);
282
283 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
284 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
285 auto *Res =
286 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
287 if (auto *I = dyn_cast<Instruction>(Res))
288 setFlags(I);
289 return Res;
290 }
291
292 switch (getOpcode()) {
293 case VPInstruction::Not: {
294 Value *A = State.get(getOperand(0), Part);
295 return Builder.CreateNot(A, Name);
296 }
297 case Instruction::ICmp: {
298 Value *A = State.get(getOperand(0), Part);
299 Value *B = State.get(getOperand(1), Part);
300 return Builder.CreateCmp(getPredicate(), A, B, Name);
301 }
302 case Instruction::Select: {
303 Value *Cond = State.get(getOperand(0), Part);
304 Value *Op1 = State.get(getOperand(1), Part);
305 Value *Op2 = State.get(getOperand(2), Part);
306 return Builder.CreateSelect(Cond, Op1, Op2, Name);
307 }
308 case VPInstruction::ActiveLaneMask: {
309 // Get first lane of vector induction variable.
310 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
311 // Get the original loop tripcount.
312 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
313
314 // If this part of the active lane mask is scalar, generate the CMP directly
315 // to avoid unnecessary extracts.
316 if (State.VF.isScalar())
317 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
318 Name);
319
320 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
321 auto *PredTy = VectorType::get(Int1Ty, State.VF);
322 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
323 {PredTy, ScalarTC->getType()},
324 {VIVElem0, ScalarTC}, nullptr, Name);
325 }
326 case VPInstruction::FirstOrderRecurrenceSplice: {
327 // Generate code to combine the previous and current values in vector v3.
328 //
329 // vector.ph:
330 // v_init = vector(..., ..., ..., a[-1])
331 // br vector.body
332 //
333 // vector.body
334 // i = phi [0, vector.ph], [i+4, vector.body]
335 // v1 = phi [v_init, vector.ph], [v2, vector.body]
336 // v2 = a[i, i+1, i+2, i+3];
337 // v3 = vector(v1(3), v2(0, 1, 2))
338
339 // For the first part, use the recurrence phi (v1), otherwise v2.
340 auto *V1 = State.get(getOperand(0), 0);
341 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
342 if (!PartMinus1->getType()->isVectorTy())
343 return PartMinus1;
344 Value *V2 = State.get(getOperand(1), Part);
345 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
346 }
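 // The next case computes max(trip-count - VF * UF, 0) with a compare and
 // select so the subtraction cannot wrap; it is printed later in this file
 // as "TC > VF ? TC - VF : 0".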
347 case VPInstruction::CalculateTripCountMinusVF: {
348 Value *ScalarTC = State.get(getOperand(0), {0, 0});
349 Value *Step =
350 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
351 Value *Sub = Builder.CreateSub(ScalarTC, Step);
352 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
353 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
354 return Builder.CreateSelect(Cmp, Sub, Zero);
355 }
356 case VPInstruction::CanonicalIVIncrementForPart: {
357 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
358 if (Part == 0)
359 return IV;
360
361 // The canonical IV is incremented by the vectorization factor (num of SIMD
362 // elements) times the unroll part.
363 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
364 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
365 hasNoSignedWrap());
366 }
367 case VPInstruction::BranchOnCond: {
368 if (Part != 0)
369 return nullptr;
370
371 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
372 VPRegionBlock *ParentRegion = getParent()->getParent();
373 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
374
375 // Replace the temporary unreachable terminator with a new conditional
376 // branch, hooking it up to backward destination for exiting blocks now and
377 // to forward destination(s) later when they are created.
378 BranchInst *CondBr =
379 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
380
381 if (getParent()->isExiting())
382 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
383
384 CondBr->setSuccessor(0, nullptr);
385 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
386 return CondBr;
387 }
388 case VPInstruction::BranchOnCount: {
389 if (Part != 0)
390 return nullptr;
391 // First create the compare.
392 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
393 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
394 Value *Cond = Builder.CreateICmpEQ(IV, TC);
395
396 // Now create the branch.
397 auto *Plan = getParent()->getPlan();
398 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
399 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
400
401 // Replace the temporary unreachable terminator with a new conditional
402 // branch, hooking it up to backward destination (the header) now and to the
403 // forward destination (the exit/middle block) later when it is created.
404 // Note that CreateCondBr expects a valid BB as first argument, so we need
405 // to set it to nullptr later.
406 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
407 State.CFG.VPBB2IRBB[Header]);
408 CondBr->setSuccessor(0, nullptr);
409 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
410 return CondBr;
411 }
412 case VPInstruction::ComputeReductionResult: {
413 if (Part != 0)
414 return State.get(this, 0, /*IsScalar*/ true);
415
416 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
417 // and will be removed by breaking up the recipe further.
418 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
419 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
420 // Get its reduction variable descriptor.
421 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
422
423 RecurKind RK = RdxDesc.getRecurrenceKind();
424
426
427 VPValue *LoopExitingDef = getOperand(1);
428 Type *PhiTy = OrigPhi->getType();
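 // Collect the loop-exiting value of every unroll part; for in-loop
 // (ordered) reductions these are scalar values rather than vectors.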
429 VectorParts RdxParts(State.UF);
430 for (unsigned Part = 0; Part < State.UF; ++Part)
431 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
432
433 // If the vector reduction can be performed in a smaller type, we truncate
434 // then extend the loop exit value to enable InstCombine to evaluate the
435 // entire expression in the smaller type.
436 // TODO: Handle this in truncateToMinBW.
437 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
438 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
439 for (unsigned Part = 0; Part < State.UF; ++Part)
440 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
441 }
442 // Reduce all of the unrolled parts into a single vector.
443 Value *ReducedPartRdx = RdxParts[0];
444 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
445
446 if (PhiR->isOrdered()) {
447 ReducedPartRdx = RdxParts[State.UF - 1];
448 } else {
449 // Floating-point operations should have some FMF to enable the reduction.
450 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
451 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
452 for (unsigned Part = 1; Part < State.UF; ++Part) {
453 Value *RdxPart = RdxParts[Part];
454 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
455 ReducedPartRdx = Builder.CreateBinOp(
456 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
457 else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
458 TrackingVH<Value> ReductionStartValue =
459 RdxDesc.getRecurrenceStartValue();
460 ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
461 ReducedPartRdx, RdxPart);
462 } else
463 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
464 }
465 }
466
467 // Create the reduction after the loop. Note that inloop reductions create
468 // the target reduction in the loop using a Reduction recipe.
469 if (State.VF.isVector() && !PhiR->isInLoop()) {
470 ReducedPartRdx =
471 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
472 // If the reduction can be performed in a smaller type, we need to extend
473 // the reduction to the wider type before we branch to the original loop.
474 if (PhiTy != RdxDesc.getRecurrenceType())
475 ReducedPartRdx = RdxDesc.isSigned()
476 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
477 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
478 }
479
480 // If there were stores of the reduction value to a uniform memory address
481 // inside the loop, create the final store here.
482 if (StoreInst *SI = RdxDesc.IntermediateStore) {
483 auto *NewSI = Builder.CreateAlignedStore(
484 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
485 propagateMetadata(NewSI, SI);
486 }
487
488 return ReducedPartRdx;
489 }
490 default:
491 llvm_unreachable("Unsupported opcode for instruction");
492 }
493}
494
495#if !defined(NDEBUG)
496bool VPInstruction::isFPMathOp() const {
497 // Inspired by FPMathOperator::classof. Notable differences are that we don't
498 // support Call, PHI and Select opcodes here yet.
499 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
500 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
501 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
502 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
503}
504#endif
505
507 assert(!State.Instance && "VPInstruction executing an Instance");
509 assert((hasFastMathFlags() == isFPMathOp() ||
510 getOpcode() == Instruction::Select) &&
511 "Recipe not a FPMathOp but has fast-math flags?");
512 if (hasFastMathFlags())
513 State.Builder.setFastMathFlags(getFastMathFlags());
514 for (unsigned Part = 0; Part < State.UF; ++Part) {
515 Value *GeneratedValue = generateInstruction(State, Part);
516 if (!hasResult())
517 continue;
518 assert(GeneratedValue && "generateInstruction must produce a value");
519
520 bool IsVector = GeneratedValue->getType()->isVectorTy();
521 State.set(this, GeneratedValue, Part, !IsVector);
522 assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
523 State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
524 "scalar value but not only first lane used");
525 }
526}
527
528bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
529 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
530 if (Instruction::isBinaryOp(getOpcode()))
531 return vputils::onlyFirstLaneUsed(this);
532
533 switch (getOpcode()) {
534 default:
535 return false;
536 case Instruction::ICmp:
537 // TODO: Cover additional opcodes.
538 return vputils::onlyFirstLaneUsed(this);
539 case VPInstruction::ActiveLaneMask:
540 case VPInstruction::CalculateTripCountMinusVF:
541 case VPInstruction::CanonicalIVIncrementForPart:
542 case VPInstruction::BranchOnCount:
543 return true;
544 };
545 llvm_unreachable("switch should return");
546}
547
548#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
549void VPInstruction::dump() const {
550 VPSlotTracker SlotTracker(getParent()->getPlan());
551 print(dbgs(), "", SlotTracker);
552}
553
554void VPInstruction::print(raw_ostream &O, const Twine &Indent,
555 VPSlotTracker &SlotTracker) const {
556 O << Indent << "EMIT ";
557
558 if (hasResult()) {
560 O << " = ";
561 }
562
563 switch (getOpcode()) {
564 case VPInstruction::Not:
565 O << "not";
566 break;
567 case VPInstruction::SLPLoad:
568 O << "combined load";
569 break;
570 case VPInstruction::SLPStore:
571 O << "combined store";
572 break;
573 case VPInstruction::ActiveLaneMask:
574 O << "active lane mask";
575 break;
576 case VPInstruction::FirstOrderRecurrenceSplice:
577 O << "first-order splice";
578 break;
579 case VPInstruction::BranchOnCond:
580 O << "branch-on-cond";
581 break;
582 case VPInstruction::CalculateTripCountMinusVF:
583 O << "TC > VF ? TC - VF : 0";
584 break;
585 case VPInstruction::CanonicalIVIncrementForPart:
586 O << "VF * Part +";
587 break;
588 case VPInstruction::BranchOnCount:
589 O << "branch-on-count";
590 break;
591 case VPInstruction::ComputeReductionResult:
592 O << "compute-reduction-result";
593 break;
594 default:
595 O << Instruction::getOpcodeName(getOpcode());
596 }
597
598 printFlags(O);
599 printOperands(O, SlotTracker);
600
601 if (auto DL = getDebugLoc()) {
602 O << ", !dbg ";
603 DL.print(O);
604 }
605}
606#endif
607
609 assert(State.VF.isVector() && "not widening");
610 auto &CI = *cast<CallInst>(getUnderlyingInstr());
611 assert(!isa<DbgInfoIntrinsic>(CI) &&
612 "DbgInfoIntrinsic should have been dropped during VPlan construction");
614
615 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
616 FunctionType *VFTy = nullptr;
617 if (Variant)
618 VFTy = Variant->getFunctionType();
619 for (unsigned Part = 0; Part < State.UF; ++Part) {
620 SmallVector<Type *, 2> TysForDecl;
621 // Add return type if intrinsic is overloaded on it.
622 if (UseIntrinsic &&
623 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
624 TysForDecl.push_back(
625 VectorType::get(CI.getType()->getScalarType(), State.VF));
626 SmallVector<Value *, 4> Args;
627 for (const auto &I : enumerate(operands())) {
628 // Some intrinsics have a scalar argument - don't replace it with a
629 // vector.
630 Value *Arg;
631 if (UseIntrinsic &&
632 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
633 Arg = State.get(I.value(), VPIteration(0, 0));
634 // Some vectorized function variants may also take a scalar argument,
635 // e.g. linear parameters for pointers. This needs to be the scalar value
636 // from the start of the respective part when interleaving.
637 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
638 Arg = State.get(I.value(), VPIteration(Part, 0));
639 else
640 Arg = State.get(I.value(), Part);
641 if (UseIntrinsic &&
642 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
643 TysForDecl.push_back(Arg->getType());
644 Args.push_back(Arg);
645 }
646
647 Function *VectorF;
648 if (UseIntrinsic) {
649 // Use vector version of the intrinsic.
650 Module *M = State.Builder.GetInsertBlock()->getModule();
651 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
652 assert(VectorF && "Can't retrieve vector intrinsic.");
653 } else {
654#ifndef NDEBUG
655 assert(Variant != nullptr && "Can't create vector function.");
656#endif
657 VectorF = Variant;
658 }
659
660 SmallVector<OperandBundleDef, 1> OpBundles;
661 CI.getOperandBundlesAsDefs(OpBundles);
662 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
663
664 if (isa<FPMathOperator>(V))
665 V->copyFastMathFlags(&CI);
666
667 if (!V->getType()->isVoidTy())
668 State.set(this, V, Part);
669 State.addMetadata(V, &CI);
670 }
671}
672
673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
674void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
675 VPSlotTracker &SlotTracker) const {
676 O << Indent << "WIDEN-CALL ";
677
678 auto *CI = cast<CallInst>(getUnderlyingInstr());
679 if (CI->getType()->isVoidTy())
680 O << "void ";
681 else {
683 O << " = ";
684 }
685
686 O << "call @" << CI->getCalledFunction()->getName() << "(";
688 O << ")";
689
690 if (VectorIntrinsicID)
691 O << " (using vector intrinsic)";
692 else {
693 O << " (using library function";
694 if (Variant->hasName())
695 O << ": " << Variant->getName();
696 O << ")";
697 }
698}
699
700void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
701 VPSlotTracker &SlotTracker) const {
702 O << Indent << "WIDEN-SELECT ";
704 O << " = select ";
706 O << ", ";
708 O << ", ";
710 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
711}
712#endif
713
714void VPWidenSelectRecipe::execute(VPTransformState &State) {
716
717 // The condition can be loop invariant but still defined inside the
718 // loop. This means that we can't just use the original 'cond' value.
719 // We have to take the 'vectorized' value and pick the first lane.
720 // Instcombine will make this a no-op.
721 auto *InvarCond =
722 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
723
724 for (unsigned Part = 0; Part < State.UF; ++Part) {
725 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
726 Value *Op0 = State.get(getOperand(1), Part);
727 Value *Op1 = State.get(getOperand(2), Part);
728 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
729 State.set(this, Sel, Part);
730 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
731 }
732}
733
734VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
735 const FastMathFlags &FMF) {
736 AllowReassoc = FMF.allowReassoc();
737 NoNaNs = FMF.noNaNs();
738 NoInfs = FMF.noInfs();
739 NoSignedZeros = FMF.noSignedZeros();
740 AllowReciprocal = FMF.allowReciprocal();
741 AllowContract = FMF.allowContract();
742 ApproxFunc = FMF.approxFunc();
743}
744
745#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
746void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
747 switch (OpType) {
748 case OperationType::Cmp:
750 break;
751 case OperationType::DisjointOp:
752 if (DisjointFlags.IsDisjoint)
753 O << " disjoint";
754 break;
755 case OperationType::PossiblyExactOp:
756 if (ExactFlags.IsExact)
757 O << " exact";
758 break;
759 case OperationType::OverflowingBinOp:
760 if (WrapFlags.HasNUW)
761 O << " nuw";
762 if (WrapFlags.HasNSW)
763 O << " nsw";
764 break;
765 case OperationType::FPMathOp:
767 break;
768 case OperationType::GEPOp:
770 O << " inbounds";
771 break;
772 case OperationType::NonNegOp:
773 if (NonNegFlags.NonNeg)
774 O << " nneg";
775 break;
776 case OperationType::Other:
777 break;
778 }
779 if (getNumOperands() > 0)
780 O << " ";
781}
782#endif
783
784void VPWidenRecipe::execute(VPTransformState &State) {
786 auto &Builder = State.Builder;
787 switch (Opcode) {
788 case Instruction::Call:
789 case Instruction::Br:
790 case Instruction::PHI:
791 case Instruction::GetElementPtr:
792 case Instruction::Select:
793 llvm_unreachable("This instruction is handled by a different recipe.");
794 case Instruction::UDiv:
795 case Instruction::SDiv:
796 case Instruction::SRem:
797 case Instruction::URem:
798 case Instruction::Add:
799 case Instruction::FAdd:
800 case Instruction::Sub:
801 case Instruction::FSub:
802 case Instruction::FNeg:
803 case Instruction::Mul:
804 case Instruction::FMul:
805 case Instruction::FDiv:
806 case Instruction::FRem:
807 case Instruction::Shl:
808 case Instruction::LShr:
809 case Instruction::AShr:
810 case Instruction::And:
811 case Instruction::Or:
812 case Instruction::Xor: {
813 // Just widen unops and binops.
814 for (unsigned Part = 0; Part < State.UF; ++Part) {
815 SmallVector<Value *, 2> Ops;
816 for (VPValue *VPOp : operands())
817 Ops.push_back(State.get(VPOp, Part));
818
819 Value *V = Builder.CreateNAryOp(Opcode, Ops);
820
821 if (auto *VecOp = dyn_cast<Instruction>(V))
822 setFlags(VecOp);
823
824 // Use this vector value for all users of the original instruction.
825 State.set(this, V, Part);
826 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
827 }
828
829 break;
830 }
831 case Instruction::Freeze: {
832 for (unsigned Part = 0; Part < State.UF; ++Part) {
833 Value *Op = State.get(getOperand(0), Part);
834
835 Value *Freeze = Builder.CreateFreeze(Op);
836 State.set(this, Freeze, Part);
837 }
838 break;
839 }
840 case Instruction::ICmp:
841 case Instruction::FCmp: {
842 // Widen compares. Generate vector compares.
843 bool FCmp = Opcode == Instruction::FCmp;
844 for (unsigned Part = 0; Part < State.UF; ++Part) {
845 Value *A = State.get(getOperand(0), Part);
846 Value *B = State.get(getOperand(1), Part);
847 Value *C = nullptr;
848 if (FCmp) {
849 // Propagate fast math flags.
850 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
851 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
852 Builder.setFastMathFlags(I->getFastMathFlags());
853 C = Builder.CreateFCmp(getPredicate(), A, B);
854 } else {
855 C = Builder.CreateICmp(getPredicate(), A, B);
856 }
857 State.set(this, C, Part);
858 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
859 }
860
861 break;
862 }
863 default:
864 // This instruction is not vectorized by simple widening.
865 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
866 << Instruction::getOpcodeName(Opcode));
867 llvm_unreachable("Unhandled instruction!");
868 } // end of switch.
869
870#if !defined(NDEBUG)
871 // Verify that VPlan type inference results agree with the type of the
872 // generated values.
873 for (unsigned Part = 0; Part < State.UF; ++Part) {
875 State.VF) == State.get(this, Part)->getType() &&
876 "inferred type and type from generated instructions do not match");
877 }
878#endif
879}
880
881#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
882void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
883 VPSlotTracker &SlotTracker) const {
884 O << Indent << "WIDEN ";
886 O << " = " << Instruction::getOpcodeName(Opcode);
887 printFlags(O);
889}
890#endif
891
892void VPWidenCastRecipe::execute(VPTransformState &State) {
894 auto &Builder = State.Builder;
895 /// Vectorize casts.
896 assert(State.VF.isVector() && "Not vectorizing?");
897 Type *DestTy = VectorType::get(getResultType(), State.VF);
898 VPValue *Op = getOperand(0);
899 for (unsigned Part = 0; Part < State.UF; ++Part) {
900 if (Part > 0 && Op->isLiveIn()) {
901 // FIXME: Remove once explicit unrolling is implemented using VPlan.
902 State.set(this, State.get(this, 0), Part);
903 continue;
904 }
905 Value *A = State.get(Op, Part);
906 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
907 State.set(this, Cast, Part);
908 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
909 }
910}
911
912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
913void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
914 VPSlotTracker &SlotTracker) const {
915 O << Indent << "WIDEN-CAST ";
917 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
918 printFlags(O);
920 O << " to " << *getResultType();
921}
922#endif
923
924/// This function adds
925/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
926/// to each vector element of Val. The sequence starts at StartIndex.
927/// \p Opcode is relevant for FP induction variable.
928static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
929 Instruction::BinaryOps BinOp, ElementCount VF,
930 IRBuilderBase &Builder) {
931 assert(VF.isVector() && "only vector VFs are supported");
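 // For example, with VF = 4, Val = <a, a, a, a>, StartIdx = 2 and Step = s,
 // the integer path below yields <a + 2*s, a + 3*s, a + 4*s, a + 5*s>.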
932
933 // Create and check the types.
934 auto *ValVTy = cast<VectorType>(Val->getType());
935 ElementCount VLen = ValVTy->getElementCount();
936
937 Type *STy = Val->getType()->getScalarType();
938 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
939 "Induction Step must be an integer or FP");
940 assert(Step->getType() == STy && "Step has wrong type");
941
943
944 // Create a vector of consecutive numbers from zero to VF.
945 VectorType *InitVecValVTy = ValVTy;
946 if (STy->isFloatingPointTy()) {
947 Type *InitVecValSTy =
948 IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
949 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
950 }
951 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
952
953 // Splat the StartIdx
954 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
955
956 if (STy->isIntegerTy()) {
957 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
958 Step = Builder.CreateVectorSplat(VLen, Step);
959 assert(Step->getType() == Val->getType() && "Invalid step vec");
960 // FIXME: The newly created binary instructions should contain nsw/nuw
961 // flags, which can be found from the original scalar operations.
962 Step = Builder.CreateMul(InitVec, Step);
963 return Builder.CreateAdd(Val, Step, "induction");
964 }
965
966 // Floating point induction.
967 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
968 "Binary Opcode should be specified for FP induction");
969 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
970 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
971
972 Step = Builder.CreateVectorSplat(VLen, Step);
973 Value *MulOp = Builder.CreateFMul(InitVec, Step);
974 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
975}
976
977/// A helper function that returns an integer or floating-point constant with
978/// value C.
979static Value *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
980 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
981 : ConstantFP::get(Ty, C);
982}
983
984static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
985 ElementCount VF) {
986 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
987 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
988 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
989 return B.CreateUIToFP(RuntimeVF, FTy);
990}
991
993 assert(!State.Instance && "Int or FP induction being replicated.");
994
997 TruncInst *Trunc = getTruncInst();
998 IRBuilderBase &Builder = State.Builder;
999 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1000 assert(State.VF.isVector() && "must have vector VF");
1001
1002 // The value from the original loop to which we are mapping the new induction
1003 // variable.
1004 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1005
1006 // Fast-math-flags propagate from the original induction instruction.
1007 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1008 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1009 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1010
1011 // Now do the actual transformations, and start with fetching the step value.
1012 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1013
1014 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1015 "Expected either an induction phi-node or a truncate of it!");
1016
1017 // Construct the initial value of the vector IV in the vector loop preheader
1018 auto CurrIP = Builder.saveIP();
1019 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1020 Builder.SetInsertPoint(VectorPH->getTerminator());
1021 if (isa<TruncInst>(EntryVal)) {
1022 assert(Start->getType()->isIntegerTy() &&
1023 "Truncation requires an integer type");
1024 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1025 Step = Builder.CreateTrunc(Step, TruncType);
1026 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1027 }
1028
1029 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1030 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1031 Value *SteppedStart = getStepVector(
1032 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1033
1034 // We create vector phi nodes for both integer and floating-point induction
1035 // variables. Here, we determine the kind of arithmetic we will perform.
1036 Instruction::BinaryOps AddOp;
1037 Instruction::BinaryOps MulOp;
1038 if (Step->getType()->isIntegerTy()) {
1039 AddOp = Instruction::Add;
1040 MulOp = Instruction::Mul;
1041 } else {
1042 AddOp = ID.getInductionOpcode();
1043 MulOp = Instruction::FMul;
1044 }
1045
1046 // Multiply the vectorization factor by the step using integer or
1047 // floating-point arithmetic as appropriate.
1048 Type *StepType = Step->getType();
1049 Value *RuntimeVF;
1050 if (Step->getType()->isFloatingPointTy())
1051 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1052 else
1053 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1054 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1055
1056 // Create a vector splat to use in the induction update.
1057 //
1058 // FIXME: If the step is non-constant, we create the vector splat with
1059 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1060 // handle a constant vector splat.
1061 Value *SplatVF = isa<Constant>(Mul)
1062 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1063 : Builder.CreateVectorSplat(State.VF, Mul);
1064 Builder.restoreIP(CurrIP);
1065
1066 // We may need to add the step a number of times, depending on the unroll
1067 // factor. The last of those goes into the PHI.
1068 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1069 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1070 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1071 Instruction *LastInduction = VecInd;
1072 for (unsigned Part = 0; Part < State.UF; ++Part) {
1073 State.set(this, LastInduction, Part);
1074
1075 if (isa<TruncInst>(EntryVal))
1076 State.addMetadata(LastInduction, EntryVal);
1077
1078 LastInduction = cast<Instruction>(
1079 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1080 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1081 }
1082
1083 LastInduction->setName("vec.ind.next");
1084 VecInd->addIncoming(SteppedStart, VectorPH);
1085 // Add induction update using an incorrect block temporarily. The phi node
1086 // will be fixed after VPlan execution. Note that at this point the latch
1087 // block cannot be used, as it does not exist yet.
1088 // TODO: Model increment value in VPlan, by turning the recipe into a
1089 // multi-def and a subclass of VPHeaderPHIRecipe.
1090 VecInd->addIncoming(LastInduction, VectorPH);
1091}
1092
1093#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1094void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1095 VPSlotTracker &SlotTracker) const {
1096 O << Indent << "WIDEN-INDUCTION";
1097 if (getTruncInst()) {
1098 O << "\\l\"";
1099 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1100 O << " +\n" << Indent << "\" ";
1102 } else
1103 O << " " << VPlanIngredient(IV);
1104
1105 O << ", ";
1107}
1108#endif
1109
1110bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1111 // The step may be defined by a recipe in the preheader (e.g. if it requires
1112 // SCEV expansion), but for the canonical induction the step is required to be
1113 // 1, which is represented as live-in.
1114 if (getStepValue()->getDefiningRecipe())
1115 return false;
1116 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1117 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1118 return StartC && StartC->isZero() && StepC && StepC->isOne();
1119}
1120
1121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1122void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
1123 VPSlotTracker &SlotTracker) const {
1124 O << Indent;
1126 O << Indent << "= DERIVED-IV ";
1128 O << " + ";
1130 O << " * ";
1132}
1133#endif
1134
1135void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
1136 // Fast-math-flags propagate from the original induction instruction.
1137 IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
1138 if (hasFastMathFlags())
1139 State.Builder.setFastMathFlags(getFastMathFlags());
1140
1141 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1142 /// variable on which to base the steps, \p Step is the size of the step.
1143
1144 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1145 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1146 IRBuilderBase &Builder = State.Builder;
1147
1148 // Ensure step has the same type as that of scalar IV.
1149 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1150 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1151
1152 // We build scalar steps for both integer and floating-point induction
1153 // variables. Here, we determine the kind of arithmetic we will perform.
1154 Instruction::BinaryOps AddOp;
1155 Instruction::BinaryOps MulOp;
1156 if (BaseIVTy->isIntegerTy()) {
1157 AddOp = Instruction::Add;
1158 MulOp = Instruction::Mul;
1159 } else {
1160 AddOp = InductionOpcode;
1161 MulOp = Instruction::FMul;
1162 }
1163
1164 // Determine the number of scalars we need to generate for each unroll
1165 // iteration.
1166 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
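 // If only lane 0 of this recipe is used, one scalar step per part suffices;
 // otherwise a value is created per lane below (plus, for scalable VFs, a
 // whole vector per part).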
1167 // Compute the scalar steps and save the results in State.
1168 Type *IntStepTy =
1169 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1170 Type *VecIVTy = nullptr;
1171 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1172 if (!FirstLaneOnly && State.VF.isScalable()) {
1173 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1174 UnitStepVec =
1175 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1176 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1177 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1178 }
1179
1180 unsigned StartPart = 0;
1181 unsigned EndPart = State.UF;
1182 unsigned StartLane = 0;
1183 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1184 if (State.Instance) {
1185 StartPart = State.Instance->Part;
1186 EndPart = StartPart + 1;
1187 StartLane = State.Instance->Lane.getKnownLane();
1188 EndLane = StartLane + 1;
1189 }
1190 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1191 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1192
1193 if (!FirstLaneOnly && State.VF.isScalable()) {
1194 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1195 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1196 if (BaseIVTy->isFloatingPointTy())
1197 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1198 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1199 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1200 State.set(this, Add, Part);
1201 // It's useful to record the lane values too for the known minimum number
1202 // of elements so we do those below. This improves the code quality when
1203 // trying to extract the first element, for example.
1204 }
1205
1206 if (BaseIVTy->isFloatingPointTy())
1207 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1208
1209 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1210 Value *StartIdx = Builder.CreateBinOp(
1211 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1212 // The step returned by `createStepForVF` is a runtime-evaluated value
1213 // when VF is scalable. Otherwise, it should be folded into a Constant.
1214 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1215 "Expected StartIdx to be folded to a constant when VF is not "
1216 "scalable");
1217 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1218 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1219 State.set(this, Add, VPIteration(Part, Lane));
1220 }
1221 }
1222}
1223
1224#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1225void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1226 VPSlotTracker &SlotTracker) const {
1227 O << Indent;
1229 O << " = SCALAR-STEPS ";
1231}
1232#endif
1233
1235 assert(State.VF.isVector() && "not widening");
1236 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1237 // Construct a vector GEP by widening the operands of the scalar GEP as
1238 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1239 // results in a vector of pointers when at least one operand of the GEP
1240 // is vector-typed. Thus, to keep the representation compact, we only use
1241 // vector-typed operands for loop-varying values.
1242
1243 if (areAllOperandsInvariant()) {
1244 // If we are vectorizing, but the GEP has only loop-invariant operands,
1245 // the GEP we build (by only using vector-typed operands for
1246 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1247 // produce a vector of pointers, we need to either arbitrarily pick an
1248 // operand to broadcast, or broadcast a clone of the original GEP.
1249 // Here, we broadcast a clone of the original.
1250 //
1251 // TODO: If at some point we decide to scalarize instructions having
1252 // loop-invariant operands, this special case will no longer be
1253 // required. We would add the scalarization decision to
1254 // collectLoopScalars() and teach getVectorValue() to broadcast
1255 // the lane-zero scalar value.
1256 SmallVector<Value *> Ops;
1257 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1258 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1259
1260 auto *NewGEP =
1261 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1262 ArrayRef(Ops).drop_front(), "", isInBounds());
1263 for (unsigned Part = 0; Part < State.UF; ++Part) {
1264 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1265 State.set(this, EntryPart, Part);
1266 State.addMetadata(EntryPart, GEP);
1267 }
1268 } else {
1269 // If the GEP has at least one loop-varying operand, we are sure to
1270 // produce a vector of pointers. But if we are only unrolling, we want
1271 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1272 // produce with the code below will be scalar (if VF == 1) or vector
1273 // (otherwise). Note that for the unroll-only case, we still maintain
1274 // values in the vector mapping with initVector, as we do for other
1275 // instructions.
1276 for (unsigned Part = 0; Part < State.UF; ++Part) {
1277 // The pointer operand of the new GEP. If it's loop-invariant, we
1278 // won't broadcast it.
1279 auto *Ptr = isPointerLoopInvariant()
1280 ? State.get(getOperand(0), VPIteration(0, 0))
1281 : State.get(getOperand(0), Part);
1282
1283 // Collect all the indices for the new GEP. If any index is
1284 // loop-invariant, we won't broadcast it.
1285 SmallVector<Value *, 4> Indices;
1286 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1287 VPValue *Operand = getOperand(I);
1288 if (isIndexLoopInvariant(I - 1))
1289 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1290 else
1291 Indices.push_back(State.get(Operand, Part));
1292 }
1293
1294 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1295 // but it should be a vector, otherwise.
1296 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1297 Indices, "", isInBounds());
1298 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1299 "NewGEP is not a pointer vector");
1300 State.set(this, NewGEP, Part);
1301 State.addMetadata(NewGEP, GEP);
1302 }
1303 }
1304}
1305
1306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1307void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1308 VPSlotTracker &SlotTracker) const {
1309 O << Indent << "WIDEN-GEP ";
1310 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1311 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1312 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1313
1314 O << " ";
1316 O << " = getelementptr";
1317 printFlags(O);
1319}
1320#endif
1321
1322void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1323 auto &Builder = State.Builder;
1325 for (unsigned Part = 0; Part < State.UF; ++Part) {
1326 // Calculate the pointer for the specific unroll-part.
1327 Value *PartPtr = nullptr;
1328 // Use i32 for the gep index type when the value is constant,
1329 // or query DataLayout for a more suitable index type otherwise.
1330 const DataLayout &DL =
1331 Builder.GetInsertBlock()->getModule()->getDataLayout();
1332 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1333 ? DL.getIndexType(IndexedTy->getPointerTo())
1334 : Builder.getInt32Ty();
1335 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1336 bool InBounds = isInBounds();
1337 if (IsReverse) {
1338 // If the address is consecutive but reversed, then the
1339 // wide store needs to start at the last vector element.
1340 // RunTimeVF = VScale * VF.getKnownMinValue()
1341 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1342 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1343 // NumElt = -Part * RunTimeVF
1344 Value *NumElt = Builder.CreateMul(
1345 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1346 // LastLane = 1 - RunTimeVF
1347 Value *LastLane =
1348 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1349 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1350 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1351 } else {
1352 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1353 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1354 }
1355
1356 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1357 }
1358}
1359
1360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1361void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1362 VPSlotTracker &SlotTracker) const {
1363 O << Indent;
1365 O << " = vector-pointer ";
1366 if (IsReverse)
1367 O << "(reverse) ";
1368
1370}
1371#endif
1372
1373void VPBlendRecipe::execute(VPTransformState &State) {
1375 // We know that all PHIs in non-header blocks are converted into
1376 // selects, so we don't have to worry about the insertion order and we
1377 // can just use the builder.
1378 // At this point we generate the predication tree. There may be
1379 // duplications since this is a simple recursive scan, but future
1380 // optimizations will clean it up.
1381
1382 unsigned NumIncoming = getNumIncomingValues();
1383
1384 // Generate a sequence of selects of the form:
1385 // SELECT(Mask3, In3,
1386 // SELECT(Mask2, In2,
1387 // SELECT(Mask1, In1,
1388 // In0)))
1389 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1390 // are essentially undef are taken from In0.
1391 VectorParts Entry(State.UF);
1392 for (unsigned In = 0; In < NumIncoming; ++In) {
1393 for (unsigned Part = 0; Part < State.UF; ++Part) {
1394 // We might have single edge PHIs (blocks) - use an identity
1395 // 'select' for the first PHI operand.
1396 Value *In0 = State.get(getIncomingValue(In), Part);
1397 if (In == 0)
1398 Entry[Part] = In0; // Initialize with the first incoming value.
1399 else {
1400 // Select between the current value and the previous incoming edge
1401 // based on the incoming mask.
1402 Value *Cond = State.get(getMask(In), Part);
1403 Entry[Part] =
1404 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1405 }
1406 }
1407 }
1408 for (unsigned Part = 0; Part < State.UF; ++Part)
1409 State.set(this, Entry[Part], Part);
1410}
1411
1412#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1413void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1414 VPSlotTracker &SlotTracker) const {
1415 O << Indent << "BLEND ";
1417 O << " =";
1418 if (getNumIncomingValues() == 1) {
1419 // Not a User of any mask: not really blending, this is a
1420 // single-predecessor phi.
1421 O << " ";
1423 } else {
1424 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1425 O << " ";
1427 O << "/";
1429 }
1430 }
1431}
1432
1433void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1434 VPSlotTracker &SlotTracker) const {
1435 O << Indent << "REDUCE ";
1437 O << " = ";
1439 O << " +";
1440 if (isa<FPMathOperator>(getUnderlyingInstr()))
1442 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1444 if (getCondOp()) {
1445 O << ", ";
1447 }
1448 O << ")";
1449 if (RdxDesc.IntermediateStore)
1450 O << " (with final reduction value stored in invariant address sank "
1451 "outside of loop)";
1452}
1453#endif
1454
1455bool VPReplicateRecipe::shouldPack() const {
1456 // Find if the recipe is used by a widened recipe via an intervening
1457 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1458 return any_of(users(), [](const VPUser *U) {
1459 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1460 return any_of(PredR->users(), [PredR](const VPUser *U) {
1461 return !U->usesScalars(PredR);
1462 });
1463 return false;
1464 });
1465}
1466
1467#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1468void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1469 VPSlotTracker &SlotTracker) const {
1470 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1471
1472 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1474 O << " = ";
1475 }
1476 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1477 O << "call";
1478 printFlags(O);
1479 O << "@" << CB->getCalledFunction()->getName() << "(";
1481 O, [&O, &SlotTracker](VPValue *Op) {
1482 Op->printAsOperand(O, SlotTracker);
1483 });
1484 O << ")";
1485 } else {
1487 printFlags(O);
1489 }
1490
1491 if (shouldPack())
1492 O << " (S->V)";
1493}
1494#endif
1495
1496/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1497/// if it is either defined outside the vector region or its operand is known to
1498/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1499/// TODO: Uniformity should be associated with a VPValue and there should be a
1500/// generic way to check.
1501static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1502 return C->isDefinedOutsideVectorRegions() ||
1503 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1504 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1505}
1506
1507Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1509 "Codegen only implemented for first lane.");
1510 switch (Opcode) {
1511 case Instruction::SExt:
1512 case Instruction::ZExt:
1513 case Instruction::Trunc: {
1514 // Note: SExt/ZExt not used yet.
1515 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1516 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1517 }
1518 default:
1519 llvm_unreachable("opcode not implemented yet");
1520 }
1521}
1522
1523void VPScalarCastRecipe ::execute(VPTransformState &State) {
1524 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1525 for (unsigned Part = 0; Part != State.UF; ++Part) {
1526 Value *Res;
1527 // Only generate a single instance, if the recipe is uniform across UFs and
1528 // VFs.
1529 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1530 Res = State.get(this, VPIteration(0, 0));
1531 else
1532 Res = generate(State, Part);
1533 State.set(this, Res, VPIteration(Part, 0));
1534 }
1535}
1536
1537#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1538void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1539 VPSlotTracker &SlotTracker) const {
1540 O << Indent << "SCALAR-CAST ";
1541 printAsOperand(O, SlotTracker);
1542 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1543 printOperands(O, SlotTracker);
1544 O << " to " << *ResultTy;
1545}
1546#endif
1547
1549 assert(State.Instance && "Branch on Mask works only on single instance.");
1550
1551 unsigned Part = State.Instance->Part;
1552 unsigned Lane = State.Instance->Lane.getKnownLane();
1553
1554 Value *ConditionBit = nullptr;
1555 VPValue *BlockInMask = getMask();
1556 if (BlockInMask) {
1557 ConditionBit = State.get(BlockInMask, Part);
1558 if (ConditionBit->getType()->isVectorTy())
1559 ConditionBit = State.Builder.CreateExtractElement(
1560 ConditionBit, State.Builder.getInt32(Lane));
1561 } else // Block in mask is all-one.
1562 ConditionBit = State.Builder.getTrue();
1563
1564 // Replace the temporary unreachable terminator with a new conditional branch,
1565 // whose two destinations will be set later when they are created.
1566 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1567 assert(isa<UnreachableInst>(CurrentTerminator) &&
1568 "Expected to replace unreachable terminator with conditional branch.");
1569 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1570 CondBr->setSuccessor(0, nullptr);
1571 ReplaceInstWithInst(CurrentTerminator, CondBr);
1572}
1573
1575 assert(State.Instance && "Predicated instruction PHI works per instance.");
1576 Instruction *ScalarPredInst =
1577 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1578 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1579 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1580 assert(PredicatingBB && "Predicated block has no single predecessor.");
1581 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1582 "operand must be VPReplicateRecipe");
1583
1584 // By current pack/unpack logic we need to generate only a single phi node: if
1585 // a vector value for the predicated instruction exists at this point it means
1586 // the instruction has vector users only, and a phi for the vector value is
1587 // needed. In this case the recipe of the predicated instruction is marked to
1588 // also do that packing, thereby "hoisting" the insert-element sequence.
1589 // Otherwise, a phi node for the scalar value is needed.
1590 unsigned Part = State.Instance->Part;
1591 if (State.hasVectorValue(getOperand(0), Part)) {
1592 Value *VectorValue = State.get(getOperand(0), Part);
1593 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1594 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1595 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1596 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1597 if (State.hasVectorValue(this, Part))
1598 State.reset(this, VPhi, Part);
1599 else
1600 State.set(this, VPhi, Part);
1601 // NOTE: Currently we need to update the value of the operand, so the next
1602 // predicated iteration inserts its generated value in the correct vector.
1603 State.reset(getOperand(0), VPhi, Part);
1604 } else {
1605 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1606 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1607 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1608 PredicatingBB);
1609 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1610 if (State.hasScalarValue(this, *State.Instance))
1611 State.reset(this, Phi, *State.Instance);
1612 else
1613 State.set(this, Phi, *State.Instance);
1614 // NOTE: Currently we need to update the value of the operand, so the next
1615 // predicated iteration inserts its generated value in the correct vector.
1616 State.reset(getOperand(0), Phi, *State.Instance);
1617 }
1618}
1619
1620#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1621void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1622 VPSlotTracker &SlotTracker) const {
1623 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1625 O << " = ";
1627}
1628
1629void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1630 VPSlotTracker &SlotTracker) const {
1631 O << Indent << "WIDEN ";
1632
1633 if (!isStore()) {
1635 O << " = ";
1636 }
1637 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1638
1640}
1641#endif
1642
1643void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1644 Value *Start = getStartValue()->getLiveInIRValue();
1645 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1646 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1647
1648 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1649 EntryPart->addIncoming(Start, VectorPH);
1650 EntryPart->setDebugLoc(getDebugLoc());
1651 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1652 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1653}
1654
1655#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1656void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1657 VPSlotTracker &SlotTracker) const {
1658 O << Indent << "EMIT ";
1660 O << " = CANONICAL-INDUCTION ";
1662}
1663#endif
1664
1667 VPValue *Step) const {
1668 // Must be an integer induction.
1670 return false;
1671 // Start must match the start value of this canonical induction.
1672 if (Start != getStartValue())
1673 return false;
1674
1675 // If the step is defined by a recipe, it is not a ConstantInt.
1676 if (Step->getDefiningRecipe())
1677 return false;
1678
1679 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1680 return StepC && StepC->isOne();
1681}
1682
1684 return IsScalarAfterVectorization &&
1685 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1686}
1687
1688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1689void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1690 VPSlotTracker &SlotTracker) const {
1691 O << Indent << "EMIT ";
1693 O << " = WIDEN-POINTER-INDUCTION ";
1695 O << ", " << *IndDesc.getStep();
1696}
1697#endif
1698
1700 assert(!State.Instance && "cannot be used in per-lane");
1701 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1702 SCEVExpander Exp(SE, DL, "induction");
1703
1704 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1705 &*State.Builder.GetInsertPoint());
1706 assert(!State.ExpandedSCEVs.contains(Expr) &&
1707 "Same SCEV expanded multiple times");
1708 State.ExpandedSCEVs[Expr] = Res;
1709 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1710 State.set(this, Res, {Part, 0});
1711}
1712
1713#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1714void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1715 VPSlotTracker &SlotTracker) const {
1716 O << Indent << "EMIT ";
1718 O << " = EXPAND SCEV " << *Expr;
1719}
1720#endif
1721
1722void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1723 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1724 Type *STy = CanonicalIV->getType();
1725 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1726 ElementCount VF = State.VF;
1727 Value *VStart = VF.isScalar()
1728 ? CanonicalIV
1729 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
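 // Each part's widened IV is broadcast(CanonicalIV) + Part * VF +
 // <0, 1, ..., VF - 1>, i.e. the lane indices covered by that part.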
1730 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1731 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1732 if (VF.isVector()) {
1733 VStep = Builder.CreateVectorSplat(VF, VStep);
1734 VStep =
1735 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1736 }
1737 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1738 State.set(this, CanonicalVectorIV, Part);
1739 }
1740}
1741
1742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1743void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1744 VPSlotTracker &SlotTracker) const {
1745 O << Indent << "EMIT ";
1747 O << " = WIDEN-CANONICAL-INDUCTION ";
1749}
1750#endif
1751
1752void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1753 auto &Builder = State.Builder;
1754 // Create a vector from the initial value.
1755 auto *VectorInit = getStartValue()->getLiveInIRValue();
1756
1757 Type *VecTy = State.VF.isScalar()
1758 ? VectorInit->getType()
1759 : VectorType::get(VectorInit->getType(), State.VF);
1760
1761 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1762 if (State.VF.isVector()) {
1763 auto *IdxTy = Builder.getInt32Ty();
1764 auto *One = ConstantInt::get(IdxTy, 1);
1765 IRBuilder<>::InsertPointGuard Guard(Builder);
1766 Builder.SetInsertPoint(VectorPH->getTerminator());
1767 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1768 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1769 VectorInit = Builder.CreateInsertElement(
1770 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1771 }
1772
1773 // Create a phi node for the new recurrence.
1774 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1775 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1776 EntryPart->addIncoming(VectorInit, VectorPH);
1777 State.set(this, EntryPart, 0);
1778}
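// Editorial worked example, not part of the LLVM source, assuming a fixed
// VF = 4 and an i32 recurrence with initial value %init: the preheader gets
//   %vector.recur.init = insertelement <4 x i32> poison, i32 %init, i32 3
// i.e. the initial value placed in the last lane, and the header phi starts as
//   %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], ...
// so that a later splice can pair the last element of the previous iteration
// with the elements of the current one.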
1779
1780#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1781void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1782 VPSlotTracker &SlotTracker) const {
1783 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1784 printAsOperand(O, SlotTracker);
1785 O << " = phi ";
1786 printOperands(O, SlotTracker);
1787}
1788#endif
1789
1790void VPReductionPHIRecipe::execute(VPTransformState &State) {
1791 auto &Builder = State.Builder;
1792
1793 // Reductions do not have to start at zero. They can start with
1794 // any loop invariant values.
1795 VPValue *StartVPV = getStartValue();
1796 Value *StartV = StartVPV->getLiveInIRValue();
1797
1798 // In order to support recurrences we need to be able to vectorize Phi nodes.
1799 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1800 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1801 // this value when we vectorize all of the instructions that use the PHI.
1802 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1803 Type *VecTy = ScalarPHI ? StartV->getType()
1804 : VectorType::get(StartV->getType(), State.VF);
1805
1806 BasicBlock *HeaderBB = State.CFG.PrevBB;
1807 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1808 "recipe must be in the vector loop header");
1809 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1810 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1811 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1812 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1813 State.set(this, EntryPart, Part, IsInLoop);
1814 }
1815
1816 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1817
1818 Value *Iden = nullptr;
1819 RecurKind RK = RdxDesc.getRecurrenceKind();
1820 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1821 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
1822 // MinMax and AnyOf reductions have the start value as their identity.
1823 if (ScalarPHI) {
1824 Iden = StartV;
1825 } else {
1826 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1827 Builder.SetInsertPoint(VectorPH->getTerminator());
1828 StartV = Iden =
1829 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1830 }
1831 } else {
1832 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1833 RdxDesc.getFastMathFlags());
1834
1835 if (!ScalarPHI) {
1836 Iden = Builder.CreateVectorSplat(State.VF, Iden);
1837 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1838 Builder.SetInsertPoint(VectorPH->getTerminator());
1839 Constant *Zero = Builder.getInt32(0);
1840 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1841 }
1842 }
1843
1844 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1845 Value *EntryPart = State.get(this, Part, IsInLoop);
1846 // Make sure to add the reduction start value only to the
1847 // first unroll part.
1848 Value *StartVal = (Part == 0) ? StartV : Iden;
1849 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1850 }
1851}
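// Editorial worked example, not part of the LLVM source: for an unordered
// integer add reduction with start value %s, fixed VF = 4 and UF = 2, the
// identity is 0, the preheader builds
//   %start.vec = insertelement <4 x i32> zeroinitializer, i32 %s, i32 0
// and the two header phis begin as
//   part 0: phi <4 x i32> [ %start.vec, %vector.ph ], ...
//   part 1: phi <4 x i32> [ zeroinitializer, %vector.ph ], ...
// Min/max and any-of reductions instead splat the start value, since for them
// the start value itself acts as the identity.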
1852
1853#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1854void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1855 VPSlotTracker &SlotTracker) const {
1856 O << Indent << "WIDEN-REDUCTION-PHI ";
1857
1858 printAsOperand(O, SlotTracker);
1859 O << " = phi ";
1860 printOperands(O, SlotTracker);
1861}
1862#endif
1863
1866 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1867
1868 Value *Op0 = State.get(getOperand(0), 0);
1869 Type *VecTy = Op0->getType();
1870 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1871 State.set(this, VecPhi, 0);
1872}
1873
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1875void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1876 VPSlotTracker &SlotTracker) const {
1877 O << Indent << "WIDEN-PHI ";
1878
1879 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1880 // Unless all incoming values are modeled in VPlan, print the original PHI
1881 // directly.
1882 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1883 // values as VPValues.
1884 if (getNumOperands() != OriginalPhi->getNumOperands()) {
1885 O << VPlanIngredient(OriginalPhi);
1886 return;
1887 }
1888
1889 printAsOperand(O, SlotTracker);
1890 O << " = phi ";
1891 printOperands(O, SlotTracker);
1892}
1893#endif
1894
1895// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1896// remove VPActiveLaneMaskPHIRecipe.
1897void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
1898 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1899 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1900 Value *StartMask = State.get(getOperand(0), Part);
1901 PHINode *EntryPart =
1902 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1903 EntryPart->addIncoming(StartMask, VectorPH);
1904 EntryPart->setDebugLoc(getDebugLoc());
1905 State.set(this, EntryPart, Part);
1906 }
1907}
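// Editorial note, not part of the LLVM source: each unroll part gets its own
// <VF x i1> phi, e.g. with VF = 4 and UF = 2 (hypothetical value names)
//   %active.lane.mask  = phi <4 x i1> [ %entry.mask.0, %vector.ph ], ...
//   %active.lane.mask1 = phi <4 x i1> [ %entry.mask.1, %vector.ph ], ...
// where the backedge values are added later by the recipe that computes the
// next iteration's active-lane mask.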
1908
1909#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1910void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1911 VPSlotTracker &SlotTracker) const {
1912 O << Indent << "ACTIVE-LANE-MASK-PHI ";
1913
1914 printAsOperand(O, SlotTracker);
1915 O << " = phi ";
1916 printOperands(O, SlotTracker);
1917}
1918#endif
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:439
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:276
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:210
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:122
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:246
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:200
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2344
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2077
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1527
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1186
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Definition: IRBuilder.cpp:1214
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:460
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2022
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:520
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2070
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:271
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2349
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1748
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2224
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1660
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2144
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:283
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1865
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1581
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
bool isBinaryOp() const
Definition: Instruction.h:256
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:253
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:690
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2594
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2641
iterator end()
Definition: VPlan.h:2625
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2653
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1907
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1910
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that a single incoming value has no mask.
Definition: VPlan.h:1904
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:493
size_t getNumSuccessors() const
Definition: VPlan.h:538
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:528
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2174
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e. has the same start value and a step of 1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:314
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:398
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:410
unsigned getVPDefID() const
Definition: VPlanValue.h:430
VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition: VPlan.h:2529
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2532
VPValue * getStartValue() const
Definition: VPlan.h:2528
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1602
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1145
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1154
@ CalculateTripCountMinusVF
Definition: VPlan.h:1152
bool hasResult() const
Definition: VPlan.h:1233
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1209
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:698
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:713
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:738
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:804
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:896
ExactFlagsTy ExactFlags
Definition: VPlan.h:950
FastMathFlagsTy FMFs
Definition: VPlan.h:953
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:952
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1068
bool isInBounds() const
Definition: VPlan.h:1107
GEPFlagsTy GEPFlags
Definition: VPlan.h:951
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1114
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:949
WrapFlagsTy WrapFlags
Definition: VPlan.h:948
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1118
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1101
bool hasNoSignedWrap() const
Definition: VPlan.h:1124
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1874
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2060
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2062
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2058
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2727
const VPBlockBase * getEntry() const
Definition: VPlan.h:2766
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1359
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2581
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:887
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:448
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:204
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1327
operand_range operands()
Definition: VPlanValue.h:279
unsigned getNumOperands() const
Definition: VPlanValue.h:253
operand_iterator op_begin()
Definition: VPlanValue.h:275
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:254
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:78
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1312
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:174
user_range users()
Definition: VPlanValue.h:134
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1355
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1686
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1681
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1692
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:2293
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:2828
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3042
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5043
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1451
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3402
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1417
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1412
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2174
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1046
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right)
See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we are trying to match.
Definition: LoopUtils.cpp:1037
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1207
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:369
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:377
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:413
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:416
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:393
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:402
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:381
bool isInvariantCond() const
Definition: VPlan.h:1465
VPValue * getCond() const
Definition: VPlan.h:1461
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.