LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenStoreEVLSC:
51 case VPWidenStoreSC:
52 return true;
53 case VPReplicateSC:
54 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
55 ->mayWriteToMemory();
56 case VPWidenCallSC:
57 return !cast<VPWidenCallRecipe>(this)
58 ->getCalledScalarFunction()
59 ->onlyReadsMemory();
60 case VPBranchOnMaskSC:
61 case VPScalarIVStepsSC:
62 case VPPredInstPHISC:
63 return false;
64 case VPBlendSC:
65 case VPReductionSC:
66 case VPWidenCanonicalIVSC:
67 case VPWidenCastSC:
68 case VPWidenGEPSC:
69 case VPWidenIntOrFpInductionSC:
70 case VPWidenLoadEVLSC:
71 case VPWidenLoadSC:
72 case VPWidenPHISC:
73 case VPWidenSC:
74 case VPWidenSelectSC: {
75 const Instruction *I =
76 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
77 (void)I;
78 assert((!I || !I->mayWriteToMemory()) &&
79 "underlying instruction may write to memory");
80 return false;
81 }
82 default:
83 return true;
84 }
85}
86
88 switch (getVPDefID()) {
89 case VPWidenLoadEVLSC:
90 case VPWidenLoadSC:
91 return true;
92 case VPReplicateSC:
93 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
94 ->mayReadFromMemory();
95 case VPWidenCallSC:
96 return !cast<VPWidenCallRecipe>(this)
97 ->getCalledScalarFunction()
98 ->onlyWritesMemory();
99 case VPBranchOnMaskSC:
100 case VPPredInstPHISC:
101 case VPScalarIVStepsSC:
102 case VPWidenStoreEVLSC:
103 case VPWidenStoreSC:
104 return false;
105 case VPBlendSC:
106 case VPReductionSC:
107 case VPWidenCanonicalIVSC:
108 case VPWidenCastSC:
109 case VPWidenGEPSC:
110 case VPWidenIntOrFpInductionSC:
111 case VPWidenPHISC:
112 case VPWidenSC:
113 case VPWidenSelectSC: {
114 const Instruction *I =
115 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
116 (void)I;
117 assert((!I || !I->mayReadFromMemory()) &&
118 "underlying instruction may read from memory");
119 return false;
120 }
121 default:
122 return true;
123 }
124}
125
127 switch (getVPDefID()) {
128 case VPDerivedIVSC:
129 case VPPredInstPHISC:
130 case VPScalarCastSC:
131 return false;
132 case VPInstructionSC:
133 switch (cast<VPInstruction>(this)->getOpcode()) {
134 case Instruction::Or:
135 case Instruction::ICmp:
136 case Instruction::Select:
142 return false;
143 default:
144 return true;
145 }
146 case VPWidenCallSC: {
147 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
148 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
149 }
150 case VPBlendSC:
151 case VPReductionSC:
152 case VPScalarIVStepsSC:
153 case VPWidenCanonicalIVSC:
154 case VPWidenCastSC:
155 case VPWidenGEPSC:
156 case VPWidenIntOrFpInductionSC:
157 case VPWidenPHISC:
158 case VPWidenPointerInductionSC:
159 case VPWidenSC:
160 case VPWidenSelectSC: {
161 const Instruction *I =
162 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
163 (void)I;
164 assert((!I || !I->mayHaveSideEffects()) &&
165 "underlying instruction has side-effects");
166 return false;
167 }
168 case VPInterleaveSC:
169 return mayWriteToMemory();
170 case VPWidenLoadEVLSC:
171 case VPWidenLoadSC:
172 case VPWidenStoreEVLSC:
173 case VPWidenStoreSC:
174 assert(
175 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
177 "mayHaveSideffects result for ingredient differs from this "
178 "implementation");
179 return mayWriteToMemory();
180 case VPReplicateSC: {
181 auto *R = cast<VPReplicateRecipe>(this);
182 return R->getUnderlyingInstr()->mayHaveSideEffects();
183 }
184 default:
185 return true;
186 }
187}
188
190 auto Lane = VPLane::getLastLaneForVF(State.VF);
191 VPValue *ExitValue = getOperand(0);
193 Lane = VPLane::getFirstLane();
194 VPBasicBlock *MiddleVPBB =
195 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
196 assert(MiddleVPBB->getNumSuccessors() == 0 &&
197 "the middle block must not have any successors");
198 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
199 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
200 MiddleBB);
201}
202
203#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
205 O << "Live-out ";
207 O << " = ";
209 O << "\n";
210}
211#endif
212
214 assert(!Parent && "Recipe already in some VPBasicBlock");
215 assert(InsertPos->getParent() &&
216 "Insertion position not in any VPBasicBlock");
217 InsertPos->getParent()->insert(this, InsertPos->getIterator());
218}
219
222 assert(!Parent && "Recipe already in some VPBasicBlock");
223 assert(I == BB.end() || I->getParent() == &BB);
224 BB.insert(this, I);
225}
226
228 assert(!Parent && "Recipe already in some VPBasicBlock");
229 assert(InsertPos->getParent() &&
230 "Insertion position not in any VPBasicBlock");
231 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
232}
233
235 assert(getParent() && "Recipe not in any VPBasicBlock");
237 Parent = nullptr;
238}
239
241 assert(getParent() && "Recipe not in any VPBasicBlock");
243}
244
247 insertAfter(InsertPos);
248}
249
253 insertBefore(BB, I);
254}
255
257 assert(OpType == OperationType::FPMathOp &&
258 "recipe doesn't have fast math flags");
259 FastMathFlags Res;
260 Res.setAllowReassoc(FMFs.AllowReassoc);
261 Res.setNoNaNs(FMFs.NoNaNs);
262 Res.setNoInfs(FMFs.NoInfs);
263 Res.setNoSignedZeros(FMFs.NoSignedZeros);
264 Res.setAllowReciprocal(FMFs.AllowReciprocal);
265 Res.setAllowContract(FMFs.AllowContract);
266 Res.setApproxFunc(FMFs.ApproxFunc);
267 return Res;
268}
269
272 const Twine &Name)
273 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
274 Pred, DL),
275 Opcode(Opcode), Name(Name.str()) {
276 assert(Opcode == Instruction::ICmp &&
277 "only ICmp predicates supported at the moment");
278}
279
281 std::initializer_list<VPValue *> Operands,
282 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
283 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
284 Opcode(Opcode), Name(Name.str()) {
285 // Make sure the VPInstruction is a floating-point operation.
286 assert(isFPMathOp() && "this op can't take fast-math flags");
287}
288
289bool VPInstruction::doesGeneratePerAllLanes() const {
290 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
291}
292
293bool VPInstruction::canGenerateScalarForFirstLane() const {
295 return true;
296
297 switch (Opcode) {
305 return true;
306 default:
307 return false;
308 }
309}
310
311Value *VPInstruction::generatePerLane(VPTransformState &State,
312 const VPIteration &Lane) {
313 IRBuilderBase &Builder = State.Builder;
314
316 "only PtrAdd opcodes are supported for now");
317 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
318 State.get(getOperand(1), Lane), Name);
319}
320
321Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
322 IRBuilderBase &Builder = State.Builder;
323
325 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
326 if (Part != 0 && vputils::onlyFirstPartUsed(this))
327 return State.get(this, 0, OnlyFirstLaneUsed);
328
329 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
330 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
331 auto *Res =
332 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
333 if (auto *I = dyn_cast<Instruction>(Res))
334 setFlags(I);
335 return Res;
336 }
337
338 switch (getOpcode()) {
339 case VPInstruction::Not: {
340 Value *A = State.get(getOperand(0), Part);
341 return Builder.CreateNot(A, Name);
342 }
343 case Instruction::ICmp: {
344 Value *A = State.get(getOperand(0), Part);
345 Value *B = State.get(getOperand(1), Part);
346 return Builder.CreateCmp(getPredicate(), A, B, Name);
347 }
348 case Instruction::Select: {
349 Value *Cond = State.get(getOperand(0), Part);
350 Value *Op1 = State.get(getOperand(1), Part);
351 Value *Op2 = State.get(getOperand(2), Part);
352 return Builder.CreateSelect(Cond, Op1, Op2, Name);
353 }
355 // Get first lane of vector induction variable.
356 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
357 // Get the original loop tripcount.
358 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
359
360 // If this part of the active lane mask is scalar, generate the CMP directly
361 // to avoid unnecessary extracts.
362 if (State.VF.isScalar())
363 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
364 Name);
365
366 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
367 auto *PredTy = VectorType::get(Int1Ty, State.VF);
368 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
369 {PredTy, ScalarTC->getType()},
370 {VIVElem0, ScalarTC}, nullptr, Name);
371 }
373 // Generate code to combine the previous and current values in vector v3.
374 //
375 // vector.ph:
376 // v_init = vector(..., ..., ..., a[-1])
377 // br vector.body
378 //
379 // vector.body
380 // i = phi [0, vector.ph], [i+4, vector.body]
381 // v1 = phi [v_init, vector.ph], [v2, vector.body]
382 // v2 = a[i, i+1, i+2, i+3];
383 // v3 = vector(v1(3), v2(0, 1, 2))
384
385 // For the first part, use the recurrence phi (v1), otherwise v2.
386 auto *V1 = State.get(getOperand(0), 0);
387 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
388 if (!PartMinus1->getType()->isVectorTy())
389 return PartMinus1;
390 Value *V2 = State.get(getOperand(1), Part);
391 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
392 }
394 if (Part != 0)
395 return State.get(this, 0, /*IsScalar*/ true);
396
397 Value *ScalarTC = State.get(getOperand(0), {0, 0});
398 Value *Step =
399 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
400 Value *Sub = Builder.CreateSub(ScalarTC, Step);
401 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
402 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
403 return Builder.CreateSelect(Cmp, Sub, Zero);
404 }
406 // Compute EVL
407 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
408 assert(AVL->getType()->isIntegerTy() &&
409 "Requested vector length should be an integer.");
410
411 // TODO: Add support for MaxSafeDist for correct loop emission.
412 assert(State.VF.isScalable() && "Expected scalable vector factor.");
413 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
414
415 Value *EVL = State.Builder.CreateIntrinsic(
416 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
417 {AVL, VFArg, State.Builder.getTrue()});
418 return EVL;
419 };
420 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
421 // be outside of the main loop.
422 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
423 // Compute VTC - IV as the AVL (requested vector length).
424 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
425 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
426 Value *AVL = State.Builder.CreateSub(TripCount, Index);
427 Value *EVL = GetEVL(State, AVL);
428 return EVL;
429 }
431 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
432 if (Part == 0)
433 return IV;
434
435 // The canonical IV is incremented by the vectorization factor (num of SIMD
436 // elements) times the unroll part.
437 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
438 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
440 }
442 if (Part != 0)
443 return nullptr;
444
445 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
446 VPRegionBlock *ParentRegion = getParent()->getParent();
447 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
448
449 // Replace the temporary unreachable terminator with a new conditional
450 // branch, hooking it up to backward destination for exiting blocks now and
451 // to forward destination(s) later when they are created.
452 BranchInst *CondBr =
453 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
454
455 if (getParent()->isExiting())
456 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
457
458 CondBr->setSuccessor(0, nullptr);
460 return CondBr;
461 }
463 if (Part != 0)
464 return nullptr;
465 // First create the compare.
466 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
467 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
468 Value *Cond = Builder.CreateICmpEQ(IV, TC);
469
470 // Now create the branch.
471 auto *Plan = getParent()->getPlan();
472 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
473 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
474
475 // Replace the temporary unreachable terminator with a new conditional
476 // branch, hooking it up to backward destination (the header) now and to the
477 // forward destination (the exit/middle block) later when it is created.
478 // Note that CreateCondBr expects a valid BB as first argument, so we need
479 // to set it to nullptr later.
480 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
481 State.CFG.VPBB2IRBB[Header]);
482 CondBr->setSuccessor(0, nullptr);
484 return CondBr;
485 }
487 if (Part != 0)
488 return State.get(this, 0, /*IsScalar*/ true);
489
490 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
491 // and will be removed by breaking up the recipe further.
492 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
493 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
494 // Get its reduction variable descriptor.
495 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
496
497 RecurKind RK = RdxDesc.getRecurrenceKind();
498
499 VPValue *LoopExitingDef = getOperand(1);
500 Type *PhiTy = OrigPhi->getType();
501 VectorParts RdxParts(State.UF);
502 for (unsigned Part = 0; Part < State.UF; ++Part)
503 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
504
505 // If the vector reduction can be performed in a smaller type, we truncate
506 // then extend the loop exit value to enable InstCombine to evaluate the
507 // entire expression in the smaller type.
508 // TODO: Handle this in truncateToMinBW.
509 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
510 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
511 for (unsigned Part = 0; Part < State.UF; ++Part)
512 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
513 }
514 // Reduce all of the unrolled parts into a single vector.
515 Value *ReducedPartRdx = RdxParts[0];
516 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
518 Op = Instruction::Or;
519
520 if (PhiR->isOrdered()) {
521 ReducedPartRdx = RdxParts[State.UF - 1];
522 } else {
523 // Floating-point operations should have some FMF to enable the reduction.
525 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
526 for (unsigned Part = 1; Part < State.UF; ++Part) {
527 Value *RdxPart = RdxParts[Part];
528 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
529 ReducedPartRdx = Builder.CreateBinOp(
530 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
531 else
532 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
533 }
534 }
535
536 // Create the reduction after the loop. Note that inloop reductions create
537 // the target reduction in the loop using a Reduction recipe.
538 if ((State.VF.isVector() ||
540 !PhiR->isInLoop()) {
541 ReducedPartRdx =
542 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
543 // If the reduction can be performed in a smaller type, we need to extend
544 // the reduction to the wider type before we branch to the original loop.
545 if (PhiTy != RdxDesc.getRecurrenceType())
546 ReducedPartRdx = RdxDesc.isSigned()
547 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
548 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
549 }
550
551 // If there were stores of the reduction value to a uniform memory address
552 // inside the loop, create the final store here.
553 if (StoreInst *SI = RdxDesc.IntermediateStore) {
554 auto *NewSI = Builder.CreateAlignedStore(
555 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
556 propagateMetadata(NewSI, SI);
557 }
558
559 return ReducedPartRdx;
560 }
562 Value *A = State.get(getOperand(0), Part);
563 Value *B = State.get(getOperand(1), Part);
564 return Builder.CreateLogicalAnd(A, B, Name);
565 }
568 "can only generate first lane for PtrAdd");
569 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
570 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
571 return Builder.CreatePtrAdd(Ptr, Addend, Name);
572 }
573 default:
574 llvm_unreachable("Unsupported opcode for instruction");
575 }
576}
577
578#if !defined(NDEBUG)
579bool VPInstruction::isFPMathOp() const {
580 // Inspired by FPMathOperator::classof. Notable differences are that we don't
581 // support Call, PHI and Select opcodes here yet.
582 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
583 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
584 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
585 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
586}
587#endif
588
590 assert(!State.Instance && "VPInstruction executing an Instance");
592 assert((hasFastMathFlags() == isFPMathOp() ||
593 getOpcode() == Instruction::Select) &&
594 "Recipe not a FPMathOp but has fast-math flags?");
595 if (hasFastMathFlags())
598 bool GeneratesPerFirstLaneOnly =
599 canGenerateScalarForFirstLane() &&
602 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
603 for (unsigned Part = 0; Part < State.UF; ++Part) {
604 if (GeneratesPerAllLanes) {
605 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
606 Lane != NumLanes; ++Lane) {
607 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
608 assert(GeneratedValue && "generatePerLane must produce a value");
609 State.set(this, GeneratedValue, VPIteration(Part, Lane));
610 }
611 continue;
612 }
613
614 Value *GeneratedValue = generatePerPart(State, Part);
615 if (!hasResult())
616 continue;
617 assert(GeneratedValue && "generatePerPart must produce a value");
618 assert((GeneratedValue->getType()->isVectorTy() ==
619 !GeneratesPerFirstLaneOnly ||
620 State.VF.isScalar()) &&
621 "scalar value but not only first lane defined");
622 State.set(this, GeneratedValue, Part,
623 /*IsScalar*/ GeneratesPerFirstLaneOnly);
624 }
625}
626
628 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
630 return vputils::onlyFirstLaneUsed(this);
631
632 switch (getOpcode()) {
633 default:
634 return false;
635 case Instruction::ICmp:
637 // TODO: Cover additional opcodes.
638 return vputils::onlyFirstLaneUsed(this);
644 return true;
645 };
646 llvm_unreachable("switch should return");
647}
648
649#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
651 VPSlotTracker SlotTracker(getParent()->getPlan());
652 print(dbgs(), "", SlotTracker);
653}
654
656 VPSlotTracker &SlotTracker) const {
657 O << Indent << "EMIT ";
658
659 if (hasResult()) {
661 O << " = ";
662 }
663
664 switch (getOpcode()) {
666 O << "not";
667 break;
669 O << "combined load";
670 break;
672 O << "combined store";
673 break;
675 O << "active lane mask";
676 break;
678 O << "EXPLICIT-VECTOR-LENGTH";
679 break;
681 O << "first-order splice";
682 break;
684 O << "branch-on-cond";
685 break;
687 O << "TC > VF ? TC - VF : 0";
688 break;
690 O << "VF * Part +";
691 break;
693 O << "branch-on-count";
694 break;
696 O << "compute-reduction-result";
697 break;
699 O << "logical-and";
700 break;
702 O << "ptradd";
703 break;
704 default:
706 }
707
708 printFlags(O);
710
711 if (auto DL = getDebugLoc()) {
712 O << ", !dbg ";
713 DL.print(O);
714 }
715}
716#endif
717
719 assert(State.VF.isVector() && "not widening");
720 Function *CalledScalarFn = getCalledScalarFunction();
721 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
722 "DbgInfoIntrinsic should have been dropped during VPlan construction");
724
725 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
726 FunctionType *VFTy = nullptr;
727 if (Variant)
728 VFTy = Variant->getFunctionType();
729 for (unsigned Part = 0; Part < State.UF; ++Part) {
730 SmallVector<Type *, 2> TysForDecl;
731 // Add return type if intrinsic is overloaded on it.
732 if (UseIntrinsic &&
733 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
734 TysForDecl.push_back(VectorType::get(
735 CalledScalarFn->getReturnType()->getScalarType(), State.VF));
737 for (const auto &I : enumerate(arg_operands())) {
738 // Some intrinsics have a scalar argument - don't replace it with a
739 // vector.
740 Value *Arg;
741 if (UseIntrinsic &&
742 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
743 Arg = State.get(I.value(), VPIteration(0, 0));
744 // Some vectorized function variants may also take a scalar argument,
745 // e.g. linear parameters for pointers. This needs to be the scalar value
746 // from the start of the respective part when interleaving.
747 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
748 Arg = State.get(I.value(), VPIteration(Part, 0));
749 else
750 Arg = State.get(I.value(), Part);
751 if (UseIntrinsic &&
752 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
753 TysForDecl.push_back(Arg->getType());
754 Args.push_back(Arg);
755 }
756
757 Function *VectorF;
758 if (UseIntrinsic) {
759 // Use vector version of the intrinsic.
760 Module *M = State.Builder.GetInsertBlock()->getModule();
761 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
762 assert(VectorF && "Can't retrieve vector intrinsic.");
763 } else {
764#ifndef NDEBUG
765 assert(Variant != nullptr && "Can't create vector function.");
766#endif
767 VectorF = Variant;
768 }
769
770 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
772 if (CI)
773 CI->getOperandBundlesAsDefs(OpBundles);
774
775 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
776
777 if (isa<FPMathOperator>(V))
778 V->copyFastMathFlags(CI);
779
780 if (!V->getType()->isVoidTy())
781 State.set(this, V, Part);
782 State.addMetadata(V, CI);
783 }
784}
785
786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
788 VPSlotTracker &SlotTracker) const {
789 O << Indent << "WIDEN-CALL ";
790
791 Function *CalledFn = getCalledScalarFunction();
792 if (CalledFn->getReturnType()->isVoidTy())
793 O << "void ";
794 else {
796 O << " = ";
797 }
798
799 O << "call @" << CalledFn->getName() << "(";
801 Op->printAsOperand(O, SlotTracker);
802 });
803 O << ")";
804
805 if (VectorIntrinsicID)
806 O << " (using vector intrinsic)";
807 else {
808 O << " (using library function";
809 if (Variant->hasName())
810 O << ": " << Variant->getName();
811 O << ")";
812 }
813}
814
816 VPSlotTracker &SlotTracker) const {
817 O << Indent << "WIDEN-SELECT ";
819 O << " = select ";
821 O << ", ";
823 O << ", ";
825 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
826}
827#endif
828
831
832 // The condition can be loop invariant but still defined inside the
833 // loop. This means that we can't just use the original 'cond' value.
834 // We have to take the 'vectorized' value and pick the first lane.
835 // Instcombine will make this a no-op.
836 auto *InvarCond =
837 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
838
839 for (unsigned Part = 0; Part < State.UF; ++Part) {
840 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
841 Value *Op0 = State.get(getOperand(1), Part);
842 Value *Op1 = State.get(getOperand(2), Part);
843 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
844 State.set(this, Sel, Part);
845 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
846 }
847}
848
849VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
850 const FastMathFlags &FMF) {
851 AllowReassoc = FMF.allowReassoc();
852 NoNaNs = FMF.noNaNs();
853 NoInfs = FMF.noInfs();
854 NoSignedZeros = FMF.noSignedZeros();
855 AllowReciprocal = FMF.allowReciprocal();
856 AllowContract = FMF.allowContract();
857 ApproxFunc = FMF.approxFunc();
858}
859
860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
862 switch (OpType) {
863 case OperationType::Cmp:
865 break;
866 case OperationType::DisjointOp:
868 O << " disjoint";
869 break;
870 case OperationType::PossiblyExactOp:
871 if (ExactFlags.IsExact)
872 O << " exact";
873 break;
874 case OperationType::OverflowingBinOp:
875 if (WrapFlags.HasNUW)
876 O << " nuw";
877 if (WrapFlags.HasNSW)
878 O << " nsw";
879 break;
880 case OperationType::FPMathOp:
882 break;
883 case OperationType::GEPOp:
885 O << " inbounds";
886 break;
887 case OperationType::NonNegOp:
888 if (NonNegFlags.NonNeg)
889 O << " nneg";
890 break;
891 case OperationType::Other:
892 break;
893 }
894 if (getNumOperands() > 0)
895 O << " ";
896}
897#endif
898
901 auto &Builder = State.Builder;
902 switch (Opcode) {
903 case Instruction::Call:
904 case Instruction::Br:
905 case Instruction::PHI:
906 case Instruction::GetElementPtr:
907 case Instruction::Select:
908 llvm_unreachable("This instruction is handled by a different recipe.");
909 case Instruction::UDiv:
910 case Instruction::SDiv:
911 case Instruction::SRem:
912 case Instruction::URem:
913 case Instruction::Add:
914 case Instruction::FAdd:
915 case Instruction::Sub:
916 case Instruction::FSub:
917 case Instruction::FNeg:
918 case Instruction::Mul:
919 case Instruction::FMul:
920 case Instruction::FDiv:
921 case Instruction::FRem:
922 case Instruction::Shl:
923 case Instruction::LShr:
924 case Instruction::AShr:
925 case Instruction::And:
926 case Instruction::Or:
927 case Instruction::Xor: {
928 // Just widen unops and binops.
929 for (unsigned Part = 0; Part < State.UF; ++Part) {
931 for (VPValue *VPOp : operands())
932 Ops.push_back(State.get(VPOp, Part));
933
934 Value *V = Builder.CreateNAryOp(Opcode, Ops);
935
936 if (auto *VecOp = dyn_cast<Instruction>(V))
937 setFlags(VecOp);
938
939 // Use this vector value for all users of the original instruction.
940 State.set(this, V, Part);
941 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
942 }
943
944 break;
945 }
946 case Instruction::Freeze: {
947 for (unsigned Part = 0; Part < State.UF; ++Part) {
948 Value *Op = State.get(getOperand(0), Part);
949
950 Value *Freeze = Builder.CreateFreeze(Op);
951 State.set(this, Freeze, Part);
952 }
953 break;
954 }
955 case Instruction::ICmp:
956 case Instruction::FCmp: {
957 // Widen compares. Generate vector compares.
958 bool FCmp = Opcode == Instruction::FCmp;
959 for (unsigned Part = 0; Part < State.UF; ++Part) {
960 Value *A = State.get(getOperand(0), Part);
961 Value *B = State.get(getOperand(1), Part);
962 Value *C = nullptr;
963 if (FCmp) {
964 // Propagate fast math flags.
965 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
966 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
967 Builder.setFastMathFlags(I->getFastMathFlags());
968 C = Builder.CreateFCmp(getPredicate(), A, B);
969 } else {
970 C = Builder.CreateICmp(getPredicate(), A, B);
971 }
972 State.set(this, C, Part);
973 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
974 }
975
976 break;
977 }
978 default:
979 // This instruction is not vectorized by simple widening.
980 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
981 << Instruction::getOpcodeName(Opcode));
982 llvm_unreachable("Unhandled instruction!");
983 } // end of switch.
984
985#if !defined(NDEBUG)
986 // Verify that VPlan type inference results agree with the type of the
987 // generated values.
988 for (unsigned Part = 0; Part < State.UF; ++Part) {
990 State.VF) == State.get(this, Part)->getType() &&
991 "inferred type and type from generated instructions do not match");
992 }
993#endif
994}
995
996#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
998 VPSlotTracker &SlotTracker) const {
999 O << Indent << "WIDEN ";
1001 O << " = " << Instruction::getOpcodeName(Opcode);
1002 printFlags(O);
1004}
1005#endif
1006
1009 auto &Builder = State.Builder;
1010 /// Vectorize casts.
1011 assert(State.VF.isVector() && "Not vectorizing?");
1012 Type *DestTy = VectorType::get(getResultType(), State.VF);
1013 VPValue *Op = getOperand(0);
1014 for (unsigned Part = 0; Part < State.UF; ++Part) {
1015 if (Part > 0 && Op->isLiveIn()) {
1016 // FIXME: Remove once explicit unrolling is implemented using VPlan.
1017 State.set(this, State.get(this, 0), Part);
1018 continue;
1019 }
1020 Value *A = State.get(Op, Part);
1021 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1022 State.set(this, Cast, Part);
1023 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1024 }
1025}
1026
1027#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1029 VPSlotTracker &SlotTracker) const {
1030 O << Indent << "WIDEN-CAST ";
1032 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1033 printFlags(O);
1035 O << " to " << *getResultType();
1036}
1037#endif
1038
// NOTE(review): this chunk is a Doxygen scrape — the upstream line numbers are
// fused into the code text and several hyperlinked lines were dropped. The
// NOTEs below mark each gap; restore the missing lines from upstream before
// attempting to build this file.
1039/// This function adds
1040/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
1041/// to each vector element of Val. The sequence starts at StartIndex.
1042/// \p Opcode is relevant for FP induction variable.
1043static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
// NOTE(review): line 1044 dropped by extraction — the body uses `BinOp` and
// `VF`, so it is likely "Instruction::BinaryOps BinOp, ElementCount VF," —
// confirm against upstream.
1045 IRBuilderBase &Builder) {
1046 assert(VF.isVector() && "only vector VFs are supported");
1047
1048 // Create and check the types.
1049 auto *ValVTy = cast<VectorType>(Val->getType());
1050 ElementCount VLen = ValVTy->getElementCount();
1051
1052 Type *STy = Val->getType()->getScalarType();
1053 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1054 "Induction Step must be an integer or FP");
1055 assert(Step->getType() == STy && "Step has wrong type");
1056
// NOTE(review): line 1057 dropped by extraction — restore from upstream.
1058
1059 // Create a vector of consecutive numbers from zero to VF.
1060 VectorType *InitVecValVTy = ValVTy;
1061 if (STy->isFloatingPointTy()) {
1062 Type *InitVecValSTy =
// NOTE(review): line 1063 dropped — by analogy with line 1102 below it is
// presumably "IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());"
// — confirm against upstream.
1064 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1065 }
1066 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1067
1068 // Splat the StartIdx
1069 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1070
1071 if (STy->isIntegerTy()) {
1072 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1073 Step = Builder.CreateVectorSplat(VLen, Step);
1074 assert(Step->getType() == Val->getType() && "Invalid step vec");
1075 // FIXME: The newly created binary instructions should contain nsw/nuw
1076 // flags, which can be found from the original scalar operations.
1077 Step = Builder.CreateMul(InitVec, Step);
1078 return Builder.CreateAdd(Val, Step, "induction");
1079 }
1080
1081 // Floating point induction.
1082 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1083 "Binary Opcode should be specified for FP induction");
1084 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1085 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1086
1087 Step = Builder.CreateVectorSplat(VLen, Step);
1088 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1089 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1090}
1091
1092/// A helper function that returns an integer or floating-point constant with
1093/// value C.
// NOTE(review): line 1094 (the function signature) dropped by extraction. The
// call site below passes (Type*, int64_t), so it is presumably
// "static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {" — confirm.
1095 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1096 : ConstantFP::get(Ty, C);
1097}
1098
// Helper returning the runtime VF converted to floating point. Line 1099 (the
// opening of the signature) was dropped by extraction; the call site uses
// (IRBuilderBase &, Type *FTy, ElementCount VF) — confirm against upstream.
1100 ElementCount VF) {
1101 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1102 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1103 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1104 return B.CreateUIToFP(RuntimeVF, FTy);
1105}
1106
// Widens an integer/FP induction: builds the stepped start vector in the
// preheader, a vector phi in the loop header, and one step-add per unroll
// part. NOTE(review): Doxygen-scrape artifact — the enclosing signature (line
// 1107, presumably "void VPWidenIntOrFpInductionRecipe::execute(...)") and
// lines flagged below were dropped; restore from upstream.
1108 assert(!State.Instance && "Int or FP induction being replicated.");
1109
1110 Value *Start = getStartValue()->getLiveInIRValue();
// NOTE(review): line 1111 dropped — the body reads `ID` (an
// InductionDescriptor) and `IV` below; the dropped line presumably binds one
// of them — confirm against upstream.
1112 TruncInst *Trunc = getTruncInst();
1113 IRBuilderBase &Builder = State.Builder;
1114 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1115 assert(State.VF.isVector() && "must have vector VF");
1116
1117 // The value from the original loop to which we are mapping the new induction
1118 // variable.
1119 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1120
1121 // Fast-math-flags propagate from the original induction instruction.
1122 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1123 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1124 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1125
1126 // Now do the actual transformations, and start with fetching the step value.
1127 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1128
1129 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1130 "Expected either an induction phi-node or a truncate of it!");
1131
1132 // Construct the initial value of the vector IV in the vector loop preheader
1133 auto CurrIP = Builder.saveIP();
1134 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1135 Builder.SetInsertPoint(VectorPH->getTerminator());
1136 if (isa<TruncInst>(EntryVal)) {
1137 assert(Start->getType()->isIntegerTy() &&
1138 "Truncation requires an integer type");
1139 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1140 Step = Builder.CreateTrunc(Step, TruncType);
1141 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1142 }
1143
1144 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1145 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1146 Value *SteppedStart = getStepVector(
1147 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1148
1149 // We create vector phi nodes for both integer and floating-point induction
1150 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): lines 1151-1152 dropped — `AddOp`/`MulOp` are assigned below,
// so these are presumably the "Instruction::BinaryOps AddOp;" /
// "Instruction::BinaryOps MulOp;" declarations — confirm against upstream.
1153 if (Step->getType()->isIntegerTy()) {
1154 AddOp = Instruction::Add;
1155 MulOp = Instruction::Mul;
1156 } else {
1157 AddOp = ID.getInductionOpcode();
1158 MulOp = Instruction::FMul;
1159 }
1160
1161 // Multiply the vectorization factor by the step using integer or
1162 // floating-point arithmetic as appropriate.
1163 Type *StepType = Step->getType();
1164 Value *RuntimeVF;
1165 if (Step->getType()->isFloatingPointTy())
1166 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1167 else
1168 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1169 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1170
1171 // Create a vector splat to use in the induction update.
1172 //
1173 // FIXME: If the step is non-constant, we create the vector splat with
1174 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1175 // handle a constant vector splat.
1176 Value *SplatVF = isa<Constant>(Mul)
1177 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1178 : Builder.CreateVectorSplat(State.VF, Mul);
1179 Builder.restoreIP(CurrIP);
1180
1181 // We may need to add the step a number of times, depending on the unroll
1182 // factor. The last of those goes into the PHI.
1183 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1184 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1185 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1186 Instruction *LastInduction = VecInd;
1187 for (unsigned Part = 0; Part < State.UF; ++Part) {
1188 State.set(this, LastInduction, Part);
1189
1190 if (isa<TruncInst>(EntryVal))
1191 State.addMetadata(LastInduction, EntryVal);
1192
1193 LastInduction = cast<Instruction>(
1194 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1195 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1196 }
1197
1198 LastInduction->setName("vec.ind.next");
1199 VecInd->addIncoming(SteppedStart, VectorPH);
1200 // Add induction update using an incorrect block temporarily. The phi node
1201 // will be fixed after VPlan execution. Note that at this point the latch
1202 // block cannot be used, as it does not exist yet.
1203 // TODO: Model increment value in VPlan, by turning the recipe into a
1204 // multi-def and a subclass of VPHeaderPHIRecipe.
1205 VecInd->addIncoming(LastInduction, VectorPH);
1206}
1207
1208#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): line 1209 (the print signature, presumably
// "void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, ...") and the
// operand-printing lines 1216 and 1221 were dropped by extraction.
1210 VPSlotTracker &SlotTracker) const {
1211 O << Indent << "WIDEN-INDUCTION";
1212 if (getTruncInst()) {
1213 O << "\\l\"";
1214 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1215 O << " +\n" << Indent << "\" ";
1217 } else
1218 O << " " << VPlanIngredient(IV);
1219
1220 O << ", ";
1222}
1223#endif
1224
// Returns true when this widened induction is the canonical one: start 0,
// step 1 (a live-in constant), and scalar type matching the canonical IV phi.
// NOTE(review): Doxygen-scrape artifact — line 1225 (the signature) and line
// 1229 (presumably the "step is not a live-in" early-out guarding the
// getLiveInIRValue() call below) were dropped; restore from upstream.
1226 // The step may be defined by a recipe in the preheader (e.g. if it requires
1227 // SCEV expansion), but for the canonical induction the step is required to be
1228 // 1, which is represented as live-in.
1230 return false;
1231 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1232 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1233 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1234 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1235 getScalarType() == CanIV->getScalarType();
1236}
1237
1238#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): this DERIVED-IV print lost its signature (line 1239) and the
// interleaved printAsOperand lines (1242, 1244, 1246, 1248) to extraction.
1240 VPSlotTracker &SlotTracker) const {
1241 O << Indent;
1243 O << Indent << "= DERIVED-IV ";
1245 O << " + ";
1247 O << " * ";
1249}
1250#endif
1251
// Emits per-part (and, unless only the first lane is used, per-lane) scalar
// induction steps: value = BaseIV + (Part*VF + Lane) * Step, using integer or
// FP arithmetic as dictated by the IV type. NOTE(review): Doxygen-scrape
// artifact — the signature (line 1252) and lines flagged below were dropped;
// restore from upstream.
1253 // Fast-math-flags propagate from the original induction instruction.
// NOTE(review): line 1254 dropped — presumably the FastMathFlagGuard
// declaration that makes the conditional flag-set below scoped — confirm.
1255 if (hasFastMathFlags())
// NOTE(review): line 1256 dropped — presumably the
// State.Builder.setFastMathFlags(...) call guarded by the `if` above.
1257
1258 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1259 /// variable on which to base the steps, \p Step is the size of the step.
1260
1261 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1262 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1263 IRBuilderBase &Builder = State.Builder;
1264
1265 // Ensure step has the same type as that of scalar IV.
1266 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1267 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1268
1269 // We build scalar steps for both integer and floating-point induction
1270 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): lines 1271-1272 dropped — presumably the
// "Instruction::BinaryOps AddOp;" / "Instruction::BinaryOps MulOp;"
// declarations assigned below — confirm against upstream.
1273 if (BaseIVTy->isIntegerTy()) {
1274 AddOp = Instruction::Add;
1275 MulOp = Instruction::Mul;
1276 } else {
1277 AddOp = InductionOpcode;
1278 MulOp = Instruction::FMul;
1279 }
1280
1281 // Determine the number of scalars we need to generate for each unroll
1282 // iteration.
1283 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1284 // Compute the scalar steps and save the results in State.
1285 Type *IntStepTy =
1286 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1287 Type *VecIVTy = nullptr;
1288 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1289 if (!FirstLaneOnly && State.VF.isScalable()) {
1290 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1291 UnitStepVec =
1292 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1293 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1294 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1295 }
1296
1297 unsigned StartPart = 0;
1298 unsigned EndPart = State.UF;
1299 unsigned StartLane = 0;
1300 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1301 if (State.Instance) {
1302 StartPart = State.Instance->Part;
1303 EndPart = StartPart + 1;
1304 StartLane = State.Instance->Lane.getKnownLane();
1305 EndLane = StartLane + 1;
1306 }
1307 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1308 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1309
1310 if (!FirstLaneOnly && State.VF.isScalable()) {
1311 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1312 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1313 if (BaseIVTy->isFloatingPointTy())
1314 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1315 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1316 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1317 State.set(this, Add, Part);
1318 // It's useful to record the lane values too for the known minimum number
1319 // of elements so we do those below. This improves the code quality when
1320 // trying to extract the first element, for example.
1321 }
1322
1323 if (BaseIVTy->isFloatingPointTy())
1324 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1325
1326 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1327 Value *StartIdx = Builder.CreateBinOp(
1328 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1329 // The step returned by `createStepForVF` is a runtime-evaluated value
1330 // when VF is scalable. Otherwise, it should be folded into a Constant.
1331 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1332 "Expected StartIdx to be folded to a constant when VF is not "
1333 "scalable");
1334 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1335 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1336 State.set(this, Add, VPIteration(Part, Lane));
1337 }
1338 }
1339}
1340
1341#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the SCALAR-STEPS print lost its signature (line 1342) and its
// printAsOperand/printOperands lines (1345, 1347) to extraction.
1343 VPSlotTracker &SlotTracker) const {
1344 O << Indent;
1346 O << " = SCALAR-STEPS ";
1348}
1349#endif
1350
// Widens a GEP: if every operand is loop-invariant a single scalar GEP is
// built and splatted per part; otherwise a (possibly vector) GEP is built per
// unroll part, broadcasting only loop-varying operands. NOTE(review):
// Doxygen-scrape artifact — the signature (line 1351, presumably
// "void VPWidenGEPRecipe::execute(VPTransformState &State) {") and the lines
// flagged below were dropped; restore from upstream.
1352 assert(State.VF.isVector() && "not widening");
1353 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1354 // Construct a vector GEP by widening the operands of the scalar GEP as
1355 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1356 // results in a vector of pointers when at least one operand of the GEP
1357 // is vector-typed. Thus, to keep the representation compact, we only use
1358 // vector-typed operands for loop-varying values.
1359
1360 if (areAllOperandsInvariant()) {
1361 // If we are vectorizing, but the GEP has only loop-invariant operands,
1362 // the GEP we build (by only using vector-typed operands for
1363 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1364 // produce a vector of pointers, we need to either arbitrarily pick an
1365 // operand to broadcast, or broadcast a clone of the original GEP.
1366 // Here, we broadcast a clone of the original.
1367 //
1368 // TODO: If at some point we decide to scalarize instructions having
1369 // loop-invariant operands, this special case will no longer be
1370 // required. We would add the scalarization decision to
1371 // collectLoopScalars() and teach getVectorValue() to broadcast
1372 // the lane-zero scalar value.
// NOTE(review): line 1373 dropped — `Ops` is populated below, so this is
// presumably a "SmallVector<Value *, ...> Ops;" declaration — confirm.
1374 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1375 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1376
1377 auto *NewGEP =
1378 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1379 ArrayRef(Ops).drop_front(), "", isInBounds());
1380 for (unsigned Part = 0; Part < State.UF; ++Part) {
1381 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1382 State.set(this, EntryPart, Part);
1383 State.addMetadata(EntryPart, GEP);
1384 }
1385 } else {
1386 // If the GEP has at least one loop-varying operand, we are sure to
1387 // produce a vector of pointers. But if we are only unrolling, we want
1388 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1389 // produce with the code below will be scalar (if VF == 1) or vector
1390 // (otherwise). Note that for the unroll-only case, we still maintain
1391 // values in the vector mapping with initVector, as we do for other
1392 // instructions.
1393 for (unsigned Part = 0; Part < State.UF; ++Part) {
1394 // The pointer operand of the new GEP. If it's loop-invariant, we
1395 // won't broadcast it.
1396 auto *Ptr = isPointerLoopInvariant()
1397 ? State.get(getOperand(0), VPIteration(0, 0))
1398 : State.get(getOperand(0), Part);
1399
1400 // Collect all the indices for the new GEP. If any index is
1401 // loop-invariant, we won't broadcast it.
// NOTE(review): line 1402 dropped — `Indices` is populated below, so this is
// presumably a "SmallVector<Value *, ...> Indices;" declaration — confirm.
1403 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1404 VPValue *Operand = getOperand(I);
1405 if (isIndexLoopInvariant(I - 1))
1406 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1407 else
1408 Indices.push_back(State.get(Operand, Part));
1409 }
1410
1411 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1412 // but it should be a vector, otherwise.
1413 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1414 Indices, "", isInBounds());
1415 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1416 "NewGEP is not a pointer vector");
1417 State.set(this, NewGEP, Part);
1418 State.addMetadata(NewGEP, GEP);
1419 }
1420 }
1421}
1422
1423#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the WIDEN-GEP print lost its signature (line 1424) and its
// printAsOperand/printOperands lines (1432, 1435) to extraction.
1425 VPSlotTracker &SlotTracker) const {
1426 O << Indent << "WIDEN-GEP ";
1427 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1428 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1429 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1430
1431 O << " ";
1433 O << " = getelementptr";
1434 printFlags(O);
1436}
1437#endif
1438
// Computes the per-part pointer for a consecutive (possibly reversed) wide
// memory access. NOTE(review): Doxygen-scrape artifact — line 1441 and the
// print lines flagged below were dropped; restore from upstream.
1439void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1440 auto &Builder = State.Builder;
// NOTE(review): line 1441 dropped by extraction — restore from upstream.
1442 for (unsigned Part = 0; Part < State.UF; ++Part) {
1443 // Calculate the pointer for the specific unroll-part.
1444 Value *PartPtr = nullptr;
1445 // Use i32 for the gep index type when the value is constant,
1446 // or query DataLayout for a more suitable index type otherwise.
1447 const DataLayout &DL =
1448 Builder.GetInsertBlock()->getModule()->getDataLayout();
1449 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1450 ? DL.getIndexType(IndexedTy->getPointerTo())
1451 : Builder.getInt32Ty();
1452 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1453 bool InBounds = isInBounds();
1454 if (IsReverse) {
1455 // If the address is consecutive but reversed, then the
1456 // wide store needs to start at the last vector element.
1457 // RunTimeVF = VScale * VF.getKnownMinValue()
1458 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1459 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1460 // NumElt = -Part * RunTimeVF
1461 Value *NumElt = Builder.CreateMul(
1462 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1463 // LastLane = 1 - RunTimeVF
1464 Value *LastLane =
1465 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1466 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1467 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1468 } else {
1469 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1470 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1471 }
1472
1473 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1474 }
1475}
1476
1477#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the vector-pointer print lost its signature (line 1478) and
// its printAsOperand/printOperands lines (1481, 1486) to extraction.
1479 VPSlotTracker &SlotTracker) const {
1480 O << Indent;
1482 O << " = vector-pointer ";
1483 if (IsReverse)
1484 O << "(reverse) ";
1485
1487}
1488#endif
1489
// Lowers a blend phi to a chain of selects over the incoming values/masks,
// one chain per unroll part. NOTE(review): Doxygen-scrape artifact — the
// signature (lines 1490-1491, presumably
// "void VPBlendRecipe::execute(VPTransformState &State) {") and the print
// lines flagged below were dropped; restore from upstream.
1492 // We know that all PHIs in non-header blocks are converted into
1493 // selects, so we don't have to worry about the insertion order and we
1494 // can just use the builder.
1495 // At this point we generate the predication tree. There may be
1496 // duplications since this is a simple recursive scan, but future
1497 // optimizations will clean it up.
1498
1499 unsigned NumIncoming = getNumIncomingValues();
1500
1501 // Generate a sequence of selects of the form:
1502 // SELECT(Mask3, In3,
1503 // SELECT(Mask2, In2,
1504 // SELECT(Mask1, In1,
1505 // In0)))
1506 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1507 // are essentially undef are taken from In0.
1508 VectorParts Entry(State.UF);
1509 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
1510 for (unsigned In = 0; In < NumIncoming; ++In) {
1511 for (unsigned Part = 0; Part < State.UF; ++Part) {
1512 // We might have single edge PHIs (blocks) - use an identity
1513 // 'select' for the first PHI operand.
1514 Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
1515 if (In == 0)
1516 Entry[Part] = In0; // Initialize with the first incoming value.
1517 else {
1518 // Select between the current value and the previous incoming edge
1519 // based on the incoming mask.
1520 Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
1521 Entry[Part] =
1522 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1523 }
1524 }
1525 }
1526 for (unsigned Part = 0; Part < State.UF; ++Part)
1527 State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
1528}
1529
1530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the BLEND print lost its signature (line 1531) and the
// operand-printing lines 1534, 1540, 1544 and 1548 to extraction.
1532 VPSlotTracker &SlotTracker) const {
1533 O << Indent << "BLEND ";
1535 O << " =";
1536 if (getNumIncomingValues() == 1) {
1537 // Not a User of any mask: not really blending, this is a
1538 // single-predecessor phi.
1539 O << " ";
1541 } else {
1542 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1543 O << " ";
1545 if (I == 0)
1546 continue;
1547 O << "/";
1549 }
1550 }
1551}
1552#endif
1553
// Emits the in-loop reduction: optionally masks the vector operand with the
// recurrence identity, then performs an ordered or unordered target
// reduction per unroll part, chaining the result through PrevInChain.
// NOTE(review): Doxygen-scrape artifact — the signature (line 1554) and the
// lines flagged below were dropped; restore from upstream.
1555 assert(!State.Instance && "Reduction being replicated.");
1556 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1557 RecurKind Kind = RdxDesc.getRecurrenceKind();
1558 // Propagate the fast-math flags carried by the underlying instruction.
// NOTE(review): line 1559 dropped — presumably a FastMathFlagGuard scoping
// the setFastMathFlags call below — confirm against upstream.
1560 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1561 for (unsigned Part = 0; Part < State.UF; ++Part) {
1562 Value *NewVecOp = State.get(getVecOp(), Part);
1563 if (VPValue *Cond = getCondOp()) {
1564 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1565 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1566 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1567 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1568 RdxDesc.getFastMathFlags());
1569 if (State.VF.isVector()) {
1570 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1571 }
1572
1573 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1574 NewVecOp = Select;
1575 }
1576 Value *NewRed;
1577 Value *NextInChain;
1578 if (IsOrdered) {
1579 if (State.VF.isVector())
1580 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1581 PrevInChain);
1582 else
1583 NewRed = State.Builder.CreateBinOp(
1584 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1585 NewVecOp);
1586 PrevInChain = NewRed;
1587 } else {
1588 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1589 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1590 }
// NOTE(review): line 1591 dropped — given the createMinMaxOp call below it is
// presumably the "isMinMaxRecurrenceKind(Kind)" condition — confirm.
1592 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1593 NewRed, PrevInChain);
1594 } else if (IsOrdered)
1595 NextInChain = NewRed;
1596 else
1597 NextInChain = State.Builder.CreateBinOp(
1598 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1599 State.set(this, NextInChain, Part, /*IsScalar*/ true);
1600 }
1601}
1602
1603#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the REDUCE print lost its signature (line 1604) and the
// operand/flag-printing lines 1607, 1609, 1612, 1614 and 1617 to extraction.
1605 VPSlotTracker &SlotTracker) const {
1606 O << Indent << "REDUCE ";
1608 O << " = ";
1610 O << " +";
1611 if (isa<FPMathOperator>(getUnderlyingInstr()))
1613 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1615 if (getCondOp()) {
1616 O << ", ";
1618 }
1619 O << ")";
1620 if (RdxDesc.IntermediateStore)
1621 O << " (with final reduction value stored in invariant address sank "
1622 "outside of loop)";
1623}
1624#endif
1625
// Returns true if any user reaches this replicated value through a
// VPPredInstPHIRecipe with a non-scalar use, in which case the scalar lanes
// must also be packed into a vector. NOTE(review): Doxygen-scrape artifact —
// the signature (line 1626, presumably
// "bool VPReplicateRecipe::shouldPack() const {") was dropped.
1627 // Find if the recipe is used by a widened recipe via an intervening
1628 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1629 return any_of(users(), [](const VPUser *U) {
1630 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1631 return any_of(PredR->users(), [PredR](const VPUser *U) {
1632 return !U->usesScalars(PredR);
1633 });
1634 return false;
1635 });
1636}
1637
1638#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the CLONE/REPLICATE print lost its signature (line 1639) and
// the operand-printing lines 1644, 1651, 1657 and 1659 to extraction.
1640 VPSlotTracker &SlotTracker) const {
1641 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1642
1643 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1645 O << " = ";
1646 }
1647 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1648 O << "call";
1649 printFlags(O);
1650 O << "@" << CB->getCalledFunction()->getName() << "(";
1652 O, [&O, &SlotTracker](VPValue *Op) {
1653 Op->printAsOperand(O, SlotTracker);
1654 });
1655 O << ")";
1656 } else {
1658 printFlags(O);
1660 }
1661
1662 if (shouldPack())
1663 O << " (S->V)";
1664}
1665#endif
1666
// NOTE(review): Doxygen-scrape artifact — the helper's signature (line 1672)
// and the first line of generate's assert (line 1679) were dropped; restore
// from upstream.
1667/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1668/// if it is either defined outside the vector region or its operand is known to
1669/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1670/// TODO: Uniformity should be associated with a VPValue and there should be a
1671/// generic way to check.
1673 return C->isDefinedOutsideVectorRegions() ||
1674 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1675 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1676}
1677
// Emits the scalar cast for lane 0 of the given unroll part.
1678Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
// NOTE(review): line 1679 dropped — it pairs with the string on the next line,
// so it is presumably "assert(vputils::onlyFirstLaneUsed(this) &&" — confirm.
1680 "Codegen only implemented for first lane.");
1681 switch (Opcode) {
1682 case Instruction::SExt:
1683 case Instruction::ZExt:
1684 case Instruction::Trunc: {
1685 // Note: SExt/ZExt not used yet.
1686 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1687 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1688 }
1689 default:
1690 llvm_unreachable("opcode not implemented yet");
1691 }
1692}
1693
1694void VPScalarCastRecipe ::execute(VPTransformState &State) {
1695 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1696 for (unsigned Part = 0; Part != State.UF; ++Part) {
1697 Value *Res;
1698 // Only generate a single instance, if the recipe is uniform across UFs and
1699 // VFs.
1700 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1701 Res = State.get(this, VPIteration(0, 0));
1702 else
1703 Res = generate(State, Part);
1704 State.set(this, Res, VPIteration(Part, 0));
1705 }
1706}
1707
1708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1709void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1710 VPSlotTracker &SlotTracker) const {
1711 O << Indent << "SCALAR-CAST ";
1712 printAsOperand(O, SlotTracker);
1713 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1714 printOperands(O, SlotTracker);
1715 O << " to " << *ResultTy;
1716}
1717#endif
1718
// Replaces the block's placeholder unreachable terminator with a conditional
// branch on the current lane's mask bit (or constant true when there is no
// mask). NOTE(review): Doxygen-scrape artifact — the signature (line 1719,
// presumably "void VPBranchOnMaskRecipe::execute(VPTransformState &State) {")
// was dropped; restore from upstream.
1720 assert(State.Instance && "Branch on Mask works only on single instance.");
1721
1722 unsigned Part = State.Instance->Part;
1723 unsigned Lane = State.Instance->Lane.getKnownLane();
1724
1725 Value *ConditionBit = nullptr;
1726 VPValue *BlockInMask = getMask();
1727 if (BlockInMask) {
1728 ConditionBit = State.get(BlockInMask, Part);
1729 if (ConditionBit->getType()->isVectorTy())
1730 ConditionBit = State.Builder.CreateExtractElement(
1731 ConditionBit, State.Builder.getInt32(Lane));
1732 } else // Block in mask is all-one.
1733 ConditionBit = State.Builder.getTrue();
1734
1735 // Replace the temporary unreachable terminator with a new conditional branch,
1736 // whose two destinations will be set later when they are created.
1737 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1738 assert(isa<UnreachableInst>(CurrentTerminator) &&
1739 "Expected to replace unreachable terminator with conditional branch.");
1740 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1741 CondBr->setSuccessor(0, nullptr);
1742 ReplaceInstWithInst(CurrentTerminator, CondBr);
1743}
1744
// Creates the phi that merges a predicated instruction's result back into the
// predicating block — either a vector phi continuing an insert-element pack,
// or a scalar phi with poison on the not-taken edge. NOTE(review):
// Doxygen-scrape artifact — the signature (line 1745, presumably
// "void VPPredInstPHIRecipe::execute(VPTransformState &State) {") was
// dropped; restore from upstream.
1746 assert(State.Instance && "Predicated instruction PHI works per instance.");
1747 Instruction *ScalarPredInst =
1748 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1749 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1750 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1751 assert(PredicatingBB && "Predicated block has no single predecessor.");
1752 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1753 "operand must be VPReplicateRecipe");
1754
1755 // By current pack/unpack logic we need to generate only a single phi node: if
1756 // a vector value for the predicated instruction exists at this point it means
1757 // the instruction has vector users only, and a phi for the vector value is
1758 // needed. In this case the recipe of the predicated instruction is marked to
1759 // also do that packing, thereby "hoisting" the insert-element sequence.
1760 // Otherwise, a phi node for the scalar value is needed.
1761 unsigned Part = State.Instance->Part;
1762 if (State.hasVectorValue(getOperand(0), Part)) {
1763 Value *VectorValue = State.get(getOperand(0), Part);
1764 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1765 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1766 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1767 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1768 if (State.hasVectorValue(this, Part))
1769 State.reset(this, VPhi, Part);
1770 else
1771 State.set(this, VPhi, Part);
1772 // NOTE: Currently we need to update the value of the operand, so the next
1773 // predicated iteration inserts its generated value in the correct vector.
1774 State.reset(getOperand(0), VPhi, Part);
1775 } else {
1776 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1777 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1778 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1779 PredicatingBB);
1780 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1781 if (State.hasScalarValue(this, *State.Instance))
1782 State.reset(this, Phi, *State.Instance);
1783 else
1784 State.set(this, Phi, *State.Instance);
1785 // NOTE: Currently we need to update the value of the operand, so the next
1786 // predicated iteration inserts its generated value in the correct vector.
1787 State.reset(getOperand(0), Phi, *State.Instance);
1788 }
1789}
1790
1791#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): Doxygen-scrape artifact — this region holds five print
// methods (PHI-PREDICATED-INSTRUCTION, widened load, vp.load, store and
// vp.store). Each lost its signature line (1792, 1800, 1808, 1816, 1822) and
// its printAsOperand/printOperands lines (1795, 1797, 1803, 1805, 1811, 1813,
// 1819, 1825) to extraction; restore from upstream.
1793 VPSlotTracker &SlotTracker) const {
1794 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1796 O << " = ";
1798}
1799
1801 VPSlotTracker &SlotTracker) const {
1802 O << Indent << "WIDEN ";
1804 O << " = load ";
1806}
1807
1809 VPSlotTracker &SlotTracker) const {
1810 O << Indent << "WIDEN ";
1812 O << " = vp.load ";
1814}
1815
1817 VPSlotTracker &SlotTracker) const {
1818 O << Indent << "WIDEN store ";
1820}
1821
1823 VPSlotTracker &SlotTracker) const {
1824 O << Indent << "WIDEN vp.store ";
1826}
1827#endif
1828
// Creates the scalar canonical induction phi ("index") in the vector loop
// header, seeded with the start value from the preheader; the same scalar is
// registered for every unroll part. NOTE(review): Doxygen-scrape artifact —
// the execute signature (line 1829), the print signature (1842), print
// operand lines (1845, 1847) and the isCanonical signature opening
// (1851-1852) were dropped; restore from upstream.
1830 Value *Start = getStartValue()->getLiveInIRValue();
1831 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1832 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1833
1834 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1835 EntryPart->addIncoming(Start, VectorPH);
1836 EntryPart->setDebugLoc(getDebugLoc());
1837 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1838 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1839}
1840
1841#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1843 VPSlotTracker &SlotTracker) const {
1844 O << Indent << "EMIT ";
1846 O << " = CANONICAL-INDUCTION ";
1848}
1849#endif
1850
// Returns true when an induction described by (Kind, Start, Step) is exactly
// this canonical IV: integer kind, same start value, and a live-in constant
// step of one.
1853 VPValue *Step) const {
1854 // Must be an integer induction.
1856 return false;
1857 // Start must match the start value of this canonical induction.
1858 if (Start != getStartValue())
1859 return false;
1860
1861 // If the step is defined by a recipe, it is not a ConstantInt.
1862 if (Step->getDefiningRecipe())
1863 return false;
1864
1865 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1866 return StepC && StepC->isOne();
1867}
1868
// Returns true when this pointer induction only needs scalar values: it is
// scalar after vectorization and, for scalable VFs, only the first lane is
// used. NOTE(review): Doxygen-scrape artifact — the signature (line 1869) and
// the print signature/operand lines (1875, 1878, 1880) were dropped; restore
// from upstream.
1870 return IsScalarAfterVectorization &&
1871 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1872}
1873
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1876 VPSlotTracker &SlotTracker) const {
1877 O << Indent << "EMIT ";
1879 O << " = WIDEN-POINTER-INDUCTION ";
1881 O << ", " << *IndDesc.getStep();
1882}
1883#endif
1884
// Expands the wrapped SCEV expression to IR at the current insert point,
// records it in State.ExpandedSCEVs, and registers the single scalar result
// for every unroll part. NOTE(review): Doxygen-scrape artifact — the execute
// signature (line 1885), print signature (1900) and printAsOperand line
// (1903) were dropped; restore from upstream.
1886 assert(!State.Instance && "cannot be used in per-lane");
1887 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1888 SCEVExpander Exp(SE, DL, "induction");
1889
1890 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1891 &*State.Builder.GetInsertPoint());
1892 assert(!State.ExpandedSCEVs.contains(Expr) &&
1893 "Same SCEV expanded multiple times");
1894 State.ExpandedSCEVs[Expr] = Res;
1895 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1896 State.set(this, Res, {Part, 0});
1897}
1898
1899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1901 VPSlotTracker &SlotTracker) const {
1902 O << Indent << "EMIT ";
1904 O << " = EXPAND SCEV " << *Expr;
1905}
1906#endif
1907
// Widens the canonical IV: splats the scalar IV and adds the per-part step
// vector (Part*VF + step-vector) to form "vec.iv" for each unroll part.
// NOTE(review): Doxygen-scrape artifact — the execute signature (line 1908),
// print signature (1929) and operand-printing lines (1932, 1934) were
// dropped; restore from upstream.
1909 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1910 Type *STy = CanonicalIV->getType();
1911 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1912 ElementCount VF = State.VF;
1913 Value *VStart = VF.isScalar()
1914 ? CanonicalIV
1915 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1916 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1917 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1918 if (VF.isVector()) {
1919 VStep = Builder.CreateVectorSplat(VF, VStep);
1920 VStep =
1921 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1922 }
1923 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1924 State.set(this, CanonicalVectorIV, Part);
1925 }
1926}
1927
1928#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1930 VPSlotTracker &SlotTracker) const {
1931 O << Indent << "EMIT ";
1933 O << " = WIDEN-CANONICAL-INDUCTION ";
1935}
1936#endif
1937
// Creates the "vector.recur" phi for a first-order recurrence: for vector
// VFs the init value is inserted into the last lane of a poison vector in the
// preheader; only part 0 is set here (the backedge is wired up later).
// NOTE(review): Doxygen-scrape artifact — the execute signature (line 1938),
// print signature (1967) and operand-printing lines (1970, 1972) were
// dropped; restore from upstream.
1939 auto &Builder = State.Builder;
1940 // Create a vector from the initial value.
1941 auto *VectorInit = getStartValue()->getLiveInIRValue();
1942
1943 Type *VecTy = State.VF.isScalar()
1944 ? VectorInit->getType()
1945 : VectorType::get(VectorInit->getType(), State.VF);
1946
1947 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1948 if (State.VF.isVector()) {
1949 auto *IdxTy = Builder.getInt32Ty();
1950 auto *One = ConstantInt::get(IdxTy, 1);
1951 IRBuilder<>::InsertPointGuard Guard(Builder);
1952 Builder.SetInsertPoint(VectorPH->getTerminator());
1953 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1954 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1955 VectorInit = Builder.CreateInsertElement(
1956 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1957 }
1958
1959 // Create a phi node for the new recurrence.
1960 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1961 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1962 EntryPart->addIncoming(VectorInit, VectorPH);
1963 State.set(this, EntryPart, 0);
1964}
1965
1966#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1968 VPSlotTracker &SlotTracker) const {
1969 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1971 O << " = phi ";
1973}
1974#endif
1975
// Creates the vector-loop header phi(s) for a reduction, seeding part 0 with
// the start value and the remaining unroll parts with the reduction identity.
// NOTE(review): extraction artifact — the leading numbers are original source
// line numbers; the signature line (orig. 1976, presumably
// VPReductionPHIRecipe::execute(VPTransformState &State)) was dropped, and so
// was the outer condition between orig. 2005 and 2008 (orig. 2006-2007) that
// selects the first branch — per the comment it covered the min/max and
// any-of recurrence kinds. Confirm against the upstream file.
1977 auto &Builder = State.Builder;
1978
1979 // Reductions do not have to start at zero. They can start with
1980 // any loop invariant values.
1981 VPValue *StartVPV = getStartValue();
1982 Value *StartV = StartVPV->getLiveInIRValue();
1983
1984 // In order to support recurrences we need to be able to vectorize Phi nodes.
1985 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1986 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1987 // this value when we vectorize all of the instructions that use the PHI.
// In-loop reductions (and scalar VF) keep the phi scalar.
1988 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1989 Type *VecTy = ScalarPHI ? StartV->getType()
1990 : VectorType::get(StartV->getType(), State.VF);
1991
1992 BasicBlock *HeaderBB = State.CFG.PrevBB;
1993 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1994 "recipe must be in the vector loop header");
// Ordered reductions use a single phi regardless of the unroll factor.
1995 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1996 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1997 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1998 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1999 State.set(this, EntryPart, Part, IsInLoop);
2000 }
2001
2002 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2003
2004 Value *Iden = nullptr;
2005 RecurKind RK = RdxDesc.getRecurrenceKind();
2008 // MinMax and AnyOf reductions have the start value as their identity.
2009 if (ScalarPHI) {
2010 Iden = StartV;
2011 } else {
// Splat the start value in the preheader; it doubles as the identity for
// these recurrence kinds.
2012 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2013 Builder.SetInsertPoint(VectorPH->getTerminator());
2014 StartV = Iden =
2015 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2016 }
2017 } else {
// Other kinds get their identity element from the recurrence descriptor.
2018 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2019 RdxDesc.getFastMathFlags());
2020
2021 if (!ScalarPHI) {
// Start vector = identity splat with the scalar start value in lane 0.
2022 Iden = Builder.CreateVectorSplat(State.VF, Iden);
2023 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2024 Builder.SetInsertPoint(VectorPH->getTerminator());
2025 Constant *Zero = Builder.getInt32(0);
2026 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2027 }
2028 }
2029
2030 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2031 Value *EntryPart = State.get(this, Part, IsInLoop);
2032 // Make sure to add the reduction start value only to the
2033 // first unroll part.
2034 Value *StartVal = (Part == 0) ? StartV : Iden;
2035 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2036 }
2037}
2038
// Debug-only printer emitting "WIDEN-REDUCTION-PHI <result> = phi <ops>".
// NOTE(review): extraction artifact — the signature line (orig. 2040) and the
// printAsOperand/printOperands calls (orig. 2044/2046) were lost; confirm
// against the upstream file.
2039#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2041 VPSlotTracker &SlotTracker) const {
2042 O << Indent << "WIDEN-REDUCTION-PHI ";
2043
2045 O << " = phi ";
2047}
2048#endif
2049
// Creates an empty vector phi (incoming edges are filled in later) for the
// VPlan-native path; the visible string is the tail of an assert guarding
// exactly that.
// NOTE(review): extraction artifact — the signature line and the assert head
// (orig. 2050-2051, presumably VPWidenPHIRecipe::execute(VPTransformState &)
// with assert(EnableVPlanNativePath && ...)) were lost; confirm upstream.
2052 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2053
// The phi's type is taken from the already-widened first operand.
2054 Value *Op0 = State.get(getOperand(0), 0);
2055 Type *VecTy = Op0->getType();
2056 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2057 State.set(this, VecPhi, 0);
2058}
2059
// Debug-only printer for VPWidenPHIRecipe. Falls back to printing the
// original IR phi when not all of its incoming values are modeled as VPlan
// operands.
// NOTE(review): extraction artifact — the signature line (orig. 2061) and the
// printAsOperand/printOperands calls (orig. 2075/2077) were lost; confirm
// against the upstream file.
2060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2062 VPSlotTracker &SlotTracker) const {
2063 O << Indent << "WIDEN-PHI ";
2064
2065 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2066 // Unless all incoming values are modeled in VPlan print the original PHI
2067 // directly.
2068 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2069 // values as VPValues.
2070 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2071 O << VPlanIngredient(OriginalPhi);
2072 return;
2073 }
2074
2076 O << " = phi ";
2078}
2079#endif
2080
2081// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2082// remove VPActiveLaneMaskPHIRecipe.
// Creates one active-lane-mask phi per unroll part, seeded from the start
// mask computed in the preheader; the backedge value is filled in later.
// NOTE(review): extraction artifact — the signature line (orig. 2083,
// presumably VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State))
// was lost; confirm against the upstream file.
2084 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2085 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2086 Value *StartMask = State.get(getOperand(0), Part);
2087 PHINode *EntryPart =
2088 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2089 EntryPart->addIncoming(StartMask, VectorPH);
2090 EntryPart->setDebugLoc(getDebugLoc());
2091 State.set(this, EntryPart, Part);
2092 }
2093}
2094
// Debug-only printer emitting "ACTIVE-LANE-MASK-PHI <result> = phi <ops>".
// NOTE(review): extraction artifact — the signature line (orig. 2096) and the
// printAsOperand/printOperands calls (orig. 2100/2102) were lost; confirm
// against the upstream file.
2095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2097 VPSlotTracker &SlotTracker) const {
2098 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2099
2101 O << " = phi ";
2103}
2104#endif
2105
// Creates the scalar phi tracking the explicit-vector-length-based induction
// variable; VP (vector-predication) vectorization requires UF == 1.
// NOTE(review): extraction artifact — the signature line (orig. 2106,
// presumably VPEVLBasedIVPHIRecipe::execute(VPTransformState &State)) was
// lost; confirm against the upstream file.
2107 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2108 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
// Start value is the first lane of part 0; the phi is registered as scalar.
2109 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2110 PHINode *EntryPart =
2111 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2112 EntryPart->addIncoming(Start, VectorPH);
2113 EntryPart->setDebugLoc(getDebugLoc());
2114 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2115}
2116
// Debug-only printer emitting
// "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI <result> = phi <ops>".
// NOTE(review): extraction artifact — the signature line (orig. 2118) and the
// printAsOperand/printOperands calls (orig. 2122/2124) were lost; confirm
// against the upstream file.
2117#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2119 VPSlotTracker &SlotTracker) const {
2120 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2121
2123 O << " = phi ";
2125}
2126#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:323
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:259
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
bool willReturn() const
Determine if the function will return.
Definition: Function.h:643
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:232
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:576
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2361
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1166
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1193
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2081
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool isBinaryOp() const
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:254
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2827
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2874
iterator end()
Definition: VPlan.h:2858
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2886
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1973
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1978
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1970
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:489
size_t getNumSuccessors() const
Definition: VPlan.h:534
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2249
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, /p Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:313
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:401
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:413
unsigned getVPDefID() const
Definition: VPlanValue.h:433
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2765
VPValue * getStartValue() const
Definition: VPlan.h:2764
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1669
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1166
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1176
@ CalculateTripCountMinusVF
Definition: VPlan.h:1174
bool hasResult() const
Definition: VPlan.h:1285
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1261
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:694
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:709
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:734
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:800
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:898
ExactFlagsTy ExactFlags
Definition: VPlan.h:954
FastMathFlagsTy FMFs
Definition: VPlan.h:957
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:956
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1083
bool isInBounds() const
Definition: VPlan.h:1122
GEPFlagsTy GEPFlags
Definition: VPlan.h:955
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1129
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:953
WrapFlagsTy WrapFlags
Definition: VPlan.h:952
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1133
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1116
bool hasNoSignedWrap() const
Definition: VPlan.h:1139
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1942
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2135
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2131
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2960
const VPBlockBase * getEntry() const
Definition: VPlan.h:2999
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2213
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1412
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2814
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:889
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:454
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1306
operand_range operands()
Definition: VPlanValue.h:278
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:274
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1302
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1485
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1489
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1408
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1753
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1748
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1767
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1759
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3061
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3248
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5079
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3608
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1454
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1449
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2406
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2165
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
static bool isDbgInfoIntrinsic(Intrinsic::ID ID)
Check if ID corresponds to a debug info intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1195
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:365
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:373
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:409
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:412
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:389
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:398
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1532
VPValue * getCond() const
Definition: VPlan.h:1528
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.