LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenStoreEVLSC:
51 case VPWidenStoreSC:
52 return true;
53 case VPReplicateSC:
54 case VPWidenCallSC:
55 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
56 ->mayWriteToMemory();
57 case VPBranchOnMaskSC:
58 case VPScalarIVStepsSC:
59 case VPPredInstPHISC:
60 return false;
61 case VPBlendSC:
62 case VPReductionSC:
63 case VPWidenCanonicalIVSC:
64 case VPWidenCastSC:
65 case VPWidenGEPSC:
66 case VPWidenIntOrFpInductionSC:
67 case VPWidenLoadEVLSC:
68 case VPWidenLoadSC:
69 case VPWidenPHISC:
70 case VPWidenSC:
71 case VPWidenSelectSC: {
72 const Instruction *I =
73 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
74 (void)I;
75 assert((!I || !I->mayWriteToMemory()) &&
76 "underlying instruction may write to memory");
77 return false;
78 }
79 default:
80 return true;
81 }
82}
83
85 switch (getVPDefID()) {
86 case VPWidenLoadEVLSC:
87 case VPWidenLoadSC:
88 return true;
89 case VPReplicateSC:
90 case VPWidenCallSC:
91 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
92 ->mayReadFromMemory();
93 case VPBranchOnMaskSC:
94 case VPPredInstPHISC:
95 case VPScalarIVStepsSC:
96 case VPWidenStoreEVLSC:
97 case VPWidenStoreSC:
98 return false;
99 case VPBlendSC:
100 case VPReductionSC:
101 case VPWidenCanonicalIVSC:
102 case VPWidenCastSC:
103 case VPWidenGEPSC:
104 case VPWidenIntOrFpInductionSC:
105 case VPWidenPHISC:
106 case VPWidenSC:
107 case VPWidenSelectSC: {
108 const Instruction *I =
109 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
110 (void)I;
111 assert((!I || !I->mayReadFromMemory()) &&
112 "underlying instruction may read from memory");
113 return false;
114 }
115 default:
116 return true;
117 }
118}
119
121 switch (getVPDefID()) {
122 case VPDerivedIVSC:
123 case VPPredInstPHISC:
124 case VPScalarCastSC:
125 return false;
126 case VPInstructionSC:
127 switch (cast<VPInstruction>(this)->getOpcode()) {
128 case Instruction::Or:
129 case Instruction::ICmp:
130 case Instruction::Select:
135 return false;
136 default:
137 return true;
138 }
139 case VPWidenCallSC:
140 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
141 ->mayHaveSideEffects();
142 case VPBlendSC:
143 case VPReductionSC:
144 case VPScalarIVStepsSC:
145 case VPWidenCanonicalIVSC:
146 case VPWidenCastSC:
147 case VPWidenGEPSC:
148 case VPWidenIntOrFpInductionSC:
149 case VPWidenPHISC:
150 case VPWidenPointerInductionSC:
151 case VPWidenSC:
152 case VPWidenSelectSC: {
153 const Instruction *I =
154 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
155 (void)I;
156 assert((!I || !I->mayHaveSideEffects()) &&
157 "underlying instruction has side-effects");
158 return false;
159 }
160 case VPInterleaveSC:
161 return mayWriteToMemory();
162 case VPWidenLoadEVLSC:
163 case VPWidenLoadSC:
164 case VPWidenStoreEVLSC:
165 case VPWidenStoreSC:
166 assert(
167 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
169 "mayHaveSideffects result for ingredient differs from this "
170 "implementation");
171 return mayWriteToMemory();
172 case VPReplicateSC: {
173 auto *R = cast<VPReplicateRecipe>(this);
174 return R->getUnderlyingInstr()->mayHaveSideEffects();
175 }
176 default:
177 return true;
178 }
179}
180
182 auto Lane = VPLane::getLastLaneForVF(State.VF);
183 VPValue *ExitValue = getOperand(0);
185 Lane = VPLane::getFirstLane();
186 VPBasicBlock *MiddleVPBB =
187 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
188 assert(MiddleVPBB->getNumSuccessors() == 0 &&
189 "the middle block must not have any successors");
190 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
191 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
192 MiddleBB);
193}
194
195#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
197 O << "Live-out ";
199 O << " = ";
201 O << "\n";
202}
203#endif
204
206 assert(!Parent && "Recipe already in some VPBasicBlock");
207 assert(InsertPos->getParent() &&
208 "Insertion position not in any VPBasicBlock");
209 InsertPos->getParent()->insert(this, InsertPos->getIterator());
210}
211
214 assert(!Parent && "Recipe already in some VPBasicBlock");
215 assert(I == BB.end() || I->getParent() == &BB);
216 BB.insert(this, I);
217}
218
220 assert(!Parent && "Recipe already in some VPBasicBlock");
221 assert(InsertPos->getParent() &&
222 "Insertion position not in any VPBasicBlock");
223 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
224}
225
227 assert(getParent() && "Recipe not in any VPBasicBlock");
229 Parent = nullptr;
230}
231
233 assert(getParent() && "Recipe not in any VPBasicBlock");
235}
236
239 insertAfter(InsertPos);
240}
241
245 insertBefore(BB, I);
246}
247
249 assert(OpType == OperationType::FPMathOp &&
250 "recipe doesn't have fast math flags");
251 FastMathFlags Res;
252 Res.setAllowReassoc(FMFs.AllowReassoc);
253 Res.setNoNaNs(FMFs.NoNaNs);
254 Res.setNoInfs(FMFs.NoInfs);
255 Res.setNoSignedZeros(FMFs.NoSignedZeros);
256 Res.setAllowReciprocal(FMFs.AllowReciprocal);
257 Res.setAllowContract(FMFs.AllowContract);
258 Res.setApproxFunc(FMFs.ApproxFunc);
259 return Res;
260}
261
264 const Twine &Name)
265 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
266 Pred, DL),
267 Opcode(Opcode), Name(Name.str()) {
268 assert(Opcode == Instruction::ICmp &&
269 "only ICmp predicates supported at the moment");
270}
271
273 std::initializer_list<VPValue *> Operands,
274 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
275 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
276 Opcode(Opcode), Name(Name.str()) {
277 // Make sure the VPInstruction is a floating-point operation.
278 assert(isFPMathOp() && "this op can't take fast-math flags");
279}
280
281bool VPInstruction::doesGeneratePerAllLanes() const {
282 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
283}
284
285bool VPInstruction::canGenerateScalarForFirstLane() const {
287 return true;
288
289 switch (Opcode) {
297 return true;
298 default:
299 return false;
300 }
301}
302
303Value *VPInstruction::generatePerLane(VPTransformState &State,
304 const VPIteration &Lane) {
305 IRBuilderBase &Builder = State.Builder;
306
308 "only PtrAdd opcodes are supported for now");
309 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
310 State.get(getOperand(1), Lane), Name);
311}
312
313Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
314 IRBuilderBase &Builder = State.Builder;
315
317 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
318 if (Part != 0 && vputils::onlyFirstPartUsed(this))
319 return State.get(this, 0, OnlyFirstLaneUsed);
320
321 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
322 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
323 auto *Res =
324 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
325 if (auto *I = dyn_cast<Instruction>(Res))
326 setFlags(I);
327 return Res;
328 }
329
330 switch (getOpcode()) {
331 case VPInstruction::Not: {
332 Value *A = State.get(getOperand(0), Part);
333 return Builder.CreateNot(A, Name);
334 }
335 case Instruction::ICmp: {
336 Value *A = State.get(getOperand(0), Part);
337 Value *B = State.get(getOperand(1), Part);
338 return Builder.CreateCmp(getPredicate(), A, B, Name);
339 }
340 case Instruction::Select: {
341 Value *Cond = State.get(getOperand(0), Part);
342 Value *Op1 = State.get(getOperand(1), Part);
343 Value *Op2 = State.get(getOperand(2), Part);
344 return Builder.CreateSelect(Cond, Op1, Op2, Name);
345 }
347 // Get first lane of vector induction variable.
348 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
349 // Get the original loop tripcount.
350 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
351
352 // If this part of the active lane mask is scalar, generate the CMP directly
353 // to avoid unnecessary extracts.
354 if (State.VF.isScalar())
355 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
356 Name);
357
358 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
359 auto *PredTy = VectorType::get(Int1Ty, State.VF);
360 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
361 {PredTy, ScalarTC->getType()},
362 {VIVElem0, ScalarTC}, nullptr, Name);
363 }
365 // Generate code to combine the previous and current values in vector v3.
366 //
367 // vector.ph:
368 // v_init = vector(..., ..., ..., a[-1])
369 // br vector.body
370 //
371 // vector.body
372 // i = phi [0, vector.ph], [i+4, vector.body]
373 // v1 = phi [v_init, vector.ph], [v2, vector.body]
374 // v2 = a[i, i+1, i+2, i+3];
375 // v3 = vector(v1(3), v2(0, 1, 2))
376
377 // For the first part, use the recurrence phi (v1), otherwise v2.
378 auto *V1 = State.get(getOperand(0), 0);
379 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
380 if (!PartMinus1->getType()->isVectorTy())
381 return PartMinus1;
382 Value *V2 = State.get(getOperand(1), Part);
383 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
384 }
386 if (Part != 0)
387 return State.get(this, 0, /*IsScalar*/ true);
388
389 Value *ScalarTC = State.get(getOperand(0), {0, 0});
390 Value *Step =
391 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
392 Value *Sub = Builder.CreateSub(ScalarTC, Step);
393 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
394 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
395 return Builder.CreateSelect(Cmp, Sub, Zero);
396 }
398 // Compute EVL
399 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
400 assert(AVL->getType()->isIntegerTy() &&
401 "Requested vector length should be an integer.");
402
403 // TODO: Add support for MaxSafeDist for correct loop emission.
404 assert(State.VF.isScalable() && "Expected scalable vector factor.");
405 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
406
407 Value *EVL = State.Builder.CreateIntrinsic(
408 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
409 {AVL, VFArg, State.Builder.getTrue()});
410 return EVL;
411 };
412 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
413 // be outside of the main loop.
414 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
415 // Compute VTC - IV as the AVL (requested vector length).
416 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
417 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
418 Value *AVL = State.Builder.CreateSub(TripCount, Index);
419 Value *EVL = GetEVL(State, AVL);
420 return EVL;
421 }
423 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
424 if (Part == 0)
425 return IV;
426
427 // The canonical IV is incremented by the vectorization factor (num of SIMD
428 // elements) times the unroll part.
429 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
430 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
432 }
434 if (Part != 0)
435 return nullptr;
436
437 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
438 VPRegionBlock *ParentRegion = getParent()->getParent();
439 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
440
441 // Replace the temporary unreachable terminator with a new conditional
442 // branch, hooking it up to backward destination for exiting blocks now and
443 // to forward destination(s) later when they are created.
444 BranchInst *CondBr =
445 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
446
447 if (getParent()->isExiting())
448 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
449
450 CondBr->setSuccessor(0, nullptr);
452 return CondBr;
453 }
455 if (Part != 0)
456 return nullptr;
457 // First create the compare.
458 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
459 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
460 Value *Cond = Builder.CreateICmpEQ(IV, TC);
461
462 // Now create the branch.
463 auto *Plan = getParent()->getPlan();
464 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
465 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
466
467 // Replace the temporary unreachable terminator with a new conditional
468 // branch, hooking it up to backward destination (the header) now and to the
469 // forward destination (the exit/middle block) later when it is created.
470 // Note that CreateCondBr expects a valid BB as first argument, so we need
471 // to set it to nullptr later.
472 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
473 State.CFG.VPBB2IRBB[Header]);
474 CondBr->setSuccessor(0, nullptr);
476 return CondBr;
477 }
479 if (Part != 0)
480 return State.get(this, 0, /*IsScalar*/ true);
481
482 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
483 // and will be removed by breaking up the recipe further.
484 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
485 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
486 // Get its reduction variable descriptor.
487 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
488
489 RecurKind RK = RdxDesc.getRecurrenceKind();
490
491 VPValue *LoopExitingDef = getOperand(1);
492 Type *PhiTy = OrigPhi->getType();
493 VectorParts RdxParts(State.UF);
494 for (unsigned Part = 0; Part < State.UF; ++Part)
495 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
496
497 // If the vector reduction can be performed in a smaller type, we truncate
498 // then extend the loop exit value to enable InstCombine to evaluate the
499 // entire expression in the smaller type.
500 // TODO: Handle this in truncateToMinBW.
501 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
502 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
503 for (unsigned Part = 0; Part < State.UF; ++Part)
504 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
505 }
506 // Reduce all of the unrolled parts into a single vector.
507 Value *ReducedPartRdx = RdxParts[0];
508 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
509
510 if (PhiR->isOrdered()) {
511 ReducedPartRdx = RdxParts[State.UF - 1];
512 } else {
513 // Floating-point operations should have some FMF to enable the reduction.
515 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
516 for (unsigned Part = 1; Part < State.UF; ++Part) {
517 Value *RdxPart = RdxParts[Part];
518 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
519 ReducedPartRdx = Builder.CreateBinOp(
520 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
522 TrackingVH<Value> ReductionStartValue =
523 RdxDesc.getRecurrenceStartValue();
524 ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
525 ReducedPartRdx, RdxPart);
526 } else
527 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
528 }
529 }
530
531 // Create the reduction after the loop. Note that inloop reductions create
532 // the target reduction in the loop using a Reduction recipe.
533 if (State.VF.isVector() && !PhiR->isInLoop()) {
534 ReducedPartRdx =
535 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
536 // If the reduction can be performed in a smaller type, we need to extend
537 // the reduction to the wider type before we branch to the original loop.
538 if (PhiTy != RdxDesc.getRecurrenceType())
539 ReducedPartRdx = RdxDesc.isSigned()
540 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
541 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
542 }
543
544 // If there were stores of the reduction value to a uniform memory address
545 // inside the loop, create the final store here.
546 if (StoreInst *SI = RdxDesc.IntermediateStore) {
547 auto *NewSI = Builder.CreateAlignedStore(
548 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
549 propagateMetadata(NewSI, SI);
550 }
551
552 return ReducedPartRdx;
553 }
556 "can only generate first lane for PtrAdd");
557 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
558 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
559 return Builder.CreatePtrAdd(Ptr, Addend, Name);
560 }
561 default:
562 llvm_unreachable("Unsupported opcode for instruction");
563 }
564}
565
566#if !defined(NDEBUG)
567bool VPInstruction::isFPMathOp() const {
568 // Inspired by FPMathOperator::classof. Notable differences are that we don't
569 // support Call, PHI and Select opcodes here yet.
570 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
571 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
572 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
573 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
574}
575#endif
576
578 assert(!State.Instance && "VPInstruction executing an Instance");
580 assert((hasFastMathFlags() == isFPMathOp() ||
581 getOpcode() == Instruction::Select) &&
582 "Recipe not a FPMathOp but has fast-math flags?");
583 if (hasFastMathFlags())
586 bool GeneratesPerFirstLaneOnly =
587 canGenerateScalarForFirstLane() &&
590 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
591 for (unsigned Part = 0; Part < State.UF; ++Part) {
592 if (GeneratesPerAllLanes) {
593 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
594 Lane != NumLanes; ++Lane) {
595 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
596 assert(GeneratedValue && "generatePerLane must produce a value");
597 State.set(this, GeneratedValue, VPIteration(Part, Lane));
598 }
599 continue;
600 }
601
602 Value *GeneratedValue = generatePerPart(State, Part);
603 if (!hasResult())
604 continue;
605 assert(GeneratedValue && "generatePerPart must produce a value");
606 assert((GeneratedValue->getType()->isVectorTy() ==
607 !GeneratesPerFirstLaneOnly ||
608 State.VF.isScalar()) &&
609 "scalar value but not only first lane defined");
610 State.set(this, GeneratedValue, Part,
611 /*IsScalar*/ GeneratesPerFirstLaneOnly);
612 }
613}
614
616 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
618 return vputils::onlyFirstLaneUsed(this);
619
620 switch (getOpcode()) {
621 default:
622 return false;
623 case Instruction::ICmp:
625 // TODO: Cover additional opcodes.
626 return vputils::onlyFirstLaneUsed(this);
632 return true;
633 };
634 llvm_unreachable("switch should return");
635}
636
637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
639 VPSlotTracker SlotTracker(getParent()->getPlan());
640 print(dbgs(), "", SlotTracker);
641}
642
644 VPSlotTracker &SlotTracker) const {
645 O << Indent << "EMIT ";
646
647 if (hasResult()) {
649 O << " = ";
650 }
651
652 switch (getOpcode()) {
654 O << "not";
655 break;
657 O << "combined load";
658 break;
660 O << "combined store";
661 break;
663 O << "active lane mask";
664 break;
666 O << "EXPLICIT-VECTOR-LENGTH";
667 break;
669 O << "first-order splice";
670 break;
672 O << "branch-on-cond";
673 break;
675 O << "TC > VF ? TC - VF : 0";
676 break;
678 O << "VF * Part +";
679 break;
681 O << "branch-on-count";
682 break;
684 O << "compute-reduction-result";
685 break;
687 O << "ptradd";
688 break;
689 default:
691 }
692
693 printFlags(O);
695
696 if (auto DL = getDebugLoc()) {
697 O << ", !dbg ";
698 DL.print(O);
699 }
700}
701#endif
702
704 assert(State.VF.isVector() && "not widening");
705 auto &CI = *cast<CallInst>(getUnderlyingInstr());
706 assert(!isa<DbgInfoIntrinsic>(CI) &&
707 "DbgInfoIntrinsic should have been dropped during VPlan construction");
709
710 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
711 FunctionType *VFTy = nullptr;
712 if (Variant)
713 VFTy = Variant->getFunctionType();
714 for (unsigned Part = 0; Part < State.UF; ++Part) {
715 SmallVector<Type *, 2> TysForDecl;
716 // Add return type if intrinsic is overloaded on it.
717 if (UseIntrinsic &&
718 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
719 TysForDecl.push_back(
720 VectorType::get(CI.getType()->getScalarType(), State.VF));
722 for (const auto &I : enumerate(operands())) {
723 // Some intrinsics have a scalar argument - don't replace it with a
724 // vector.
725 Value *Arg;
726 if (UseIntrinsic &&
727 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
728 Arg = State.get(I.value(), VPIteration(0, 0));
729 // Some vectorized function variants may also take a scalar argument,
730 // e.g. linear parameters for pointers. This needs to be the scalar value
731 // from the start of the respective part when interleaving.
732 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
733 Arg = State.get(I.value(), VPIteration(Part, 0));
734 else
735 Arg = State.get(I.value(), Part);
736 if (UseIntrinsic &&
737 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
738 TysForDecl.push_back(Arg->getType());
739 Args.push_back(Arg);
740 }
741
742 Function *VectorF;
743 if (UseIntrinsic) {
744 // Use vector version of the intrinsic.
745 Module *M = State.Builder.GetInsertBlock()->getModule();
746 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
747 assert(VectorF && "Can't retrieve vector intrinsic.");
748 } else {
749#ifndef NDEBUG
750 assert(Variant != nullptr && "Can't create vector function.");
751#endif
752 VectorF = Variant;
753 }
754
756 CI.getOperandBundlesAsDefs(OpBundles);
757 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
758
759 if (isa<FPMathOperator>(V))
760 V->copyFastMathFlags(&CI);
761
762 if (!V->getType()->isVoidTy())
763 State.set(this, V, Part);
764 State.addMetadata(V, &CI);
765 }
766}
767
768#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
770 VPSlotTracker &SlotTracker) const {
771 O << Indent << "WIDEN-CALL ";
772
773 auto *CI = cast<CallInst>(getUnderlyingInstr());
774 if (CI->getType()->isVoidTy())
775 O << "void ";
776 else {
778 O << " = ";
779 }
780
781 O << "call @" << CI->getCalledFunction()->getName() << "(";
783 O << ")";
784
785 if (VectorIntrinsicID)
786 O << " (using vector intrinsic)";
787 else {
788 O << " (using library function";
789 if (Variant->hasName())
790 O << ": " << Variant->getName();
791 O << ")";
792 }
793}
794
796 VPSlotTracker &SlotTracker) const {
797 O << Indent << "WIDEN-SELECT ";
799 O << " = select ";
801 O << ", ";
803 O << ", ";
805 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
806}
807#endif
808
811
812 // The condition can be loop invariant but still defined inside the
813 // loop. This means that we can't just use the original 'cond' value.
814 // We have to take the 'vectorized' value and pick the first lane.
815 // Instcombine will make this a no-op.
816 auto *InvarCond =
817 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
818
819 for (unsigned Part = 0; Part < State.UF; ++Part) {
820 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
821 Value *Op0 = State.get(getOperand(1), Part);
822 Value *Op1 = State.get(getOperand(2), Part);
823 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
824 State.set(this, Sel, Part);
825 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
826 }
827}
828
829VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
830 const FastMathFlags &FMF) {
831 AllowReassoc = FMF.allowReassoc();
832 NoNaNs = FMF.noNaNs();
833 NoInfs = FMF.noInfs();
834 NoSignedZeros = FMF.noSignedZeros();
835 AllowReciprocal = FMF.allowReciprocal();
836 AllowContract = FMF.allowContract();
837 ApproxFunc = FMF.approxFunc();
838}
839
840#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
842 switch (OpType) {
843 case OperationType::Cmp:
845 break;
846 case OperationType::DisjointOp:
848 O << " disjoint";
849 break;
850 case OperationType::PossiblyExactOp:
851 if (ExactFlags.IsExact)
852 O << " exact";
853 break;
854 case OperationType::OverflowingBinOp:
855 if (WrapFlags.HasNUW)
856 O << " nuw";
857 if (WrapFlags.HasNSW)
858 O << " nsw";
859 break;
860 case OperationType::FPMathOp:
862 break;
863 case OperationType::GEPOp:
865 O << " inbounds";
866 break;
867 case OperationType::NonNegOp:
868 if (NonNegFlags.NonNeg)
869 O << " nneg";
870 break;
871 case OperationType::Other:
872 break;
873 }
874 if (getNumOperands() > 0)
875 O << " ";
876}
877#endif
878
881 auto &Builder = State.Builder;
882 switch (Opcode) {
883 case Instruction::Call:
884 case Instruction::Br:
885 case Instruction::PHI:
886 case Instruction::GetElementPtr:
887 case Instruction::Select:
888 llvm_unreachable("This instruction is handled by a different recipe.");
889 case Instruction::UDiv:
890 case Instruction::SDiv:
891 case Instruction::SRem:
892 case Instruction::URem:
893 case Instruction::Add:
894 case Instruction::FAdd:
895 case Instruction::Sub:
896 case Instruction::FSub:
897 case Instruction::FNeg:
898 case Instruction::Mul:
899 case Instruction::FMul:
900 case Instruction::FDiv:
901 case Instruction::FRem:
902 case Instruction::Shl:
903 case Instruction::LShr:
904 case Instruction::AShr:
905 case Instruction::And:
906 case Instruction::Or:
907 case Instruction::Xor: {
908 // Just widen unops and binops.
909 for (unsigned Part = 0; Part < State.UF; ++Part) {
911 for (VPValue *VPOp : operands())
912 Ops.push_back(State.get(VPOp, Part));
913
914 Value *V = Builder.CreateNAryOp(Opcode, Ops);
915
916 if (auto *VecOp = dyn_cast<Instruction>(V))
917 setFlags(VecOp);
918
919 // Use this vector value for all users of the original instruction.
920 State.set(this, V, Part);
921 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
922 }
923
924 break;
925 }
926 case Instruction::Freeze: {
927 for (unsigned Part = 0; Part < State.UF; ++Part) {
928 Value *Op = State.get(getOperand(0), Part);
929
930 Value *Freeze = Builder.CreateFreeze(Op);
931 State.set(this, Freeze, Part);
932 }
933 break;
934 }
935 case Instruction::ICmp:
936 case Instruction::FCmp: {
937 // Widen compares. Generate vector compares.
938 bool FCmp = Opcode == Instruction::FCmp;
939 for (unsigned Part = 0; Part < State.UF; ++Part) {
940 Value *A = State.get(getOperand(0), Part);
941 Value *B = State.get(getOperand(1), Part);
942 Value *C = nullptr;
943 if (FCmp) {
944 // Propagate fast math flags.
945 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
946 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
947 Builder.setFastMathFlags(I->getFastMathFlags());
948 C = Builder.CreateFCmp(getPredicate(), A, B);
949 } else {
950 C = Builder.CreateICmp(getPredicate(), A, B);
951 }
952 State.set(this, C, Part);
953 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
954 }
955
956 break;
957 }
958 default:
959 // This instruction is not vectorized by simple widening.
960 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
961 << Instruction::getOpcodeName(Opcode));
962 llvm_unreachable("Unhandled instruction!");
963 } // end of switch.
964
965#if !defined(NDEBUG)
966 // Verify that VPlan type inference results agree with the type of the
967 // generated values.
968 for (unsigned Part = 0; Part < State.UF; ++Part) {
970 State.VF) == State.get(this, Part)->getType() &&
971 "inferred type and type from generated instructions do not match");
972 }
973#endif
974}
975
976#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
978 VPSlotTracker &SlotTracker) const {
979 O << Indent << "WIDEN ";
981 O << " = " << Instruction::getOpcodeName(Opcode);
982 printFlags(O);
984}
985#endif
986
989 auto &Builder = State.Builder;
990 /// Vectorize casts.
991 assert(State.VF.isVector() && "Not vectorizing?");
992 Type *DestTy = VectorType::get(getResultType(), State.VF);
993 VPValue *Op = getOperand(0);
994 for (unsigned Part = 0; Part < State.UF; ++Part) {
995 if (Part > 0 && Op->isLiveIn()) {
996 // FIXME: Remove once explicit unrolling is implemented using VPlan.
997 State.set(this, State.get(this, 0), Part);
998 continue;
999 }
1000 Value *A = State.get(Op, Part);
1001 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1002 State.set(this, Cast, Part);
1003 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1004 }
1005}
1006
1007#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1009 VPSlotTracker &SlotTracker) const {
1010 O << Indent << "WIDEN-CAST ";
1012 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1013 printFlags(O);
1015 O << " to " << *getResultType();
1016}
1017#endif
1018
1019/// This function adds
1020/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
1021/// to each vector element of Val. The sequence starts at StartIndex.
1022/// \p Opcode is relevant for FP induction variable.
1023static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
1025 IRBuilderBase &Builder) {
1026 assert(VF.isVector() && "only vector VFs are supported");
1027
1028 // Create and check the types.
1029 auto *ValVTy = cast<VectorType>(Val->getType());
1030 ElementCount VLen = ValVTy->getElementCount();
1031
1032 Type *STy = Val->getType()->getScalarType();
1033 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1034 "Induction Step must be an integer or FP");
1035 assert(Step->getType() == STy && "Step has wrong type");
1036
1038
1039 // Create a vector of consecutive numbers from zero to VF.
1040 VectorType *InitVecValVTy = ValVTy;
1041 if (STy->isFloatingPointTy()) {
1042 Type *InitVecValSTy =
1044 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1045 }
1046 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1047
1048 // Splat the StartIdx
1049 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1050
1051 if (STy->isIntegerTy()) {
1052 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1053 Step = Builder.CreateVectorSplat(VLen, Step);
1054 assert(Step->getType() == Val->getType() && "Invalid step vec");
1055 // FIXME: The newly created binary instructions should contain nsw/nuw
1056 // flags, which can be found from the original scalar operations.
1057 Step = Builder.CreateMul(InitVec, Step);
1058 return Builder.CreateAdd(Val, Step, "induction");
1059 }
1060
1061 // Floating point induction.
1062 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1063 "Binary Opcode should be specified for FP induction");
1064 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1065 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1066
1067 Step = Builder.CreateVectorSplat(VLen, Step);
1068 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1069 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1070}
1071
1072/// A helper function that returns an integer or floating-point constant with
1073/// value C.
1075 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1076 : ConstantFP::get(Ty, C);
1077}
1078
// Returns the runtime vectorization factor converted to the floating-point
// type FTy (runtime VF is computed as an integer, then uitofp'd).
// NOTE(review): the first signature line (doxygen line 1079) was dropped by
// the extraction; presumably `static Value *getRuntimeVFAsFloat(IRBuilderBase
// &B, Type *FTy,` — confirm against upstream.
 1080 ElementCount VF) {
 1081 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
// Build an integer with the same bit width as FTy to hold the runtime VF.
 1082 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
 1083 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
 1084 return B.CreateUIToFP(RuntimeVF, FTy);
 1085}
1086
// Widens an integer or floating-point induction: materializes the stepped
// start vector in the preheader, creates a "vec.ind" PHI in the vector loop
// header, and emits one "step.add" per unroll part.
// NOTE(review): the function signature (doxygen line 1087) was dropped by the
// extraction; this body matches VPWidenIntOrFpInductionRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1088 assert(!State.Instance && "Int or FP induction being replicated.");
 1089
 1090 Value *Start = getStartValue()->getLiveInIRValue();
// NOTE(review): doxygen line 1091 is missing here — presumably the fetch of
// the induction descriptor / IV PHI (`IV` and `ID` are used below without a
// visible definition). Confirm against upstream.
 1092 TruncInst *Trunc = getTruncInst();
 1093 IRBuilderBase &Builder = State.Builder;
 1094 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
 1095 assert(State.VF.isVector() && "must have vector VF");
 1096
 1097 // The value from the original loop to which we are mapping the new induction
 1098 // variable.
 1099 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
 1100
 1101 // Fast-math-flags propagate from the original induction instruction.
 1102 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
 1103 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
 1104 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
 1105
 1106 // Now do the actual transformations, and start with fetching the step value.
 1107 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
 1108
 1109 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
 1110 "Expected either an induction phi-node or a truncate of it!");
 1111
 1112 // Construct the initial value of the vector IV in the vector loop preheader
 1113 auto CurrIP = Builder.saveIP();
 1114 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1115 Builder.SetInsertPoint(VectorPH->getTerminator());
// If the recipe wraps a truncate of the IV, narrow both step and start so
// all following arithmetic happens in the truncated type.
 1116 if (isa<TruncInst>(EntryVal)) {
 1117 assert(Start->getType()->isIntegerTy() &&
 1118 "Truncation requires an integer type");
 1119 auto *TruncType = cast<IntegerType>(EntryVal->getType());
 1120 Step = Builder.CreateTrunc(Step, TruncType);
 1121 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
 1122 }
 1123
 1124 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
 1125 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
 1126 Value *SteppedStart = getStepVector(
 1127 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
 1128
 1129 // We create vector phi nodes for both integer and floating-point induction
 1130 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): doxygen lines 1131-1132 are missing — presumably the
// declarations of `AddOp` and `MulOp` (Instruction::BinaryOps). Confirm.
 1133 if (Step->getType()->isIntegerTy()) {
 1134 AddOp = Instruction::Add;
 1135 MulOp = Instruction::Mul;
 1136 } else {
 1137 AddOp = ID.getInductionOpcode();
 1138 MulOp = Instruction::FMul;
 1139 }
 1140
 1141 // Multiply the vectorization factor by the step using integer or
 1142 // floating-point arithmetic as appropriate.
 1143 Type *StepType = Step->getType();
 1144 Value *RuntimeVF;
 1145 if (Step->getType()->isFloatingPointTy())
 1146 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
 1147 else
 1148 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
 1149 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
 1150
 1151 // Create a vector splat to use in the induction update.
 1152 //
 1153 // FIXME: If the step is non-constant, we create the vector splat with
 1154 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
 1155 // handle a constant vector splat.
 1156 Value *SplatVF = isa<Constant>(Mul)
 1157 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
 1158 : Builder.CreateVectorSplat(State.VF, Mul);
 1159 Builder.restoreIP(CurrIP);
 1160
 1161 // We may need to add the step a number of times, depending on the unroll
 1162 // factor. The last of those goes into the PHI.
 1163 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
 1164 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1165 VecInd->setDebugLoc(EntryVal->getDebugLoc());
 1166 Instruction *LastInduction = VecInd;
// Each unroll part gets the previous value plus VF*Step ("step.add").
 1167 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1168 State.set(this, LastInduction, Part);
 1169
 1170 if (isa<TruncInst>(EntryVal))
 1171 State.addMetadata(LastInduction, EntryVal);
 1172
 1173 LastInduction = cast<Instruction>(
 1174 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
 1175 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
 1176 }
 1177
 1178 LastInduction->setName("vec.ind.next");
 1179 VecInd->addIncoming(SteppedStart, VectorPH);
 1180 // Add induction update using an incorrect block temporarily. The phi node
 1181 // will be fixed after VPlan execution. Note that at this point the latch
 1182 // block cannot be used, as it does not exist yet.
 1183 // TODO: Model increment value in VPlan, by turning the recipe into a
 1184 // multi-def and a subclass of VPHeaderPHIRecipe.
 1185 VecInd->addIncoming(LastInduction, VectorPH);
 1186}
1187
1188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for the WIDEN-INDUCTION recipe.
// NOTE(review): the signature line (doxygen 1189) and two statement lines
// (1196, 1201) were dropped by the extraction — presumably the print of the
// truncate ingredient and of an operand. Confirm against upstream.
 1190 VPSlotTracker &SlotTracker) const {
 1191 O << Indent << "WIDEN-INDUCTION";
 1192 if (getTruncInst()) {
 1193 O << "\\l\"";
 1194 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
 1195 O << " +\n" << Indent << "\" ";
 1197 } else
 1198 O << " " << VPlanIngredient(IV);
 1199
 1200 O << ", ";
 1202}
1203#endif
1204
// Returns true when this induction starts at 0 and steps by 1, i.e. it is the
// canonical induction.
// NOTE(review): the signature line (doxygen 1205) and the early-exit guard
// (1209) were dropped by the extraction — confirm against upstream.
 1206 // The step may be defined by a recipe in the preheader (e.g. if it requires
 1207 // SCEV expansion), but for the canonical induction the step is required to be
 1208 // 1, which is represented as live-in.
 1210 return false;
 1211 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
 1212 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
 1213 return StartC && StartC->isZero() && StepC && StepC->isOne();
 1214}
1215
1216#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for the DERIVED-IV recipe: "<result> = DERIVED-IV <start> +
// <iv> * <step>".
// NOTE(review): the signature line (1217) and the operand-printing lines
// (1220, 1222, 1224, 1226) were dropped by the extraction — confirm.
 1218 VPSlotTracker &SlotTracker) const {
 1219 O << Indent;
 1221 O << Indent << "= DERIVED-IV ";
 1223 O << " + ";
 1225 O << " * ";
 1227}
1228#endif
1229
// Materializes per-lane scalar induction steps: BaseIV + (Part*VF + Lane) *
// Step, for the requested parts/lanes (all of them, the first lane only, or a
// single replicated instance).
// NOTE(review): the function signature (doxygen 1229/1230) and the FMF-guard
// lines (1232, 1234) were dropped by the extraction; this body matches
// VPScalarIVStepsRecipe::execute(VPTransformState &State) — confirm.
 1231 // Fast-math-flags propagate from the original induction instruction.
 1233 if (hasFastMathFlags())
 1235
 1236 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
 1237 /// variable on which to base the steps, \p Step is the size of the step.
 1238
 1239 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
 1240 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
 1241 IRBuilderBase &Builder = State.Builder;
 1242
 1243 // Ensure step has the same type as that of scalar IV.
 1244 Type *BaseIVTy = BaseIV->getType()->getScalarType();
 1245 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
 1246
 1247 // We build scalar steps for both integer and floating-point induction
 1248 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): doxygen lines 1249-1250 are missing — presumably the
// declarations of `AddOp` and `MulOp`. Confirm against upstream.
 1251 if (BaseIVTy->isIntegerTy()) {
 1252 AddOp = Instruction::Add;
 1253 MulOp = Instruction::Mul;
 1254 } else {
 1255 AddOp = InductionOpcode;
 1256 MulOp = Instruction::FMul;
 1257 }
 1258
 1259 // Determine the number of scalars we need to generate for each unroll
 1260 // iteration.
 1261 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
 1262 // Compute the scalar steps and save the results in State.
 1263 Type *IntStepTy =
 1264 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
 1265 Type *VecIVTy = nullptr;
 1266 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
// For scalable VF (when more than the first lane is used), precompute the
// vector form once: a unit step vector and splats of Step and BaseIV.
 1267 if (!FirstLaneOnly && State.VF.isScalable()) {
 1268 VecIVTy = VectorType::get(BaseIVTy, State.VF);
 1269 UnitStepVec =
 1270 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
 1271 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
 1272 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
 1273 }
 1274
// Default range covers all parts/lanes; a replicated instance narrows it to
// exactly one (Part, Lane) pair.
 1275 unsigned StartPart = 0;
 1276 unsigned EndPart = State.UF;
 1277 unsigned StartLane = 0;
 1278 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
 1279 if (State.Instance) {
 1280 StartPart = State.Instance->Part;
 1281 EndPart = StartPart + 1;
 1282 StartLane = State.Instance->Lane.getKnownLane();
 1283 EndLane = StartLane + 1;
 1284 }
 1285 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
 1286 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
 1287
 1288 if (!FirstLaneOnly && State.VF.isScalable()) {
 1289 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
 1290 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
 1291 if (BaseIVTy->isFloatingPointTy())
 1292 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
 1293 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
 1294 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
 1295 State.set(this, Add, Part);
 1296 // It's useful to record the lane values too for the known minimum number
 1297 // of elements so we do those below. This improves the code quality when
 1298 // trying to extract the first element, for example.
 1299 }
 1300
 1301 if (BaseIVTy->isFloatingPointTy())
 1302 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
 1303
 1304 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
 1305 Value *StartIdx = Builder.CreateBinOp(
 1306 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
 1307 // The step returned by `createStepForVF` is a runtime-evaluated value
 1308 // when VF is scalable. Otherwise, it should be folded into a Constant.
 1309 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
 1310 "Expected StartIdx to be folded to a constant when VF is not "
 1311 "scalable");
 1312 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
 1313 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
 1314 State.set(this, Add, VPIteration(Part, Lane));
 1315 }
 1316 }
 1317}
1318
1319#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "<result> = SCALAR-STEPS <operands>".
// NOTE(review): signature line (1320) and operand-printing lines (1323, 1325)
// were dropped by the extraction — confirm against upstream.
 1321 VPSlotTracker &SlotTracker) const {
 1322 O << Indent;
 1324 O << " = SCALAR-STEPS ";
 1326}
1327#endif
1328
// Widens a GEP: either broadcasts a scalar clone (all operands invariant) or
// builds a vector GEP using vector operands only for loop-varying values.
// NOTE(review): the function signature (doxygen 1329) was dropped by the
// extraction; this body matches VPWidenGEPRecipe::execute(VPTransformState
// &State) — confirm against upstream.
 1330 assert(State.VF.isVector() && "not widening");
 1331 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
 1332 // Construct a vector GEP by widening the operands of the scalar GEP as
 1333 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
 1334 // results in a vector of pointers when at least one operand of the GEP
 1335 // is vector-typed. Thus, to keep the representation compact, we only use
 1336 // vector-typed operands for loop-varying values.
 1337
 1338 if (areAllOperandsInvariant()) {
 1339 // If we are vectorizing, but the GEP has only loop-invariant operands,
 1340 // the GEP we build (by only using vector-typed operands for
 1341 // loop-varying values) would be a scalar pointer. Thus, to ensure we
 1342 // produce a vector of pointers, we need to either arbitrarily pick an
 1343 // operand to broadcast, or broadcast a clone of the original GEP.
 1344 // Here, we broadcast a clone of the original.
 1345 //
 1346 // TODO: If at some point we decide to scalarize instructions having
 1347 // loop-invariant operands, this special case will no longer be
 1348 // required. We would add the scalarization decision to
 1349 // collectLoopScalars() and teach getVectorValue() to broadcast
 1350 // the lane-zero scalar value.
// NOTE(review): doxygen line 1351 is missing — presumably the declaration of
// the `Ops` SmallVector used below. Confirm against upstream.
 1352 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
 1353 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
 1354
 1355 auto *NewGEP =
 1356 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
 1357 ArrayRef(Ops).drop_front(), "", isInBounds());
 1358 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1359 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
 1360 State.set(this, EntryPart, Part);
 1361 State.addMetadata(EntryPart, GEP);
 1362 }
 1363 } else {
 1364 // If the GEP has at least one loop-varying operand, we are sure to
 1365 // produce a vector of pointers. But if we are only unrolling, we want
 1366 // to produce a scalar GEP for each unroll part. Thus, the GEP we
 1367 // produce with the code below will be scalar (if VF == 1) or vector
 1368 // (otherwise). Note that for the unroll-only case, we still maintain
 1369 // values in the vector mapping with initVector, as we do for other
 1370 // instructions.
 1371 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1372 // The pointer operand of the new GEP. If it's loop-invariant, we
 1373 // won't broadcast it.
 1374 auto *Ptr = isPointerLoopInvariant()
 1375 ? State.get(getOperand(0), VPIteration(0, 0))
 1376 : State.get(getOperand(0), Part);
 1377
 1378 // Collect all the indices for the new GEP. If any index is
 1379 // loop-invariant, we won't broadcast it.
// NOTE(review): doxygen line 1380 is missing — presumably the declaration of
// the `Indices` SmallVector used below. Confirm against upstream.
 1381 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
 1382 VPValue *Operand = getOperand(I);
 1383 if (isIndexLoopInvariant(I - 1))
 1384 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
 1385 else
 1386 Indices.push_back(State.get(Operand, Part));
 1387 }
 1388
 1389 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
 1390 // but it should be a vector, otherwise.
 1391 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
 1392 Indices, "", isInBounds());
 1393 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
 1394 "NewGEP is not a pointer vector");
 1395 State.set(this, NewGEP, Part);
 1396 State.addMetadata(NewGEP, GEP);
 1397 }
 1398 }
 1399}
1400
1401#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "WIDEN-GEP Inv/Var[...][...] <result> = getelementptr ...",
// marking the pointer and each index as loop-invariant or varying.
// NOTE(review): signature line (1402) and two operand-printing lines (1410,
// 1413) were dropped by the extraction — confirm against upstream.
 1403 VPSlotTracker &SlotTracker) const {
 1404 O << Indent << "WIDEN-GEP ";
 1405 O << (isPointerLoopInvariant() ? "Inv" : "Var");
 1406 for (size_t I = 0; I < getNumOperands() - 1; ++I)
 1407 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
 1408
 1409 O << " ";
 1411 O << " = getelementptr";
 1412 printFlags(O);
 1414}
1415#endif
1416
// Computes the per-part pointer for a consecutive (possibly reversed) wide
// memory access: forward accesses step by Part * VF; reversed accesses start
// at the last element of the part.
1417void VPVectorPointerRecipe ::execute(VPTransformState &State) {
 1418 auto &Builder = State.Builder;
// NOTE(review): doxygen line 1419 is missing — presumably a debug-location
// setup call. Confirm against upstream.
 1420 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1421 // Calculate the pointer for the specific unroll-part.
 1422 Value *PartPtr = nullptr;
 1423 // Use i32 for the gep index type when the value is constant,
 1424 // or query DataLayout for a more suitable index type otherwise.
 1425 const DataLayout &DL =
 1426 Builder.GetInsertBlock()->getModule()->getDataLayout();
 1427 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
 1428 ? DL.getIndexType(IndexedTy->getPointerTo())
 1429 : Builder.getInt32Ty();
 1430 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
 1431 bool InBounds = isInBounds();
 1432 if (IsReverse) {
 1433 // If the address is consecutive but reversed, then the
 1434 // wide store needs to start at the last vector element.
 1435 // RunTimeVF = VScale * VF.getKnownMinValue()
 1436 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
 1437 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
 1438 // NumElt = -Part * RunTimeVF
 1439 Value *NumElt = Builder.CreateMul(
 1440 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
 1441 // LastLane = 1 - RunTimeVF
 1442 Value *LastLane =
 1443 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
 1444 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
 1445 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
 1446 } else {
 1447 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
 1448 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
 1449 }
 1450
// The pointer is scalar per part (one address per wide access).
 1451 State.set(this, PartPtr, Part, /*IsScalar*/ true);
 1452 }
 1453}
1454
1455#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "<result> = vector-pointer [(reverse)] <operands>".
// NOTE(review): signature line (1456) and operand-printing lines (1459, 1464)
// were dropped by the extraction — confirm against upstream.
 1457 VPSlotTracker &SlotTracker) const {
 1458 O << Indent;
 1460 O << " = vector-pointer ";
 1461 if (IsReverse)
 1462 O << "(reverse) ";
 1463
 1465}
1466#endif
1467
// Lowers a blend PHI into a chain of selects, one per additional incoming
// value, keyed on each incoming edge's mask.
// NOTE(review): the function signature (doxygen 1468/1469) was dropped by the
// extraction; this body matches VPBlendRecipe::execute(VPTransformState
// &State) — confirm against upstream.
 1470 // We know that all PHIs in non-header blocks are converted into
 1471 // selects, so we don't have to worry about the insertion order and we
 1472 // can just use the builder.
 1473 // At this point we generate the predication tree. There may be
 1474 // duplications since this is a simple recursive scan, but future
 1475 // optimizations will clean it up.
 1476
 1477 unsigned NumIncoming = getNumIncomingValues();
 1478
 1479 // Generate a sequence of selects of the form:
 1480 // SELECT(Mask3, In3,
 1481 // SELECT(Mask2, In2,
 1482 // SELECT(Mask1, In1,
 1483 // In0)))
 1484 // Note that Mask0 is never used: lanes for which no path reaches this phi and
 1485 // are essentially undef are taken from In0.
 1486 VectorParts Entry(State.UF);
 1487 for (unsigned In = 0; In < NumIncoming; ++In) {
 1488 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1489 // We might have single edge PHIs (blocks) - use an identity
 1490 // 'select' for the first PHI operand.
 1491 Value *In0 = State.get(getIncomingValue(In), Part);
 1492 if (In == 0)
 1493 Entry[Part] = In0; // Initialize with the first incoming value.
 1494 else {
 1495 // Select between the current value and the previous incoming edge
 1496 // based on the incoming mask.
 1497 Value *Cond = State.get(getMask(In), Part);
 1498 Entry[Part] =
 1499 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
 1500 }
 1501 }
 1502 }
 1503 for (unsigned Part = 0; Part < State.UF; ++Part)
 1504 State.set(this, Entry[Part], Part);
 1505}
1506
1507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for BLEND: one incoming value prints as a plain phi; multiple
// incoming values print as value/mask pairs.
// NOTE(review): signature line (1508) and several operand-printing lines
// (1511, 1517, 1521, 1525) were dropped by the extraction — confirm.
 1509 VPSlotTracker &SlotTracker) const {
 1510 O << Indent << "BLEND ";
 1512 O << " =";
 1513 if (getNumIncomingValues() == 1) {
 1514 // Not a User of any mask: not really blending, this is a
 1515 // single-predecessor phi.
 1516 O << " ";
 1518 } else {
 1519 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
 1520 O << " ";
 1522 if (I == 0)
 1523 continue;
 1524 O << "/";
 1526 }
 1527 }
 1528}
1529#endif
1530
// Generates the in-loop reduction update per unroll part: optionally masks
// inactive lanes with the identity value, then reduces (ordered or
// target-specific) and chains the result.
// NOTE(review): the function signature (doxygen 1531) was dropped by the
// extraction; this body matches VPReductionRecipe::execute(VPTransformState
// &State) — confirm against upstream.
 1532 assert(!State.Instance && "Reduction being replicated.");
 1533 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
 1534 RecurKind Kind = RdxDesc.getRecurrenceKind();
 1535 // Propagate the fast-math flags carried by the underlying instruction.
// NOTE(review): doxygen line 1536 is missing — presumably the FastMathFlag
// guard object that scopes the setFastMathFlags call below. Confirm.
 1537 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
 1538 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1539 Value *NewVecOp = State.get(getVecOp(), Part);
// Masked reduction: replace inactive lanes with the recurrence identity so
// they do not affect the reduced value.
 1540 if (VPValue *Cond = getCondOp()) {
 1541 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
 1542 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
 1543 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
 1544 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
 1545 RdxDesc.getFastMathFlags());
 1546 if (State.VF.isVector()) {
 1547 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
 1548 }
 1549
 1550 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
 1551 NewVecOp = Select;
 1552 }
 1553 Value *NewRed;
 1554 Value *NextInChain;
 1555 if (IsOrdered) {
 1556 if (State.VF.isVector())
 1557 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
 1558 PrevInChain);
 1559 else
 1560 NewRed = State.Builder.CreateBinOp(
 1561 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
 1562 NewVecOp);
 1563 PrevInChain = NewRed;
 1564 } else {
 1565 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
 1566 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
 1567 }
// NOTE(review): doxygen line 1568 is missing — presumably the `if` guarding
// the min/max path (e.g. a RecurrenceDescriptor min/max-kind check). Confirm.
 1569 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
 1570 NewRed, PrevInChain);
 1571 } else if (IsOrdered)
 1572 NextInChain = NewRed;
 1573 else
 1574 NextInChain = State.Builder.CreateBinOp(
 1575 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
 1576 State.set(this, NextInChain, Part, /*IsScalar*/ true);
 1577 }
 1578}
1579
1580#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "REDUCE <result> = <chain> + reduce.<op> (<vec>[, <cond>])",
// plus fast-math flags and a note for a sunk intermediate store.
// NOTE(review): signature line (1581) and several operand/flag-printing lines
// (1584, 1586, 1589, 1591, 1594) were dropped by the extraction — confirm.
 1582 VPSlotTracker &SlotTracker) const {
 1583 O << Indent << "REDUCE ";
 1585 O << " = ";
 1587 O << " +";
 1588 if (isa<FPMathOperator>(getUnderlyingInstr()))
 1590 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
 1592 if (getCondOp()) {
 1593 O << ", ";
 1595 }
 1596 O << ")";
 1597 if (RdxDesc.IntermediateStore)
 1598 O << " (with final reduction value stored in invariant address sank "
 1599 "outside of loop)";
 1600}
1601#endif
1602
// Returns true when the replicated scalar values must also be packed into a
// vector, because a widened user reaches them through a VPPredInstPHIRecipe.
// NOTE(review): the signature line (doxygen 1603) was dropped by the
// extraction; presumably `bool VPReplicateRecipe::shouldPack() const {` —
// confirm against upstream.
 1604 // Find if the recipe is used by a widened recipe via an intervening
 1605 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
 1606 return any_of(users(), [](const VPUser *U) {
 1607 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
 1608 return any_of(PredR->users(), [PredR](const VPUser *U) {
 1609 return !U->usesScalars(PredR);
 1610 });
 1611 return false;
 1612 });
 1613}
1614
1615#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "CLONE"/"REPLICATE" plus the underlying instruction (special
// form for calls), and "(S->V)" when the scalars must also be packed.
// NOTE(review): signature line (1616) and several printing lines (1621, 1628,
// 1634, 1636) were dropped by the extraction — confirm against upstream.
 1617 VPSlotTracker &SlotTracker) const {
 1618 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
 1619
 1620 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
 1622 O << " = ";
 1623 }
 1624 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
 1625 O << "call";
 1626 printFlags(O);
 1627 O << "@" << CB->getCalledFunction()->getName() << "(";
 1629 O, [&O, &SlotTracker](VPValue *Op) {
 1630 Op->printAsOperand(O, SlotTracker);
 1631 });
 1632 O << ")";
 1633 } else {
 1635 printFlags(O);
 1637 }
 1638
 1639 if (shouldPack())
 1640 O << " (S->V)";
 1641}
1642#endif
1643
1644/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1645/// if it is either defined outside the vector region or its operand is known to
1646/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1647/// TODO: Uniformity should be associated with a VPValue and there should be a
1648/// generic way to check.
// NOTE(review): the signature line (doxygen 1649) was dropped by the
// extraction; presumably a static helper taking the cast recipe `C` —
// confirm against upstream.
 1650 return C->isDefinedOutsideVectorRegions() ||
 1651 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
 1652 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
 1653}
1654
// Emits the scalar cast for lane 0 of the given unroll part and returns the
// resulting value. Only first-lane codegen is supported.
1655Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
// NOTE(review): doxygen line 1656 is missing — presumably the first half of
// the assert whose message string follows. Confirm against upstream.
 1657 "Codegen only implemented for first lane.");
 1658 switch (Opcode) {
 1659 case Instruction::SExt:
 1660 case Instruction::ZExt:
 1661 case Instruction::Trunc: {
 1662 // Note: SExt/ZExt not used yet.
 1663 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
 1664 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
 1665 }
 1666 default:
 1667 llvm_unreachable("opcode not implemented yet");
 1668 }
 1669}
1670
1671void VPScalarCastRecipe ::execute(VPTransformState &State) {
1672 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1673 for (unsigned Part = 0; Part != State.UF; ++Part) {
1674 Value *Res;
1675 // Only generate a single instance, if the recipe is uniform across UFs and
1676 // VFs.
1677 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1678 Res = State.get(this, VPIteration(0, 0));
1679 else
1680 Res = generate(State, Part);
1681 State.set(this, Res, VPIteration(Part, 0));
1682 }
1683}
1684
1685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1686void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1687 VPSlotTracker &SlotTracker) const {
1688 O << Indent << "SCALAR-CAST ";
1689 printAsOperand(O, SlotTracker);
1690 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1691 printOperands(O, SlotTracker);
1692 O << " to " << *ResultTy;
1693}
1694#endif
1695
// Replaces the block's placeholder unreachable terminator with a conditional
// branch on this instance's lane of the block-in mask (or an all-true branch
// when there is no mask); successors are filled in later.
// NOTE(review): the function signature (doxygen 1696) was dropped by the
// extraction; this body matches VPBranchOnMaskRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1697 assert(State.Instance && "Branch on Mask works only on single instance.");
 1698
 1699 unsigned Part = State.Instance->Part;
 1700 unsigned Lane = State.Instance->Lane.getKnownLane();
 1701
 1702 Value *ConditionBit = nullptr;
 1703 VPValue *BlockInMask = getMask();
 1704 if (BlockInMask) {
 1705 ConditionBit = State.get(BlockInMask, Part);
// A vector mask must be narrowed to this instance's lane.
 1706 if (ConditionBit->getType()->isVectorTy())
 1707 ConditionBit = State.Builder.CreateExtractElement(
 1708 ConditionBit, State.Builder.getInt32(Lane));
 1709 } else // Block in mask is all-one.
 1710 ConditionBit = State.Builder.getTrue();
 1711
 1712 // Replace the temporary unreachable terminator with a new conditional branch,
 1713 // whose two destinations will be set later when they are created.
 1714 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
 1715 assert(isa<UnreachableInst>(CurrentTerminator) &&
 1716 "Expected to replace unreachable terminator with conditional branch.");
 1717 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
 1718 CondBr->setSuccessor(0, nullptr);
 1719 ReplaceInstWithInst(CurrentTerminator, CondBr);
 1720}
1721
// Creates the PHI merging a predicated instruction's result with the value
// flowing around the predicated block — either a vector PHI (when the operand
// was packed via insertelement) or a scalar PHI for this instance.
// NOTE(review): the function signature (doxygen 1722) was dropped by the
// extraction; this body matches VPPredInstPHIRecipe::execute(VPTransformState
// &State) — confirm against upstream.
 1723 assert(State.Instance && "Predicated instruction PHI works per instance.");
 1724 Instruction *ScalarPredInst =
 1725 cast<Instruction>(State.get(getOperand(0), *State.Instance));
 1726 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
 1727 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
 1728 assert(PredicatingBB && "Predicated block has no single predecessor.");
 1729 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
 1730 "operand must be VPReplicateRecipe");
 1731
 1732 // By current pack/unpack logic we need to generate only a single phi node: if
 1733 // a vector value for the predicated instruction exists at this point it means
 1734 // the instruction has vector users only, and a phi for the vector value is
 1735 // needed. In this case the recipe of the predicated instruction is marked to
 1736 // also do that packing, thereby "hoisting" the insert-element sequence.
 1737 // Otherwise, a phi node for the scalar value is needed.
 1738 unsigned Part = State.Instance->Part;
 1739 if (State.hasVectorValue(getOperand(0), Part)) {
 1740 Value *VectorValue = State.get(getOperand(0), Part);
 1741 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
 1742 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
 1743 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
 1744 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
 1745 if (State.hasVectorValue(this, Part))
 1746 State.reset(this, VPhi, Part);
 1747 else
 1748 State.set(this, VPhi, Part);
 1749 // NOTE: Currently we need to update the value of the operand, so the next
 1750 // predicated iteration inserts its generated value in the correct vector.
 1751 State.reset(getOperand(0), VPhi, Part);
 1752 } else {
 1753 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
 1754 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
// Lanes that did not execute the predicated block carry poison.
 1755 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
 1756 PredicatingBB);
 1757 Phi->addIncoming(ScalarPredInst, PredicatedBB);
 1758 if (State.hasScalarValue(this, *State.Instance))
 1759 State.reset(this, Phi, *State.Instance);
 1760 else
 1761 State.set(this, Phi, *State.Instance);
 1762 // NOTE: Currently we need to update the value of the operand, so the next
 1763 // predicated iteration inserts its generated value in the correct vector.
 1764 State.reset(getOperand(0), Phi, *State.Instance);
 1765 }
 1766}
1767
1768#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printers for the predicated-instruction PHI and the widened memory
// recipes. NOTE(review): each function's signature line and its
// operand-printing lines were dropped by the extraction; based on the printed
// strings these are the print() methods of VPPredInstPHIRecipe,
// VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStoreRecipe and
// VPWidenStoreEVLRecipe — confirm against upstream.
// PHI-PREDICATED-INSTRUCTION printer.
 1770 VPSlotTracker &SlotTracker) const {
 1771 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
 1773 O << " = ";
 1775}
 1776
// Widened load printer.
 1778 VPSlotTracker &SlotTracker) const {
 1779 O << Indent << "WIDEN ";
 1781 O << " = load ";
 1783}
 1784
// Widened EVL (vector-predicated) load printer.
 1786 VPSlotTracker &SlotTracker) const {
 1787 O << Indent << "WIDEN ";
 1789 O << " = vp.load ";
 1791}
 1792
// Widened store printer.
 1794 VPSlotTracker &SlotTracker) const {
 1795 O << Indent << "WIDEN store ";
 1797}
 1798
// Widened EVL (vector-predicated) store printer.
 1800 VPSlotTracker &SlotTracker) const {
 1801 O << Indent << "WIDEN vp.store ";
 1803}
1804#endif
1805
// Creates the scalar canonical-IV PHI ("index") in the vector loop header,
// seeded with the start value from the preheader; all unroll parts share the
// same scalar PHI.
// NOTE(review): the function signature (doxygen 1806) was dropped by the
// extraction; this body matches VPCanonicalIVPHIRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1807 Value *Start = getStartValue()->getLiveInIRValue();
 1808 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
 1809 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1810
 1811 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1812 EntryPart->addIncoming(Start, VectorPH);
 1813 EntryPart->setDebugLoc(getDebugLoc());
 1814 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
 1815 State.set(this, EntryPart, Part, /*IsScalar*/ true);
 1816}
1817
1818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "EMIT <result> = CANONICAL-INDUCTION <operands>".
// NOTE(review): signature line (1819) and operand-printing lines (1822, 1824)
// were dropped by the extraction — confirm against upstream.
 1820 VPSlotTracker &SlotTracker) const {
 1821 O << Indent << "EMIT ";
 1823 O << " = CANONICAL-INDUCTION ";
 1825}
1826#endif
1827
// Returns true when an induction described by (Kind, Start, Step) matches
// this canonical IV: integer kind, the same start value, and a live-in
// constant step of 1.
// NOTE(review): the first signature lines (doxygen 1828-1829) and the
// integer-kind guard (1832) were dropped by the extraction — confirm against
// upstream.
 1830 VPValue *Step) const {
 1831 // Must be an integer induction.
 1833 return false;
 1834 // Start must match the start value of this canonical induction.
 1835 if (Start != getStartValue())
 1836 return false;
 1837
 1838 // If the step is defined by a recipe, it is not a ConstantInt.
 1839 if (Step->getDefiningRecipe())
 1840 return false;
 1841
 1842 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
 1843 return StepC && StepC->isOne();
 1844}
1845
// Returns true when only scalar values are generated for this pointer
// induction (scalar-after-vectorization and, for scalable vectors, only the
// first lane is demanded).
// NOTE(review): the signature line (doxygen 1846) was dropped by the
// extraction; presumably VPWidenPointerInductionRecipe::
// onlyScalarsGenerated(bool IsScalable) — confirm against upstream.
 1847 return IsScalarAfterVectorization &&
 1848 (!IsScalable || vputils::onlyFirstLaneUsed(this));
 1849}
1850
1851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "EMIT <result> = WIDEN-POINTER-INDUCTION <start>, <step>".
// NOTE(review): signature line (1852) and operand-printing lines (1855, 1857)
// were dropped by the extraction — confirm against upstream.
 1853 VPSlotTracker &SlotTracker) const {
 1854 O << Indent << "EMIT ";
 1856 O << " = WIDEN-POINTER-INDUCTION ";
 1858 O << ", " << *IndDesc.getStep();
 1859}
1860#endif
1861
// Expands the wrapped SCEV expression to IR at the current insert point and
// records the result (the same scalar for every unroll part).
// NOTE(review): the function signature (doxygen 1862) was dropped by the
// extraction; this body matches VPExpandSCEVRecipe::execute(VPTransformState
// &State) — confirm against upstream.
 1863 assert(!State.Instance && "cannot be used in per-lane");
 1864 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
 1865 SCEVExpander Exp(SE, DL, "induction");
 1866
 1867 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
 1868 &*State.Builder.GetInsertPoint());
 1869 assert(!State.ExpandedSCEVs.contains(Expr) &&
 1870 "Same SCEV expanded multiple times");
// Cache the expansion so later queries can reuse it instead of re-expanding.
 1871 State.ExpandedSCEVs[Expr] = Res;
 1872 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
 1873 State.set(this, Res, {Part, 0});
 1874}
1875
1876#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "EMIT <result> = EXPAND SCEV <expr>".
// NOTE(review): signature line (1877) and the result-printing line (1880)
// were dropped by the extraction — confirm against upstream.
 1878 VPSlotTracker &SlotTracker) const {
 1879 O << Indent << "EMIT ";
 1881 O << " = EXPAND SCEV " << *Expr;
 1882}
1883#endif
1884
// Widens the scalar canonical IV: per part, "vec.iv" = splat(canonical IV) +
// (Part * VF + stepvector), so each lane holds its own iteration index.
// NOTE(review): the function signature (doxygen 1885) was dropped by the
// extraction; this body matches VPWidenCanonicalIVRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1886 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
 1887 Type *STy = CanonicalIV->getType();
 1888 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
 1889 ElementCount VF = State.VF;
 1890 Value *VStart = VF.isScalar()
 1891 ? CanonicalIV
 1892 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
 1893 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
 1894 Value *VStep = createStepForVF(Builder, STy, VF, Part);
 1895 if (VF.isVector()) {
 1896 VStep = Builder.CreateVectorSplat(VF, VStep);
 1897 VStep =
 1898 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
 1899 }
 1900 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
 1901 State.set(this, CanonicalVectorIV, Part);
 1902 }
 1903}
1904
1905#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "EMIT <result> = WIDEN-CANONICAL-INDUCTION <operands>".
// NOTE(review): signature line (1906) and operand-printing lines (1909, 1911)
// were dropped by the extraction — confirm against upstream.
 1907 VPSlotTracker &SlotTracker) const {
 1908 O << Indent << "EMIT ";
 1910 O << " = WIDEN-CANONICAL-INDUCTION ";
 1912}
1913#endif
1914
// Creates the "vector.recur" PHI for a first-order recurrence: the init value
// is placed in the last lane of a poison vector in the preheader (lane layout
// lets later shuffles extract the value carried from the previous iteration).
// NOTE(review): the function signature (doxygen 1915) was dropped by the
// extraction; this body matches VPFirstOrderRecurrencePHIRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1916 auto &Builder = State.Builder;
 1917 // Create a vector from the initial value.
 1918 auto *VectorInit = getStartValue()->getLiveInIRValue();
 1919
 1920 Type *VecTy = State.VF.isScalar()
 1921 ? VectorInit->getType()
 1922 : VectorType::get(VectorInit->getType(), State.VF);
 1923
 1924 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1925 if (State.VF.isVector()) {
 1926 auto *IdxTy = Builder.getInt32Ty();
 1927 auto *One = ConstantInt::get(IdxTy, 1);
 1928 IRBuilder<>::InsertPointGuard Guard(Builder);
 1929 Builder.SetInsertPoint(VectorPH->getTerminator());
 1930 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
// LastIdx = RuntimeVF - 1: insert the init value into the final lane.
 1931 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
 1932 VectorInit = Builder.CreateInsertElement(
 1933 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
 1934 }
 1935
 1936 // Create a phi node for the new recurrence.
 1937 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
 1938 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1939 EntryPart->addIncoming(VectorInit, VectorPH);
 1940 State.set(this, EntryPart, 0);
 1941}
1942
1943#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>".
// NOTE(review): signature line (1944) and operand-printing lines (1947, 1949)
// were dropped by the extraction — confirm against upstream.
 1945 VPSlotTracker &SlotTracker) const {
 1946 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
 1948 O << " = phi ";
 1950}
1951#endif
1952
// Creates the "vec.phi" header PHI(s) for a reduction: scalar or vector
// depending on in-loop/VF, seeded with the start value for part 0 and the
// recurrence identity for the remaining parts.
// NOTE(review): the function signature (doxygen 1953) was dropped by the
// extraction; this body matches VPReductionPHIRecipe::execute(
// VPTransformState &State) — confirm against upstream.
 1954 auto &Builder = State.Builder;
 1955
 1956 // Reductions do not have to start at zero. They can start with
 1957 // any loop invariant values.
 1958 VPValue *StartVPV = getStartValue();
 1959 Value *StartV = StartVPV->getLiveInIRValue();
 1960
 1961 // In order to support recurrences we need to be able to vectorize Phi nodes.
 1962 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
 1963 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
 1964 // this value when we vectorize all of the instructions that use the PHI.
 1965 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
 1966 Type *VecTy = ScalarPHI ? StartV->getType()
 1967 : VectorType::get(StartV->getType(), State.VF);
 1968
 1969 BasicBlock *HeaderBB = State.CFG.PrevBB;
 1970 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
 1971 "recipe must be in the vector loop header");
// Ordered reductions use a single PHI regardless of the unroll factor.
 1972 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
 1973 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
 1974 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
 1975 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
 1976 State.set(this, EntryPart, Part, IsInLoop);
 1977 }
 1978
 1979 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1980
 1981 Value *Iden = nullptr;
 1982 RecurKind RK = RdxDesc.getRecurrenceKind();
// NOTE(review): doxygen lines 1983-1984 are missing — presumably the `if`
// checking for min/max or any-of recurrence kinds, guarding the branch whose
// comment follows. Confirm against upstream.
 1985 // MinMax and AnyOf reductions have the start value as their identity.
 1986 if (ScalarPHI) {
 1987 Iden = StartV;
 1988 } else {
 1989 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
 1990 Builder.SetInsertPoint(VectorPH->getTerminator());
 1991 StartV = Iden =
 1992 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
 1993 }
 1994 } else {
 1995 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
 1996 RdxDesc.getFastMathFlags());
 1997
 1998 if (!ScalarPHI) {
// Lane 0 carries the real start value; all other lanes get the identity.
 1999 Iden = Builder.CreateVectorSplat(State.VF, Iden);
 2000 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
 2001 Builder.SetInsertPoint(VectorPH->getTerminator());
 2002 Constant *Zero = Builder.getInt32(0);
 2003 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
 2004 }
 2005 }
 2006
 2007 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
 2008 Value *EntryPart = State.get(this, Part, IsInLoop);
 2009 // Make sure to add the reduction start value only to the
 2010 // first unroll part.
 2011 Value *StartVal = (Part == 0) ? StartV : Iden;
 2012 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
 2013 }
 2014}
2015
2016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2018 VPSlotTracker &SlotTracker) const {
2019 O << Indent << "WIDEN-REDUCTION-PHI ";
2020
2022 O << " = phi ";
2024}
2025#endif
2026
2029 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2030
2031 Value *Op0 = State.get(getOperand(0), 0);
2032 Type *VecTy = Op0->getType();
2033 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2034 State.set(this, VecPhi, 0);
2035}
2036
2037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2039 VPSlotTracker &SlotTracker) const {
2040 O << Indent << "WIDEN-PHI ";
2041
2042 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2043 // Unless all incoming values are modeled in VPlan print the original PHI
2044 // directly.
2045 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2046 // values as VPValues.
2047 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2048 O << VPlanIngredient(OriginalPhi);
2049 return;
2050 }
2051
2053 O << " = phi ";
2055}
2056#endif
2057
2058// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2059// remove VPActiveLaneMaskPHIRecipe.
2061 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2062 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2063 Value *StartMask = State.get(getOperand(0), Part);
2064 PHINode *EntryPart =
2065 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2066 EntryPart->addIncoming(StartMask, VectorPH);
2067 EntryPart->setDebugLoc(getDebugLoc());
2068 State.set(this, EntryPart, Part);
2069 }
2070}
2071
2072#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2074 VPSlotTracker &SlotTracker) const {
2075 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2076
2078 O << " = phi ";
2080}
2081#endif
2082
2084 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2085 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2086 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2087 PHINode *EntryPart =
2088 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2089 EntryPart->addIncoming(Start, VectorPH);
2090 EntryPart->setDebugLoc(getDebugLoc());
2091 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2092}
2093
2094#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2096 VPSlotTracker &SlotTracker) const {
2097 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2098
2100 O << " = phi ";
2102}
2103#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:259
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2361
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1186
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1214
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2081
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool isBinaryOp() const
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:254
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2815
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2862
iterator end()
Definition: VPlan.h:2846
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2874
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1956
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1961
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1953
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:489
size_t getNumSuccessors() const
Definition: VPlan.h:534
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2232
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, /p Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:313
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:401
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:413
unsigned getVPDefID() const
Definition: VPlanValue.h:433
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2753
VPValue * getStartValue() const
Definition: VPlan.h:2752
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1653
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1165
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1175
@ CalculateTripCountMinusVF
Definition: VPlan.h:1173
bool hasResult() const
Definition: VPlan.h:1283
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1259
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:694
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:709
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:734
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:800
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:897
ExactFlagsTy ExactFlags
Definition: VPlan.h:953
FastMathFlagsTy FMFs
Definition: VPlan.h:956
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:955
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1082
bool isInBounds() const
Definition: VPlan.h:1121
GEPFlagsTy GEPFlags
Definition: VPlan.h:954
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1128
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:952
WrapFlagsTy WrapFlags
Definition: VPlan.h:951
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1132
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1115
bool hasNoSignedWrap() const
Definition: VPlan.h:1138
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1925
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2116
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2118
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2114
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2948
const VPBlockBase * getEntry() const
Definition: VPlan.h:2987
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2196
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1410
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2802
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:888
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:454
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1306
operand_range operands()
Definition: VPlanValue.h:278
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:274
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1302
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1406
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1737
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1732
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1743
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3049
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3233
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5079
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3593
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1454
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1449
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2406
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2165
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1046
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1223
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right)
See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we are trying to match.
Definition: LoopUtils.cpp:1037
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1207
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:365
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:373
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:409
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:412
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:389
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:398
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1516
VPValue * getCond() const
Definition: VPlan.h:1512
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.