LLVM 23.0.0git
VPlanUnroll.cpp
Go to the documentation of this file.
1//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements explicit unrolling for VPlans.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPRecipeBuilder.h"
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanHelpers.h"
19#include "VPlanPatternMatch.h"
20#include "VPlanTransforms.h"
21#include "VPlanUtils.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/ScopeExit.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/Intrinsics.h"
28
29using namespace llvm;
30using namespace llvm::VPlanPatternMatch;
31
32namespace {
33
34/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
35/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
36/// the unrolling transformation, where the original VPValues are retained for
37/// part zero.
38class UnrollState {
39 /// Plan to unroll.
40 VPlan &Plan;
41 /// Unroll factor to unroll by.
42 const unsigned UF;
43 /// Analysis for types.
44 VPTypeAnalysis TypeInfo;
45
46 /// Unrolling may create recipes that should not be unrolled themselves.
47 /// Those are tracked in ToSkip.
48 SmallPtrSet<VPRecipeBase *, 8> ToSkip;
49
50 // Associate with each VPValue of part 0 its unrolled instances of parts 1,
51 // ..., UF-1.
52 DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
53
54 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
55 void unrollReplicateRegionByUF(VPRegionBlock *VPR);
56
57 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
58 /// all parts.
59 void unrollRecipeByUF(VPRecipeBase &R);
60
61 /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
62 /// depends on the concrete header phi. Inserts newly created recipes at \p
63 /// InsertPtForPhi.
64 void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
65 VPBasicBlock::iterator InsertPtForPhi);
66
67 /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
68 /// the induction steps for each part.
69 void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
70 VPBasicBlock::iterator InsertPtForPhi);
71
72 VPValue *getConstantInt(unsigned Part) {
73 Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
74 return Plan.getConstantInt(CanIVIntTy, Part);
75 }
76
77public:
78 UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}
79
80 void unrollBlock(VPBlockBase *VPB);
81
82 VPValue *getValueForPart(VPValue *V, unsigned Part) {
84 return V;
85 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
86 "accessed value does not exist");
87 return VPV2Parts[V][Part - 1];
88 }
89
90 /// Given a single original recipe \p OrigR (of part zero), and its copy \p
91 /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
92 /// corresponding VPValue defined by \p CopyR.
93 void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
94 unsigned Part) {
95 for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
96 const auto &[V, _] = VPV2Parts.try_emplace(VPV);
97 assert(V->second.size() == Part - 1 && "earlier parts not set");
98 V->second.push_back(CopyR->getVPValue(Idx));
99 }
100 }
101
102 /// Given a uniform recipe \p R, add it for all parts.
103 void addUniformForAllParts(VPSingleDefRecipe *R) {
104 const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
105 assert(Inserted && "uniform value already added");
106 for (unsigned Part = 0; Part != UF; ++Part)
107 V->second.push_back(R);
108 }
109
110 bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
111
112 /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
113 /// \p P.
114 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
115 auto *Op = R->getOperand(OpIdx);
116 R->setOperand(OpIdx, getValueForPart(Op, Part));
117 }
118
119 /// Update \p R's operands with their corresponding VPValues for part \p P.
120 void remapOperands(VPRecipeBase *R, unsigned Part) {
121 for (const auto &[OpIdx, Op] : enumerate(R->operands()))
122 R->setOperand(OpIdx, getValueForPart(Op, Part));
123 }
124};
125} // namespace
126
128 unsigned Part, VPlan &Plan,
129 VPTypeAnalysis &TypeInfo) {
130 if (Part == 0)
131 return;
132
133 VPBuilder Builder(Steps);
134 Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
135 Type *IntStepTy =
136 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
137 VPValue *StartIndex = Steps->getVFValue();
138 if (Part > 1) {
139 StartIndex = Builder.createOverflowingOp(
140 Instruction::Mul,
141 {StartIndex,
142 Plan.getConstantInt(TypeInfo.inferScalarType(StartIndex), Part)});
143 }
144 StartIndex = Builder.createScalarSExtOrTrunc(
145 StartIndex, IntStepTy, TypeInfo.inferScalarType(StartIndex),
146 Steps->getDebugLoc());
147
148 if (BaseIVTy->isFloatingPointTy())
149 StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex,
150 BaseIVTy, Steps->getDebugLoc());
151
152 Steps->setStartIndex(StartIndex);
153}
154
155void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
156 VPBlockBase *InsertPt = VPR->getSingleSuccessor();
157 for (unsigned Part = 1; Part != UF; ++Part) {
158 auto *Copy = VPR->clone();
159 VPBlockUtils::insertBlockBefore(Copy, InsertPt);
160
161 auto PartI = vp_depth_first_shallow(Copy->getEntry());
162 auto Part0 = vp_depth_first_shallow(VPR->getEntry());
163 for (const auto &[PartIVPBB, Part0VPBB] :
166 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
167 remapOperands(&PartIR, Part);
168 if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
169 addStartIndexForScalarSteps(Steps, Part, Plan, TypeInfo);
170
171 addRecipeForPart(&Part0R, &PartIR, Part);
172 }
173 }
174 }
175}
176
177void UnrollState::unrollWidenInductionByUF(
178 VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
179 VPBasicBlock *PH = cast<VPBasicBlock>(
180 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
181 Type *IVTy = TypeInfo.inferScalarType(IV);
182 auto &ID = IV->getInductionDescriptor();
183 FastMathFlags FMF;
184 VPIRFlags::WrapFlagsTy WrapFlags(false, false);
185 if (auto *IntOrFPInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(IV)) {
186 if (IntOrFPInd->hasFastMathFlags())
187 FMF = IntOrFPInd->getFastMathFlags();
188 if (IntOrFPInd->hasNoWrapFlags())
189 WrapFlags = IntOrFPInd->getNoWrapFlags();
190 }
191
192 VPValue *ScalarStep = IV->getStepValue();
193 VPBuilder Builder(PH);
194 Type *VectorStepTy =
195 IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
196 VPInstruction *VectorStep = Builder.createNaryOp(
197 VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, FMF,
198 IV->getDebugLoc());
199
200 ToSkip.insert(VectorStep);
201
202 // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
203 // remains the header phi. Parts > 0 are computed by adding Step to the
204 // previous part. The header phi recipe will get 2 new operands: the step
205 // value for a single part and the last part, used to compute the backedge
206 // value during VPWidenInductionRecipe::execute.
207 // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
208 // %Part.1 = %Part.0 + %VectorStep
209 // %Part.2 = %Part.1 + %VectorStep
210 // %Part.3 = %Part.2 + %VectorStep
211 //
212 // The newly added recipes are added to ToSkip to avoid interleaving them
213 // again.
214 VPValue *Prev = IV;
215 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
216 unsigned AddOpc;
217 VPIRFlags AddFlags;
218 if (IVTy->isPointerTy()) {
220 AddFlags = GEPNoWrapFlags::none();
221 } else if (IVTy->isFloatingPointTy()) {
222 AddOpc = ID.getInductionOpcode();
223 AddFlags = FMF;
224 } else {
225 AddOpc = Instruction::Add;
226 AddFlags = WrapFlags;
228 AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
229 }
230 for (unsigned Part = 1; Part != UF; ++Part) {
231 std::string Name =
232 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
233
234 VPInstruction *Add =
235 Builder.createNaryOp(AddOpc,
236 {
237 Prev,
238 VectorStep,
239 },
240 AddFlags, IV->getDebugLoc(), Name);
241 ToSkip.insert(Add);
242 addRecipeForPart(IV, Add, Part);
243 Prev = Add;
244 }
245 IV->addOperand(VectorStep);
246 IV->addOperand(Prev);
247}
248
249void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
250 VPBasicBlock::iterator InsertPtForPhi) {
251 // First-order recurrences pass a single vector or scalar through their header
252 // phis, irrespective of interleaving.
254 return;
255
256 // Generate step vectors for each unrolled part.
257 if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
258 unrollWidenInductionByUF(IV, InsertPtForPhi);
259 return;
260 }
261
262 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
263 if (RdxPhi && RdxPhi->isOrdered())
264 return;
265
266 auto InsertPt = std::next(R->getIterator());
267 for (unsigned Part = 1; Part != UF; ++Part) {
268 VPRecipeBase *Copy = R->clone();
269 Copy->insertBefore(*R->getParent(), InsertPt);
270 addRecipeForPart(R, Copy, Part);
271 if (RdxPhi) {
272 // If the start value is a ReductionStartVector, use the identity value
273 // (second operand) for unrolled parts. If the scaling factor is > 1,
274 // create a new ReductionStartVector with the scale factor and both
275 // operands set to the identity value.
276 if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
277 assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
278 "unexpected start VPInstruction");
279 if (Part != 1)
280 continue;
281 VPValue *StartV;
282 if (match(VPI->getOperand(2), m_One())) {
283 StartV = VPI->getOperand(1);
284 } else {
285 auto *C = VPI->clone();
286 C->setOperand(0, C->getOperand(1));
287 C->insertAfter(VPI);
288 StartV = C;
289 }
290 for (unsigned Part = 1; Part != UF; ++Part)
291 VPV2Parts[VPI][Part - 1] = StartV;
292 }
293 } else {
295 "unexpected header phi recipe not needing unrolled part");
296 }
297 }
298}
299
300/// Handle non-header-phi recipes.
301void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
303 return;
304
305 if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
307 addUniformForAllParts(VPI);
308 return;
309 }
310 }
311 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
312 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
313 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
314 // Stores to an invariant address only need to store the last part.
315 remapOperands(&R, UF - 1);
316 return;
317 }
318 if (match(RepR,
320 addUniformForAllParts(RepR);
321 return;
322 }
323 }
324
325 // Unroll non-uniform recipes.
326 auto InsertPt = std::next(R.getIterator());
327 VPBasicBlock &VPBB = *R.getParent();
328 for (unsigned Part = 1; Part != UF; ++Part) {
329 VPRecipeBase *Copy = R.clone();
330 Copy->insertBefore(VPBB, InsertPt);
331 addRecipeForPart(&R, Copy, Part);
332
333 // Phi operands are updated once all other recipes have been unrolled.
334 if (isa<VPWidenPHIRecipe>(Copy))
335 continue;
336
337 VPValue *Op;
339 m_VPValue(), m_VPValue(Op)))) {
340 Copy->setOperand(0, getValueForPart(Op, Part - 1));
341 Copy->setOperand(1, getValueForPart(Op, Part));
342 continue;
343 }
345 VPBuilder Builder(&R);
346 const DataLayout &DL = Plan.getDataLayout();
347 Type *IndexTy =
350 : DL.getIndexType(TypeInfo.inferScalarType(R.getVPSingleValue()));
351 Type *VFTy = Plan.getVF().getType();
352 VPValue *VF = Builder.createScalarZExtOrTrunc(
353 &Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown());
354 // VFxUF does not wrap, so VF * Part also cannot wrap.
355 VPValue *VFxPart = Builder.createOverflowingOp(
356 Instruction::Mul, {VF, Plan.getConstantInt(IndexTy, Part)},
357 {true, true});
358 Copy->addOperand(VFxPart);
359 continue;
360 }
361 if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
362 auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
363 if (Phi && Phi->isOrdered()) {
364 auto &Parts = VPV2Parts[Phi];
365 if (Part == 1) {
366 Parts.clear();
367 Parts.push_back(Red);
368 }
369 Parts.push_back(Copy->getVPSingleValue());
370 Phi->setOperand(1, Copy->getVPSingleValue());
371 }
372 }
373 if (auto *VEPR = dyn_cast<VPVectorEndPointerRecipe>(Copy)) {
374 // Materialize PartN offset for VectorEndPointer.
375 VEPR->setOperand(0, R.getOperand(0));
376 VEPR->setOperand(1, R.getOperand(1));
377 VEPR->materializeOffset(Part);
378 continue;
379 }
380
381 remapOperands(Copy, Part);
382
383 if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
384 addStartIndexForScalarSteps(ScalarIVSteps, Part, Plan, TypeInfo);
385
386 if (match(Copy,
388 VPBuilder Builder(Copy);
389 VPValue *ScaledByPart = Builder.createOverflowingOp(
390 Instruction::Mul, {Copy->getOperand(1), getConstantInt(Part)});
391 Copy->setOperand(1, ScaledByPart);
392 }
393 }
394 if (auto *VEPR = dyn_cast<VPVectorEndPointerRecipe>(&R)) {
395 // Materialize Part0 offset for VectorEndPointer.
396 VEPR->materializeOffset();
397 }
398 if (auto *WideCanIV = dyn_cast<VPWidenCanonicalIVRecipe>(&R)) {
399 // Set Part0 step for WidenCanonicalIV.
400 WideCanIV->addOperand(getConstantInt(0));
401 }
402}
403
404void UnrollState::unrollBlock(VPBlockBase *VPB) {
405 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
406 if (VPR) {
407 if (VPR->isReplicator())
408 return unrollReplicateRegionByUF(VPR);
409
410 // Traverse blocks in region in RPO to ensure defs are visited before uses
411 // across blocks.
412 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
413 RPOT(VPR->getEntry());
414 for (VPBlockBase *VPB : RPOT)
415 unrollBlock(VPB);
416 return;
417 }
418
419 // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
420 auto *VPBB = cast<VPBasicBlock>(VPB);
421 auto InsertPtForPhi = VPBB->getFirstNonPhi();
422 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
423 if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
424 continue;
425
426 // Add all VPValues for all parts to AnyOf, FirstActiveLaneMask and
427 // ComputeReductionResult which combine all parts to compute the final
428 // value.
429 VPValue *Op1;
431 match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
432 match(&R, m_LastActiveLane(m_VPValue(Op1))) ||
434 addUniformForAllParts(cast<VPInstruction>(&R));
435 for (unsigned Part = 1; Part != UF; ++Part)
436 R.addOperand(getValueForPart(Op1, Part));
437 continue;
438 }
439 VPValue *Op0;
440 if (match(&R, m_ExtractLane(m_VPValue(Op0), m_VPValue(Op1)))) {
441 addUniformForAllParts(cast<VPInstruction>(&R));
442 for (unsigned Part = 1; Part != UF; ++Part)
443 R.addOperand(getValueForPart(Op1, Part));
444 continue;
445 }
446
447 VPValue *Op2;
449 m_VPValue(Op2)))) {
450 addUniformForAllParts(cast<VPInstruction>(&R));
451 for (unsigned Part = 1; Part != UF; ++Part) {
452 R.addOperand(getValueForPart(Op1, Part));
453 R.addOperand(getValueForPart(Op2, Part));
454 }
455 continue;
456 }
457
458 if (Plan.hasScalarVFOnly()) {
459 if (match(&R, m_ExtractLastPart(m_VPValue(Op0))) ||
461 auto *I = cast<VPInstruction>(&R);
462 bool IsPenultimatePart =
464 unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1;
465 // For scalar VF, directly use the scalar part value.
466 I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
467 continue;
468 }
469 }
470 // For vector VF, the penultimate element is always extracted from the last part.
473 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
474 R.setOperand(0, getValueForPart(Op0, UF - 1));
475 continue;
476 }
477
478 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
479 if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
480 addUniformForAllParts(SingleDef);
481 continue;
482 }
483
484 if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
485 unrollHeaderPHIByUF(H, InsertPtForPhi);
486 continue;
487 }
488
489 unrollRecipeByUF(R);
490 }
491}
492
493void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
494 assert(UF > 0 && "Unroll factor must be positive");
495 Plan.setUF(UF);
496 llvm::scope_exit Cleanup([&Plan, UF]() {
497 auto Iter = vp_depth_first_deep(Plan.getEntry());
498 // Remove recipes that are redundant after unrolling.
500 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
501 auto *VPI = dyn_cast<VPInstruction>(&R);
502 if (VPI &&
503 VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
504 VPI->getOperand(1) == &Plan.getVF()) {
505 VPI->replaceAllUsesWith(VPI->getOperand(0));
506 VPI->eraseFromParent();
507 }
508 }
509 }
510
511 Type *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
512 Plan.getUF().replaceAllUsesWith(Plan.getConstantInt(TCTy, UF));
513 });
514 if (UF == 1) {
515 return;
516 }
517
518 UnrollState Unroller(Plan, UF);
519
520 // Iterate over all blocks in the plan starting from Entry, and unroll
521 // recipes inside them. This includes the vector preheader and middle blocks,
522 // which may set up or post-process per-part values.
524 Plan.getEntry());
525 for (VPBlockBase *VPB : RPOT)
526 Unroller.unrollBlock(VPB);
527
528 unsigned Part = 1;
529 // Remap operands of cloned header phis to update backedge values. The header
530 // phis cloned during unrolling are just after the header phi for part 0.
531 // Reset Part to 1 when reaching the first (part 0) recipe of a block.
532 for (VPRecipeBase &H :
534 // The second operand of Fixed Order Recurrence phi's, feeding the spliced
535 // value across the backedge, needs to remap to the last part of the spliced
536 // value.
538 Unroller.remapOperand(&H, 1, UF - 1);
539 continue;
540 }
541 if (Unroller.contains(H.getVPSingleValue())) {
542 Part = 1;
543 continue;
544 }
545 Unroller.remapOperands(&H, Part);
546 Part++;
547 }
548
550}
551
552/// Add a lane offset to the start index of \p Steps.
553static void addLaneToStartIndex(VPScalarIVStepsRecipe *Steps, unsigned Lane,
554 VPlan &Plan, VPRecipeBase *InsertPt) {
555 assert(Lane > 0 && "Zero lane adds no offset to start index");
556 VPTypeAnalysis TypeInfo(Plan);
557 Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
558
559 VPValue *OldStartIndex = Steps->getStartIndex();
560 VPValue *LaneOffset;
561 unsigned AddOpcode;
562 // TODO: Retrieve the flags from Steps unconditionally.
563 VPIRFlags Flags;
564 if (BaseIVTy->isFloatingPointTy()) {
565 int SignedLane = static_cast<int>(Lane);
566 if (!OldStartIndex && Steps->getInductionOpcode() == Instruction::FSub)
567 SignedLane = -SignedLane;
568 LaneOffset = Plan.getOrAddLiveIn(ConstantFP::get(BaseIVTy, SignedLane));
569 AddOpcode = Steps->getInductionOpcode();
570 Flags = VPIRFlags(FastMathFlags());
571 } else {
572 unsigned BaseIVBits = BaseIVTy->getScalarSizeInBits();
573 LaneOffset = Plan.getConstantInt(
574 APInt(BaseIVBits, Lane, /*isSigned*/ false, /*implicitTrunc*/ true));
575 AddOpcode = Instruction::Add;
576 Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false));
577 }
578
579 VPValue *NewStartIndex = LaneOffset;
580 if (OldStartIndex) {
581 VPBuilder Builder(InsertPt);
582 NewStartIndex =
583 Builder.createNaryOp(AddOpcode, {OldStartIndex, LaneOffset}, Flags);
584 }
585 Steps->setStartIndex(NewStartIndex);
586}
587
588/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe,
589/// VPInstruction or VPScalarIVStepsRecipe) for lane \p Lane. Use \p
590/// Def2LaneDefs to look up scalar definitions for operands of \DefR.
591static VPValue *
592cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
593 VPSingleDefRecipe *DefR, VPLane Lane,
594 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
596 "DefR must be a VPReplicateRecipe, VPInstruction or "
597 "VPScalarIVStepsRecipe");
598 VPValue *Op;
600 auto LaneDefs = Def2LaneDefs.find(Op);
601 if (LaneDefs != Def2LaneDefs.end())
602 return LaneDefs->second[Lane.getKnownLane()];
603
604 VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
605 return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
606 }
607
608 // Collect the operands at Lane, creating extracts as needed.
610 for (VPValue *Op : DefR->operands()) {
611 // If Op is a definition that has been unrolled, directly use the clone for
612 // the corresponding lane.
613 auto LaneDefs = Def2LaneDefs.find(Op);
614 if (LaneDefs != Def2LaneDefs.end()) {
615 NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
616 continue;
617 }
618 if (Lane.getKind() == VPLane::Kind::ScalableLast) {
619 // Look through mandatory Unpack.
620 [[maybe_unused]] bool Matched =
622 assert(Matched && "original op must have been Unpack");
623 auto *ExtractPart =
624 Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op});
625 NewOps.push_back(
626 Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart}));
627 continue;
628 }
630 NewOps.push_back(Op);
631 continue;
632 }
633
634 // Look through buildvector to avoid unnecessary extracts.
635 if (match(Op, m_BuildVector())) {
636 NewOps.push_back(
637 cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
638 continue;
639 }
640 VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
641 VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
642 NewOps.push_back(Ext);
643 }
644
646 if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
647 // TODO: have cloning of replicate recipes also provide the desired result
648 // coupled with setting its operands to NewOps (deriving IsSingleScalar and
649 // Mask from the operands?)
650 New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
651 /*IsSingleScalar=*/true, /*Mask=*/nullptr,
652 *RepR, *RepR, RepR->getDebugLoc());
653 } else {
654 New = DefR->clone();
655 for (const auto &[Idx, Op] : enumerate(NewOps)) {
656 New->setOperand(Idx, Op);
657 }
658 if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(New)) {
659 // Skip lane 0: an absent start index is implicitly zero.
660 unsigned KnownLane = Lane.getKnownLane();
661 if (KnownLane != 0)
662 addLaneToStartIndex(Steps, KnownLane, Plan, DefR);
663 }
664 }
665 New->insertBefore(DefR);
666 return New;
667}
668
669/// Convert recipes in region blocks to operate on a single lane 0.
670/// VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
671/// converted into BranchOnCond, PredInstPhi recipes are replaced by scalar phi
672/// recipes with an additional poison operand, and extracts are created as
673/// needed.
675 VPBlockBase *Entry,
676 ElementCount VF) {
677 VPValue *Idx0 = Plan.getZero(IdxTy);
678 VPTypeAnalysis TypeInfo(Plan);
679 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) {
681 assert(
682 !isa<VPWidenPHIRecipe>(&OldR) &&
683 !match(&OldR,
687 "must not contain wide phis, inserts or extracts before conversion");
688
689 VPBuilder Builder(&OldR);
690 DebugLoc OldDL = OldR.getDebugLoc();
691 // For scalar VF, operands are already scalar; no extraction needed.
692 if (!VF.isScalar()) {
693 for (const auto &[I, Op] : enumerate(OldR.operands())) {
694 // Skip operands that don't need extraction: values defined in the
695 // same block (already scalar), or values that are already single
696 // scalars.
697 // TODO: Support isSingleScalar for VPScalarIVStepsRecipe.
698 auto *DefR = Op->getDefiningRecipe();
700 DefR->getParent() == VPB) ||
702 continue;
703
704 // Extract lane zero from values defined outside the region.
705 VPValue *Extract = Builder.createNaryOp(Instruction::ExtractElement,
706 {Op, Idx0}, OldDL);
707 OldR.setOperand(I, Extract);
708 }
709 }
710
711 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&OldR)) {
712 auto *NewR = new VPReplicateRecipe(
713 RepR->getUnderlyingInstr(), RepR->operands(),
714 /* IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, OldDL);
715 NewR->insertBefore(RepR);
716 RepR->replaceAllUsesWith(NewR);
717 RepR->eraseFromParent();
718 } else if (auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&OldR)) {
719 Builder.createNaryOp(VPInstruction::BranchOnCond,
720 {BranchOnMask->getOperand(0)}, OldDL);
721 BranchOnMask->eraseFromParent();
722 } else if (auto *PredPhi = dyn_cast<VPPredInstPHIRecipe>(&OldR)) {
723 VPValue *PredOp = PredPhi->getOperand(0);
724 Type *PredTy = TypeInfo.inferScalarType(PredOp);
726 VPPhi *NewPhi = Builder.createScalarPhi({Poison, PredOp}, OldDL);
727 PredPhi->replaceAllUsesWith(NewPhi);
728 PredPhi->eraseFromParent();
729 } else {
730 // TODO: Support isSingleScalar for VPScalarIVStepsRecipe.
732 (isa<VPInstruction>(OldR) &&
733 vputils::isSingleScalar(OldR.getVPSingleValue()))) &&
734 "unexpected unhandled recipe");
735 }
736 }
737 }
738}
739
740/// Update recipes in the cloned blocks rooted at \p NewEntry to match \p Lane,
741/// using the original blocks rooted at \p OldEntry as reference.
742static void processLaneForReplicateRegion(VPlan &Plan, Type *IdxTy,
743 unsigned Lane, VPBasicBlock *OldEntry,
744 VPBasicBlock *NewEntry) {
745 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
746 VPValue *IdxLane = Plan.getConstantInt(IdxTy, Lane);
747 for (const auto &[OldBB, NewBB] :
749 vp_depth_first_shallow(NewEntry))) {
750 for (auto &&[OldR, NewR] :
752 for (const auto &[OldV, NewV] :
753 zip_equal(OldR.definedValues(), NewR.definedValues()))
754 Old2NewVPValues[OldV] = NewV;
755
756 // Remap operands to use lane-specific values.
757 for (const auto &[I, OldOp] : enumerate(NewR.operands())) {
758 // Use cloned value if operand was defined in the region.
759 if (auto *NewOp = Old2NewVPValues.lookup(OldOp))
760 NewR.setOperand(I, NewOp);
761 }
762
763 if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
764 addLaneToStartIndex(Steps, Lane, Plan, Steps);
765 } else if (match(&NewR, m_ExtractElement(m_VPValue(), m_VPValue()))) {
766 assert(match(NewR.getOperand(1), m_ZeroInt()) &&
767 "extract indices must be zero");
768 NewR.setOperand(1, IdxLane);
769 } else if (auto *NewPhi = dyn_cast<VPPhi>(&NewR)) {
770 auto *OldPhi = cast<VPPhi>(&OldR);
772 "VPPhis expected to have only first lane used");
773 auto *BVUser = dyn_cast_or_null<VPInstruction>(OldPhi->getSingleUser());
774 if (BVUser && match(BVUser, m_CombineOr(m_BuildVector(),
776 assert(BVUser->getOperand(0) == OldPhi &&
777 "Unexpected first operand of build vector user");
778 BVUser->setOperand(Lane, NewPhi);
779 }
780 }
781 }
782 }
783}
784
785/// Dissolve a single replicate region by replicating its blocks for each lane
786/// of \p VF. The region is disconnected, its blocks are reparented, cloned for
787/// each lane, and reconnected in sequence.
789 VPlan &Plan, Type *IdxTy) {
790 auto *FirstLaneEntry = cast<VPBasicBlock>(Region->getEntry());
791 auto *FirstLaneExiting = cast<VPBasicBlock>(Region->getExiting());
792
793 // Disconnect and dissolve the region.
794 VPBlockBase *Predecessor = Region->getSinglePredecessor();
795 assert(Predecessor && "Replicate region must have a single predecessor");
796 auto *Successor = cast<VPBasicBlock>(Region->getSingleSuccessor());
799
800 VPRegionBlock *ParentRegion = Region->getParent();
801 for (VPBlockBase *VPB : vp_depth_first_shallow(FirstLaneEntry))
802 VPB->setParent(ParentRegion);
803
804 // Process the original blocks for lane 0: converting their recipes to
805 // single-scalar.
806 convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, FirstLaneEntry, VF);
807
808 // For scalar VF, just wire the blocks and return; no cloning or packing
809 // needed.
810 if (VF.isScalar()) {
811 VPBlockUtils::connectBlocks(Predecessor, FirstLaneEntry);
812 VPBlockUtils::connectBlocks(FirstLaneExiting, Successor);
813 return;
814 }
815
816 // Create a BuildVector or BuildStructVector in successor block for every
817 // VPPhi in (first lane's) exiting block having vector uses. All their
818 // operands are initialized to poison and will be replaced when processing
819 // each clone, except for the operand of the first lane which set here.
820 // BuildVectors are recorded to be replaced later by chains of insert-element
821 // and widen phi's.
822 unsigned NumLanes = VF.getFixedValue();
823 VPTypeAnalysis TypeInfo(Plan);
824 SmallVector<VPInstruction *> BuildVectors;
825 for (auto &R : FirstLaneExiting->phis()) {
826 auto *Phi = cast<VPPhi>(&R);
828 continue;
829
830 Type *ScalarTy = TypeInfo.inferScalarType(Phi);
831 bool IsStruct = isa<StructType>(ScalarTy);
833 SmallVector<VPValue *> BVOps(NumLanes, Poison);
834 auto *BV = new VPInstruction(IsStruct ? VPInstruction::BuildStructVector
836 BVOps);
837 if (!IsStruct)
838 BuildVectors.push_back(BV);
839 Phi->replaceAllUsesWith(BV);
840 BV->setOperand(0, Phi);
841 BV->insertBefore(*Successor, Successor->getFirstNonPhi());
842 }
843
844 // Clone converted blocks for remaining lanes and process each in reverse
845 // order, connecting each lane's Exiting block to the subsequent lane's entry.
846 VPBlockBase *NextLaneEntry = Successor;
847 for (int Lane = NumLanes - 1; Lane > 0; --Lane) {
848 const auto &[CurrentLaneEntry, CurrentLaneExiting] =
849 VPBlockUtils::cloneFrom(FirstLaneEntry);
850 for (VPBlockBase *VPB : vp_depth_first_shallow(CurrentLaneEntry))
851 VPB->setParent(ParentRegion);
852 processLaneForReplicateRegion(Plan, IdxTy, Lane,
853 cast<VPBasicBlock>(FirstLaneEntry),
854 cast<VPBasicBlock>(CurrentLaneEntry));
855 VPBlockUtils::connectBlocks(CurrentLaneExiting, NextLaneEntry);
856 NextLaneEntry = CurrentLaneEntry;
857 }
858
859 // Connect Predecessor to FirstLaneEntry, and FirstLaneRegionExit to
860 // NextLaneEntry which is the second lane region entry. The latter is
861 // done last so that earlier clonings from FirstLaneEntry stop at
862 // FirstLaneExiting.
863 VPBlockUtils::connectBlocks(Predecessor, FirstLaneEntry);
864 VPBlockUtils::connectBlocks(FirstLaneExiting, NextLaneEntry);
865
866 // Fold BuildVector fed by scalar phis into VPWidenPHIRecipes with
867 // InsertElement per lane.
868 // TODO: check if this folding should be dropped.
869 for (VPInstruction *BV : BuildVectors) {
870 assert(BV->getNumOperands() == NumLanes &&
871 "BuildVector must have one operand per lane");
872 for (const auto &[Idx, Op] : enumerate(BV->operands())) {
873 auto *ScalarPhi = cast<VPPhi>(Op);
874 auto DL = ScalarPhi->getDebugLoc();
875 auto *PredOp = cast<VPSingleDefRecipe>(ScalarPhi->getOperand(1));
876 VPValue *Poison = ScalarPhi->getOperand(0);
877 VPValue *PrevVal = Idx == 0 ? Poison : BV->getOperand(Idx - 1);
878 auto Builder = VPBuilder::getToInsertAfter(PredOp->getDefiningRecipe());
879 auto *Insert = Builder.createNaryOp(
880 Instruction::InsertElement,
881 {PrevVal, PredOp, Plan.getConstantInt(64, Idx)}, DL);
882 Builder.setInsertPoint(ScalarPhi);
883 auto *NewPhi = Builder.createWidenPhi({PrevVal, Insert}, DL);
884 ScalarPhi->replaceAllUsesWith(NewPhi);
885 ScalarPhi->eraseFromParent();
886 }
887 BV->replaceAllUsesWith(BV->getOperand(NumLanes - 1));
888 BV->eraseFromParent();
889 }
890}
891
892/// Collect and dissolve all replicate regions in the vector loop, replicating
893/// their blocks and recipes for each lane of \p VF.
895 Type *IdxTy) {
896 // Collect all replicate regions before modifying the CFG.
897 SmallVector<VPRegionBlock *> ReplicateRegions;
900 if (Region->isReplicator())
901 ReplicateRegions.push_back(Region);
902 }
903
904 assert((ReplicateRegions.empty() || !VF.isScalable()) &&
905 "cannot replicate across scalable VFs");
906
907 // Dissolve replicate regions by replicating their blocks for each lane.
908 // Traversing regions in reverse ensures that the successor of every region
909 // being processed is a basic-block, rather than another region.
910 for (VPRegionBlock *Region : reverse(ReplicateRegions))
911 dissolveReplicateRegion(Region, VF, Plan, IdxTy);
912
914}
915
917 Type *IdxTy = IntegerType::get(
919
920 if (Plan.hasScalarVFOnly()) {
921 // When Plan is only unrolled by UF, replicating by VF amounts to dissolving
922 // replicate regions.
923 replicateReplicateRegionsByVF(Plan, VF, IdxTy);
924 return;
925 }
926
927 // Visit all VPBBs outside the loop region and directly inside the top-level
928 // loop region.
929 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
931 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
933 auto VPBBsToUnroll =
934 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
935 // A mapping of current VPValue definitions to collections of new VPValues
936 // defined per lane. Serves to hook-up potential users of current VPValue
937 // definition that are replicated-per-VF later.
939 // The removal of current recipes being replaced by new ones needs to be
940 // delayed after Def2LaneDefs is no longer in use.
942 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
943 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
946 cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
947 (isa<VPInstruction>(&R) &&
948 !cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
949 cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
950 continue;
951
952 auto *DefR = cast<VPSingleDefRecipe>(&R);
953 VPBuilder Builder(DefR);
954 if (DefR->getNumUsers() == 0) {
955 // Create single-scalar version of DefR for all lanes.
956 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
957 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
958 DefR->eraseFromParent();
959 continue;
960 }
961 /// Create single-scalar version of DefR for all lanes.
962 SmallVector<VPValue *> LaneDefs;
963 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
964 LaneDefs.push_back(
965 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
966
967 Def2LaneDefs[DefR] = LaneDefs;
968 /// Users that only demand the first lane can use the definition for lane
969 /// 0.
970 DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
971 return U.usesFirstLaneOnly(DefR);
972 });
973
974 // Update each build vector user that currently has DefR as its only
975 // operand, to have all LaneDefs as its operands.
976 for (VPUser *U : to_vector(DefR->users())) {
977 auto *VPI = dyn_cast<VPInstruction>(U);
978 if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
979 VPI->getOpcode() != VPInstruction::BuildStructVector))
980 continue;
981 assert(VPI->getNumOperands() == 1 &&
982 "Build(Struct)Vector must have a single operand before "
983 "replicating by VF");
984 VPI->setOperand(0, LaneDefs[0]);
985 for (VPValue *LaneDef : drop_begin(LaneDefs))
986 VPI->addOperand(LaneDef);
987 }
988 ToRemove.push_back(DefR);
989 }
990 }
991 for (auto *R : reverse(ToRemove))
992 R->eraseFromParent();
993
994 replicateReplicateRegionsByVF(Plan, VF, IdxTy);
995}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isCanonical(const MDString *S)
ManagedStatic< HTTPClientCleanup > Cleanup
#define _
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
MachineInstr unsigned OpIdx
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static void addLaneToStartIndex(VPScalarIVStepsRecipe *Steps, unsigned Lane, VPlan &Plan, VPRecipeBase *InsertPt)
Add a lane offset to the start index of Steps.
static void replicateReplicateRegionsByVF(VPlan &Plan, ElementCount VF, Type *IdxTy)
Collect and dissolve all replicate regions in the vector loop, replicating their blocks and recipes f...
static VPValue * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPSingleDefRecipe *DefR, VPLane Lane, const DenseMap< VPValue *, SmallVector< VPValue * > > &Def2LaneDefs)
Create a single-scalar clone of DefR (must be a VPReplicateRecipe, VPInstruction or VPScalarIVStepsRe...
static void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part, VPlan &Plan, VPTypeAnalysis &TypeInfo)
static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy, VPBlockBase *Entry, ElementCount VF)
Convert recipes in region blocks to operate on a single lane 0.
static void dissolveReplicateRegion(VPRegionBlock *Region, ElementCount VF, VPlan &Plan, Type *IdxTy)
Dissolve a single replicate region by replicating its blocks for each lane of VF.
static void processLaneForReplicateRegion(VPlan &Plan, Type *IdxTy, unsigned Lane, VPBasicBlock *OldEntry, VPBasicBlock *NewEntry)
Update recipes in the cloned blocks rooted at NewEntry to match Lane, using the original blocks roote...
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1216
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static GEPNoWrapFlags none()
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
RegionT * getParent() const
Get the parent of the Region.
Definition RegionInfo.h:362
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4263
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4290
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4351
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:216
void setParent(VPRegionBlock *P)
Definition VPlan.h:196
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:226
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
Definition VPlanUtils.h:314
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:241
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:259
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before Blockptr.
Definition VPlanUtils.h:205
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:295
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
Definition VPlan.cpp:710
VPlan-based builder utility analogous to IRBuilder.
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:544
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:554
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4440
Class to record and manage LLVM IR flags.
Definition VPlan.h:696
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1227
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1341
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1267
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1312
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1262
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1259
@ CanonicalIVIncrementForPart
Definition VPlan.h:1243
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Kind getKind() const
Returns the Kind of lane offset.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:554
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4473
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:760
const VPBlockBase * getEntry() const
Definition VPlan.h:4517
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4549
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4593
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3304
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4108
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4179
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4165
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4160
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4156
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:610
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
operand_range operands()
Definition VPlanValue.h:455
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:423
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1511
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4621
const DataLayout & getDataLayout() const
Definition VPlan.h:4826
VPBasicBlock * getEntry()
Definition VPlan.h:4717
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4780
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4894
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4920
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1098
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4817
bool hasScalarVFOnly() const
Definition VPlan.h:4862
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4766
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4813
void setUF(unsigned UF)
Definition VPlan.h:4877
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4928
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
VPInstruction_match< Instruction::InsertElement, Op0_t, Op1_t, Op2_t > m_InsertElement(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastActive, Op0_t, Op1_t, Op2_t > m_ExtractLastActive(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BuildStructVector > m_BuildStructVector()
BuildStructVector matches only its opcode, w/o matching its operands as the number of operands is not...
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:288
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1151
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
bool isa_and_present(const Y &Val)
isa_and_present<X> - Functionally identical to isa, except that a null value is accepted.
Definition Casting.h:669
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Type * getType() const
Returns the scalar type of this symbolic value.
Definition VPlanValue.h:294
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...