LLVM 22.0.0git
VPlanUnroll.cpp
Go to the documentation of this file.
1//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements explicit unrolling for VPlans.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPRecipeBuilder.h"
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanHelpers.h"
19#include "VPlanPatternMatch.h"
20#include "VPlanTransforms.h"
21#include "VPlanUtils.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/ScopeExit.h"
26#include "llvm/IR/Intrinsics.h"
27
28using namespace llvm;
29using namespace llvm::VPlanPatternMatch;
30
31namespace {
32
33/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
34/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
35/// the unrolling transformation, where the original VPValues are retained for
36/// part zero.
37class UnrollState {
38 /// Plan to unroll.
39 VPlan &Plan;
40 /// Unroll factor to unroll by.
41 const unsigned UF;
42 /// Analysis for types.
43 VPTypeAnalysis TypeInfo;
44
45 /// Unrolling may create recipes that should not be unrolled themselves.
46 /// Those are tracked in ToSkip.
48
49 // Associate with each VPValue of part 0 its unrolled instances of parts 1,
50 // ..., UF-1.
52
53 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
54 void unrollReplicateRegionByUF(VPRegionBlock *VPR);
55
56 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
57 /// all parts.
58 void unrollRecipeByUF(VPRecipeBase &R);
59
60 /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
61 /// depends on the concrete header phi. Inserts newly created recipes at \p
62 /// InsertPtForPhi.
63 void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
64 VPBasicBlock::iterator InsertPtForPhi);
65
66 /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
67 /// the induction steps for each part.
68 void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
69 VPBasicBlock::iterator InsertPtForPhi);
70
71 VPValue *getConstantVPV(unsigned Part) {
72 Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
73 return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
74 }
75
76public:
77 UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}
78
79 void unrollBlock(VPBlockBase *VPB);
80
81 VPValue *getValueForPart(VPValue *V, unsigned Part) {
82 if (Part == 0 || V->isLiveIn())
83 return V;
84 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
85 "accessed value does not exist");
86 return VPV2Parts[V][Part - 1];
87 }
88
89 /// Given a single original recipe \p OrigR (of part zero), and its copy \p
90 /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
91 /// corresponding VPValue defined by \p CopyR.
92 void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
93 unsigned Part) {
94 for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
95 const auto &[V, _] = VPV2Parts.try_emplace(VPV);
96 assert(V->second.size() == Part - 1 && "earlier parts not set");
97 V->second.push_back(CopyR->getVPValue(Idx));
98 }
99 }
100
101 /// Given a uniform recipe \p R, add it for all parts.
102 void addUniformForAllParts(VPSingleDefRecipe *R) {
103 const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
104 assert(Inserted && "uniform value already added");
105 for (unsigned Part = 0; Part != UF; ++Part)
106 V->second.push_back(R);
107 }
108
109 bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
110
111 /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
112 /// \p P.
113 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
114 auto *Op = R->getOperand(OpIdx);
115 R->setOperand(OpIdx, getValueForPart(Op, Part));
116 }
117
118 /// Update \p R's operands with their corresponding VPValues for part \p P.
119 void remapOperands(VPRecipeBase *R, unsigned Part) {
120 for (const auto &[OpIdx, Op] : enumerate(R->operands()))
121 R->setOperand(OpIdx, getValueForPart(Op, Part));
122 }
123};
124} // namespace
125
126void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
127 VPBlockBase *InsertPt = VPR->getSingleSuccessor();
128 for (unsigned Part = 1; Part != UF; ++Part) {
129 auto *Copy = VPR->clone();
130 VPBlockUtils::insertBlockBefore(Copy, InsertPt);
131
132 auto PartI = vp_depth_first_shallow(Copy->getEntry());
133 auto Part0 = vp_depth_first_shallow(VPR->getEntry());
134 for (const auto &[PartIVPBB, Part0VPBB] :
135 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
136 VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
137 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
138 remapOperands(&PartIR, Part);
139 if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140 ScalarIVSteps->addOperand(getConstantVPV(Part));
141 }
142
143 addRecipeForPart(&Part0R, &PartIR, Part);
144 }
145 }
146 }
147}
148
149void UnrollState::unrollWidenInductionByUF(
151 VPBasicBlock *PH = cast<VPBasicBlock>(
152 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
153 Type *IVTy = TypeInfo.inferScalarType(IV);
154 auto &ID = IV->getInductionDescriptor();
156 if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
157 Flags = ID.getInductionBinOp()->getFastMathFlags();
158
159 VPValue *ScalarStep = IV->getStepValue();
160 VPBuilder Builder(PH);
161 Type *VectorStepTy =
162 IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
163 VPInstruction *VectorStep = Builder.createNaryOp(
164 VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
165 Flags, IV->getDebugLoc());
166
167 ToSkip.insert(VectorStep);
168
169 // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
170 // remains the header phi. Parts > 0 are computed by adding Step to the
171 // previous part. The header phi recipe will get 2 new operands: the step
172 // value for a single part and the last part, used to compute the backedge
173 // value during VPWidenInductionRecipe::execute.
174 // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
175 // %Part.1 = %Part.0 + %VectorStep
176 // %Part.2 = %Part.1 + %VectorStep
177 // %Part.3 = %Part.2 + %VectorStep
178 //
179 // The newly added recipes are added to ToSkip to avoid interleaving them
180 // again.
181 VPValue *Prev = IV;
182 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
183 unsigned AddOpc;
184 if (IVTy->isPointerTy())
186 else if (IVTy->isFloatingPointTy())
187 AddOpc = ID.getInductionOpcode();
188 else
189 AddOpc = Instruction::Add;
190 for (unsigned Part = 1; Part != UF; ++Part) {
191 std::string Name =
192 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
193
194 VPInstruction *Add = Builder.createNaryOp(AddOpc,
195 {
196 Prev,
197 VectorStep,
198 },
199 Flags, IV->getDebugLoc(), Name);
200 ToSkip.insert(Add);
201 addRecipeForPart(IV, Add, Part);
202 Prev = Add;
203 }
204 IV->addOperand(VectorStep);
205 IV->addOperand(Prev);
206}
207
208void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
209 VPBasicBlock::iterator InsertPtForPhi) {
210 // First-order recurrences pass a single vector or scalar through their header
211 // phis, irrespective of interleaving.
212 if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
213 return;
214
215 // Generate step vectors for each unrolled part.
216 if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
217 unrollWidenInductionByUF(IV, InsertPtForPhi);
218 return;
219 }
220
221 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
222 if (RdxPhi && RdxPhi->isOrdered())
223 return;
224
225 auto InsertPt = std::next(R->getIterator());
226 for (unsigned Part = 1; Part != UF; ++Part) {
227 VPRecipeBase *Copy = R->clone();
228 Copy->insertBefore(*R->getParent(), InsertPt);
229 addRecipeForPart(R, Copy, Part);
230 if (RdxPhi) {
231 // If the start value is a ReductionStartVector, use the identity value
232 // (second operand) for unrolled parts. If the scaling factor is > 1,
233 // create a new ReductionStartVector with the scale factor and both
234 // operands set to the identity value.
235 if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
236 assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
237 "unexpected start VPInstruction");
238 if (Part != 1)
239 continue;
240 VPValue *StartV;
241 if (match(VPI->getOperand(2), m_SpecificInt(1))) {
242 StartV = VPI->getOperand(1);
243 } else {
244 auto *C = VPI->clone();
245 C->setOperand(0, C->getOperand(1));
246 C->insertAfter(VPI);
247 StartV = C;
248 }
249 for (unsigned Part = 1; Part != UF; ++Part)
250 VPV2Parts[VPI][Part - 1] = StartV;
251 }
252 Copy->addOperand(getConstantVPV(Part));
253 } else {
254 assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
255 "unexpected header phi recipe not needing unrolled part");
256 }
257 }
258}
259
260/// Handle non-header-phi recipes.
261void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
262 if (match(&R, m_BranchOnCond(m_VPValue())) ||
264 return;
265
266 if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
268 addUniformForAllParts(VPI);
269 return;
270 }
271 }
272 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
273 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
274 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
275 // Stores to an invariant address only need to store the last part.
276 remapOperands(&R, UF - 1);
277 return;
278 }
279 if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
280 if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
281 addUniformForAllParts(RepR);
282 return;
283 }
284 }
285 }
286
287 // Unroll non-uniform recipes.
288 auto InsertPt = std::next(R.getIterator());
289 VPBasicBlock &VPBB = *R.getParent();
290 for (unsigned Part = 1; Part != UF; ++Part) {
291 VPRecipeBase *Copy = R.clone();
292 Copy->insertBefore(VPBB, InsertPt);
293 addRecipeForPart(&R, Copy, Part);
294
295 VPValue *Op;
296 if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
297 m_VPValue(), m_VPValue(Op)))) {
298 Copy->setOperand(0, getValueForPart(Op, Part - 1));
299 Copy->setOperand(1, getValueForPart(Op, Part));
300 continue;
301 }
302 if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
303 auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
304 if (Phi && Phi->isOrdered()) {
305 auto &Parts = VPV2Parts[Phi];
306 if (Part == 1) {
307 Parts.clear();
308 Parts.push_back(Red);
309 }
310 Parts.push_back(Copy->getVPSingleValue());
311 Phi->setOperand(1, Copy->getVPSingleValue());
312 }
313 }
314 remapOperands(Copy, Part);
315
316 // Add operand indicating the part to generate code for, to recipes still
317 // requiring it.
320 match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
321 m_VPValue())))
322 Copy->addOperand(getConstantVPV(Part));
323
324 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
325 Copy->setOperand(0, R.getOperand(0));
326 }
327}
328
329void UnrollState::unrollBlock(VPBlockBase *VPB) {
330 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
331 if (VPR) {
332 if (VPR->isReplicator())
333 return unrollReplicateRegionByUF(VPR);
334
335 // Traverse blocks in region in RPO to ensure defs are visited before uses
336 // across blocks.
338 RPOT(VPR->getEntry());
339 for (VPBlockBase *VPB : RPOT)
340 unrollBlock(VPB);
341 return;
342 }
343
344 // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
345 auto *VPBB = cast<VPBasicBlock>(VPB);
346 auto InsertPtForPhi = VPBB->getFirstNonPhi();
347 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
348 if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
349 continue;
350
351 // Add all VPValues for all parts to AnyOf, FirstActiveLaneMask and
352 // Compute*Result which combine all parts to compute the final value.
353 VPValue *Op1;
354 if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
355 match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
356 m_VPValue(Op1))) ||
357 match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
358 m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
359 match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
360 m_VPValue(), m_VPValue(Op1))) ||
361 match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
362 m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
363 addUniformForAllParts(cast<VPInstruction>(&R));
364 for (unsigned Part = 1; Part != UF; ++Part)
365 R.addOperand(getValueForPart(Op1, Part));
366 continue;
367 }
368 VPValue *Op0;
369 if (match(&R, m_VPInstruction<VPInstruction::ExtractLane>(
370 m_VPValue(Op0), m_VPValue(Op1)))) {
371 addUniformForAllParts(cast<VPInstruction>(&R));
372 for (unsigned Part = 1; Part != UF; ++Part)
373 R.addOperand(getValueForPart(Op1, Part));
374 continue;
375 }
376 if (match(&R, m_ExtractLastElement(m_VPValue(Op0))) ||
377 match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
378 m_VPValue(Op0)))) {
379 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
380 if (Plan.hasScalarVFOnly()) {
381 auto *I = cast<VPInstruction>(&R);
382 // Extracting from end with VF = 1 implies retrieving the last or
383 // penultimate scalar part (UF-1 or UF-2).
384 unsigned Offset =
385 I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
386 I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
387 R.eraseFromParent();
388 } else {
389 // Otherwise we extract from the last part.
390 remapOperands(&R, UF - 1);
391 }
392 continue;
393 }
394
395 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
396 if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
397 addUniformForAllParts(SingleDef);
398 continue;
399 }
400
401 if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
402 unrollHeaderPHIByUF(H, InsertPtForPhi);
403 continue;
404 }
405
406 unrollRecipeByUF(R);
407 }
408}
409
410void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
411 assert(UF > 0 && "Unroll factor must be positive");
412 Plan.setUF(UF);
413 auto Cleanup = make_scope_exit([&Plan]() {
414 auto Iter = vp_depth_first_deep(Plan.getEntry());
415 // Remove recipes that are redundant after unrolling.
416 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
417 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
418 auto *VPI = dyn_cast<VPInstruction>(&R);
419 if (VPI &&
420 VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
421 VPI->getNumOperands() == 1) {
422 VPI->replaceAllUsesWith(VPI->getOperand(0));
423 VPI->eraseFromParent();
424 }
425 }
426 }
427 });
428 if (UF == 1) {
429 return;
430 }
431
432 UnrollState Unroller(Plan, UF);
433
434 // Iterate over all blocks in the plan starting from Entry, and unroll
435 // recipes inside them. This includes the vector preheader and middle blocks,
436 // which may set up or post-process per-part values.
438 Plan.getEntry());
439 for (VPBlockBase *VPB : RPOT)
440 Unroller.unrollBlock(VPB);
441
442 unsigned Part = 1;
443 // Remap operands of cloned header phis to update backedge values. The header
444 // phis cloned during unrolling are just after the header phi for part 0.
445 // Reset Part to 1 when reaching the first (part 0) recipe of a block.
446 for (VPRecipeBase &H :
448 // The second operand of Fixed Order Recurrence phi's, feeding the spliced
449 // value across the backedge, needs to remap to the last part of the spliced
450 // value.
451 if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
452 Unroller.remapOperand(&H, 1, UF - 1);
453 continue;
454 }
455 if (Unroller.contains(H.getVPSingleValue())) {
456 Part = 1;
457 continue;
458 }
459 Unroller.remapOperands(&H, Part);
460 Part++;
461 }
462
464}
465
466/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
467/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
468static VPReplicateRecipe *
469cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
470 VPReplicateRecipe *RepR, VPLane Lane,
471 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
472 // Collect the operands at Lane, creating extracts as needed.
474 for (VPValue *Op : RepR->operands()) {
475 // If Op is a definition that has been unrolled, directly use the clone for
476 // the corresponding lane.
477 auto LaneDefs = Def2LaneDefs.find(Op);
478 if (LaneDefs != Def2LaneDefs.end()) {
479 NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
480 continue;
481 }
482 if (Lane.getKind() == VPLane::Kind::ScalableLast) {
483 NewOps.push_back(
485 continue;
486 }
488 NewOps.push_back(Op);
489 continue;
490 }
491
492 // Look through buildvector to avoid unnecessary extracts.
493 if (match(Op, m_BuildVector())) {
494 NewOps.push_back(
495 cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
496 continue;
497 }
498 VPValue *Idx =
499 Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
500 VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
501 NewOps.push_back(Ext);
502 }
503
504 auto *New =
505 new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
506 /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
507 New->transferFlags(*RepR);
508 New->insertBefore(RepR);
509 return New;
510}
511
513 Type *IdxTy = IntegerType::get(
515
516 // Visit all VPBBs outside the loop region and directly inside the top-level
517 // loop region.
518 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
520 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
522 auto VPBBsToUnroll =
523 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
524 // A mapping of current VPValue definitions to collections of new VPValues
525 // defined per lane. Serves to hook-up potential users of current VPValue
526 // definition that are replicated-per-VF later.
528 // The removal of current recipes being replaced by new ones needs to be
529 // delayed after Def2LaneDefs is no longer in use.
531 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
532 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
533 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
534 if (!RepR || RepR->isSingleScalar())
535 continue;
536
537 VPBuilder Builder(RepR);
538 if (RepR->getNumUsers() == 0) {
539 if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
540 vputils::isSingleScalar(RepR->getOperand(1))) {
541 // Stores to invariant addresses need to store the last lane only.
542 cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
543 Def2LaneDefs);
544 } else {
545 // Create single-scalar version of RepR for all lanes.
546 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
547 cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
548 }
549 RepR->eraseFromParent();
550 continue;
551 }
552 /// Create single-scalar version of RepR for all lanes.
553 SmallVector<VPValue *> LaneDefs;
554 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
555 LaneDefs.push_back(
556 cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
557
558 Def2LaneDefs[RepR] = LaneDefs;
559 /// Users that only demand the first lane can use the definition for lane
560 /// 0.
561 RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
562 return U.onlyFirstLaneUsed(RepR);
563 });
564
565 // Update each build vector user that currently has RepR as its only
566 // operand, to have all LaneDefs as its operands.
567 for (VPUser *U : to_vector(RepR->users())) {
568 auto *VPI = dyn_cast<VPInstruction>(U);
569 if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
570 VPI->getOpcode() != VPInstruction::BuildStructVector))
571 continue;
572 assert(VPI->getNumOperands() == 1 &&
573 "Build(Struct)Vector must have a single operand before "
574 "replicating by VF");
575 VPI->setOperand(0, LaneDefs[0]);
576 for (VPValue *LaneDef : drop_begin(LaneDefs))
577 VPI->addOperand(LaneDef);
578 }
579 ToRemove.push_back(RepR);
580 }
581 }
582 for (auto *R : reverse(ToRemove))
583 R->eraseFromParent();
584}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefAnalysis InstSet & ToRemove
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
#define _
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static VPReplicateRecipe * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPReplicateRecipe *RepR, VPLane Lane, const DenseMap< VPValue *, SmallVector< VPValue * > > &Def2LaneDefs)
Create a single-scalar clone of RepR for lane Lane.
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition: VPlan.cpp:1141
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:83
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
This class represents an Operation in the Expression.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:168
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3639
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3666
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3727
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:236
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:81
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:209
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before Blockptr.
Definition: VPlanUtils.h:137
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3322
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:406
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1951
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3816
Class to record and manage LLVM IR flags.
Definition: VPlan.h:596
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:967
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition: VPlan.h:1030
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition: VPlan.h:1034
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition: VPlan.h:993
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition: VPlan.h:990
@ CanonicalIVIncrementForPart
Definition: VPlan.h:983
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:125
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlanHelpers.h:166
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlanHelpers.h:183
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlanHelpers.h:172
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:391
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3827
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:772
const VPBlockBase * getEntry() const
Definition: VPlan.h:3863
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3895
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2731
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3529
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:518
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:582
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:43
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:197
operand_range operands()
Definition: VPlanValue.h:265
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
Definition: VPlan.h:1817
A recipe to compute the pointers for widened memory accesses of IndexTy.
Definition: VPlan.h:1876
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3424
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2013
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3930
VPBasicBlock * getEntry()
Definition: VPlan.h:4029
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:4122
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1037
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4181
bool hasScalarVFOnly() const
Definition: VPlan.h:4150
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4235
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:4077
void setUF(unsigned UF)
Definition: VPlan.h:4164
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition: VPlanUtils.h:44
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
Definition: VPlanUtils.cpp:93
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:22
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:216
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition: VPlanCFG.h:243
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1300
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition: Casting.h:548
@ Add
Sum of integers.
DWARFExpression::Operation Op
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace each VPReplicateRecipe outside on any replicate region in Plan with VF single-scalar recipes.