LLVM 23.0.0git
VPlanUtils.cpp
Go to the documentation of this file.
1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
10#include "VPlanAnalysis.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanPatternMatch.h"
14#include "llvm/ADT/TypeSwitch.h"
18
19using namespace llvm;
20using namespace llvm::VPlanPatternMatch;
21using namespace llvm::SCEVPatternMatch;
22
24 return all_of(Def->users(),
25 [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
26}
27
29 return all_of(Def->users(),
30 [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
31}
32
34 return all_of(Def->users(),
35 [Def](const VPUser *U) { return U->usesScalars(Def); });
36}
37
39 if (auto *E = dyn_cast<SCEVConstant>(Expr))
40 return Plan.getOrAddLiveIn(E->getValue());
41 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
42 // value. Otherwise the value may be defined in a loop and using it directly
43 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
44 // form.
45 auto *U = dyn_cast<SCEVUnknown>(Expr);
46 if (U && !isa<Instruction>(U->getValue()))
47 return Plan.getOrAddLiveIn(U->getValue());
48 auto *Expanded = new VPExpandSCEVRecipe(Expr);
49 Plan.getEntry()->appendRecipe(Expanded);
50 return Expanded;
51}
52
53bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
55 return true;
56
57 auto IsWideCanonicalIV = [](VPValue *A) {
61 };
62
63 VPValue *A, *B;
64
65 auto m_CanonicalScalarIVSteps =
67 m_One(), m_Specific(&Plan.getVF()));
68
70 return B == Plan.getTripCount() &&
71 (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));
72
73 // For scalar plans, the header mask uses the scalar steps.
74 if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
76 assert(Plan.hasScalarVFOnly() &&
77 "Non-scalar VF using scalar IV steps for header mask?");
78 return true;
79 }
80
81 return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
82 B == Plan.getBackedgeTakenCount();
83}
84
85/// Returns true if \p R propagates poison from any operand to its result.
89 [](const VPRecipeBase *) { return true; })
90 .Case([](const VPReplicateRecipe *Rep) {
91 // GEP and casts propagate poison from all operands.
92 unsigned Opcode = Rep->getOpcode();
93 return Opcode == Instruction::GetElementPtr ||
94 Instruction::isCast(Opcode);
95 })
96 .Default([](const VPRecipeBase *) { return false; });
97}
98
99/// Returns true if \p V being poison is guaranteed to trigger UB because it
100/// propagates to the address of a memory recipe.
101static bool poisonGuaranteesUB(const VPValue *V) {
104
105 Worklist.push_back(V);
106
107 while (!Worklist.empty()) {
108 const VPValue *Current = Worklist.pop_back_val();
109 if (!Visited.insert(Current).second)
110 continue;
111
112 for (VPUser *U : Current->users()) {
113 // Check if Current is used as an address operand for load/store.
114 if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
115 if (MemR->getAddr() == Current)
116 return true;
117 continue;
118 }
119 if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
120 unsigned Opcode = Rep->getOpcode();
121 if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
122 (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
123 return true;
124 }
125
126 // Check if poison propagates through this recipe to any of its users.
127 auto *R = cast<VPRecipeBase>(U);
128 for (const VPValue *Op : R->operands()) {
129 if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
130 Worklist.push_back(R->getVPSingleValue());
131 break;
132 }
133 }
134 }
135 }
136
137 return false;
138}
139
142 const Loop *L) {
143 ScalarEvolution &SE = *PSE.getSE();
145 Value *LiveIn = V->getUnderlyingValue();
146 if (LiveIn && SE.isSCEVable(LiveIn->getType()))
147 return SE.getSCEV(LiveIn);
148 return SE.getCouldNotCompute();
149 }
150
151 // Helper to create SCEVs for binary and unary operations.
152 auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
153 function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
154 -> const SCEV * {
156 for (VPValue *Op : Ops) {
157 const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
159 return SE.getCouldNotCompute();
160 SCEVOps.push_back(S);
161 }
162 return CreateFn(SCEVOps);
163 };
164
165 VPValue *LHSVal, *RHSVal;
166 if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
167 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
168 return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
169 });
170 if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
171 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
172 return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
173 });
174 if (match(V, m_Not(m_VPValue(LHSVal)))) {
175 // not X = xor X, -1 = -1 - X
176 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
177 return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
178 });
179 }
180 if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
181 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
182 return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
183 });
184 if (match(V,
186 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
187 return SE.getUDivExpr(Ops[0], Ops[1]);
188 });
189 // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
190 const APInt *Mask;
191 if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
192 (*Mask + 1).isPowerOf2())
193 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
194 return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
195 });
196 if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
197 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
198 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
199 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
200 return SE.getTruncateExpr(Ops[0], DestTy);
201 });
202 }
203 if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
204 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
205 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
206 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
207 return SE.getZeroExtendExpr(Ops[0], DestTy);
208 });
209 }
210 if (match(V, m_SExt(m_VPValue(LHSVal)))) {
211 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
212 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
213
214 // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
215 // onto the operands before computing the subtraction.
216 VPValue *SubLHS, *SubRHS;
217 auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
218 if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
219 SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
220 const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
221 const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
223 return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
224 SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
225 }
226
227 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
228 return SE.getSignExtendExpr(Ops[0], DestTy);
229 });
230 }
231 if (match(V,
233 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
234 return SE.getUMaxExpr(Ops[0], Ops[1]);
235 });
236 if (match(V,
238 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
239 return SE.getSMaxExpr(Ops[0], Ops[1]);
240 });
241 if (match(V,
243 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
244 return SE.getUMinExpr(Ops[0], Ops[1]);
245 });
246 if (match(V,
248 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
249 return SE.getSMinExpr(Ops[0], Ops[1]);
250 });
251
253 Type *SourceElementType;
254 if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
255 const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
256 return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
257 });
258 return PSE.getPredicatedSCEV(GEPExpr);
259 }
260
261 // TODO: Support constructing SCEVs for more recipes as needed.
262 const VPRecipeBase *DefR = V->getDefiningRecipe();
263 const SCEV *Expr =
265 .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
266 .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
267 if (!L)
268 return SE.getCouldNotCompute();
269 const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
270 return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
272 })
273 .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
274 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
275 if (!L || isa<SCEVCouldNotCompute>(Step))
276 return SE.getCouldNotCompute();
277 const SCEV *Start =
278 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
279 const SCEV *AddRec =
280 SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
281 if (R->getTruncInst())
282 return SE.getTruncateExpr(AddRec, R->getScalarType());
283 return AddRec;
284 })
285 .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
286 const SCEV *Start =
287 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
288 if (!L || isa<SCEVCouldNotCompute>(Start))
289 return SE.getCouldNotCompute();
290 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
291 if (isa<SCEVCouldNotCompute>(Step))
292 return SE.getCouldNotCompute();
293 return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
294 })
295 .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
296 const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
297 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
298 const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
299 if (any_of(ArrayRef({Start, IV, Scale}),
301 return SE.getCouldNotCompute();
302
303 return SE.getAddExpr(
304 SE.getTruncateOrSignExtend(Start, IV->getType()),
305 SE.getMulExpr(
306 IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
307 })
308 .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
309 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
310 const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
312 return SE.getCouldNotCompute();
313 return SE.getTruncateOrSignExtend(IV, Step->getType());
314 })
315 .Default(
316 [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
317
318 return PSE.getPredicatedSCEV(Expr);
319}
320
322 const Loop *L) {
323 // If address is an SCEVAddExpr, we require that all operands must be either
324 // be invariant or a (possibly sign-extend) affine AddRec.
325 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
326 return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
327 return SE.isLoopInvariant(Op, L) ||
328 match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
329 match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
330 });
331 }
332
333 // Otherwise, check if address is loop invariant or an affine add recurrence.
334 return SE.isLoopInvariant(Addr, L) ||
336}
337
338/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
339/// uniform, the result will also be uniform.
340static bool preservesUniformity(unsigned Opcode) {
341 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
342 return true;
343 switch (Opcode) {
344 case Instruction::Freeze:
345 case Instruction::GetElementPtr:
346 case Instruction::ICmp:
347 case Instruction::FCmp:
348 case Instruction::Select:
353 return true;
354 default:
355 return false;
356 }
357}
358
360 // A live-in must be uniform across the scope of VPlan.
362 return true;
363
364 if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
365 const VPRegionBlock *RegionOfR = Rep->getRegion();
366 // Don't consider recipes in replicate regions as uniform yet; their first
367 // lane cannot be accessed when executing the replicate region for other
368 // lanes.
369 if (RegionOfR && RegionOfR->isReplicator())
370 return false;
371 return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
372 all_of(Rep->operands(), isSingleScalar));
373 }
376 if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
377 return preservesUniformity(WidenR->getOpcode()) &&
378 all_of(WidenR->operands(), isSingleScalar);
379 }
380 if (auto *VPI = dyn_cast<VPInstruction>(VPV))
381 return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
382 (preservesUniformity(VPI->getOpcode()) &&
383 all_of(VPI->operands(), isSingleScalar));
384 if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
385 return !RR->isPartialReduction();
388 return true;
389 if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
390 return Expr->isSingleScalar();
391
392 // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
393 return isa<VPExpandSCEVRecipe>(VPV);
394}
395
397 // Live-ins are uniform.
399 return true;
400
401 VPRecipeBase *R = V->getDefiningRecipe();
402 VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
403 VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
404 if (VPBB &&
405 (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
406 if (match(V->getDefiningRecipe(),
408 return false;
409 return all_of(R->operands(), isUniformAcrossVFsAndUFs);
410 }
411
412 if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) {
413 // Canonical IV is uniform.
414 if (V == EnclosingRegion->getCanonicalIV())
415 return true;
416 }
417
419 .Case([](const VPDerivedIVRecipe *R) { return true; })
420 .Case([](const VPReplicateRecipe *R) {
421 // Be conservative about side-effects, except for the
422 // known-side-effecting assumes and stores, which we know will be
423 // uniform.
424 return R->isSingleScalar() &&
425 (!R->mayHaveSideEffects() ||
426 isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
427 all_of(R->operands(), isUniformAcrossVFsAndUFs);
428 })
429 .Case([](const VPWidenRecipe *R) {
430 return preservesUniformity(R->getOpcode()) &&
431 all_of(R->operands(), isUniformAcrossVFsAndUFs);
432 })
433 .Case([](const VPInstruction *VPI) {
434 return (VPI->isScalarCast() &&
438 })
439 .Case([](const VPWidenCastRecipe *R) {
440 // A cast is uniform according to its operand.
441 return isUniformAcrossVFsAndUFs(R->getOperand(0));
442 })
443 .Default([](const VPRecipeBase *) { // A value is considered non-uniform
444 // unless proven otherwise.
445 return false;
446 });
447}
448
450 auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
451 auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
452 return VPBlockUtils::isHeader(VPB, VPDT);
453 });
454 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
455}
456
458 if (!R)
459 return 1;
460 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
461 return RR->getVFScaleFactor();
462 if (auto *RR = dyn_cast<VPReductionRecipe>(R))
463 return RR->getVFScaleFactor();
464 if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
465 return ER->getVFScaleFactor();
466 assert(
469 "getting scaling factor of reduction-start-vector not implemented yet");
470 return 1;
471}
472
473std::optional<VPValue *>
477 // Given a VPlan like the following (just including the recipes contributing
478 // to loop control exiting here, not the actual work), we're looking to match
479 // the recipes contributing to the uncountable exit condition comparison
480 // (here, vp<%4>) back to either live-ins or the address nodes for the load
481 // used as part of the uncountable exit comparison so that we can copy them
482 // to a preheader and rotate the address in the loop to the next vector
483 // iteration.
484 //
485 // Currently, the address of the load is restricted to a GEP with 2 operands
486 // and a live-in base address. This constraint may be relaxed later.
487 //
488 // VPlan ' for UF>=1' {
489 // Live-in vp<%0> = VF
490 // Live-in ir<64> = original trip-count
491 //
492 // entry:
493 // Successor(s): preheader, vector.ph
494 //
495 // vector.ph:
496 // Successor(s): vector loop
497 //
498 // <x1> vector loop: {
499 // vector.body:
500 // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
501 // vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
502 // CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
503 // WIDEN ir<%ee.load> = load ir<%ee.addr>
504 // WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
505 // EMIT vp<%5> = any-of vp<%4>
506 // EMIT vp<%6> = add vp<%2>, vp<%0>
507 // EMIT vp<%7> = icmp eq vp<%6>, ir<64>
508 // EMIT branch-on-two-conds vp<%5>, vp<%7>
509 // No successors
510 // }
511 // Successor(s): early.exit, middle.block
512 //
513 // middle.block:
514 // Successor(s): preheader
515 //
516 // preheader:
517 // No successors
518 // }
519
520 // Find the uncountable loop exit condition.
521 auto *Region = Plan.getVectorLoopRegion();
522 VPValue *UncountableCondition = nullptr;
523 if (!match(Region->getExitingBasicBlock()->getTerminator(),
524 m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
525 m_VPValue())))
526 return std::nullopt;
527
529 Worklist.push_back(UncountableCondition);
530 while (!Worklist.empty()) {
531 VPValue *V = Worklist.pop_back_val();
532
533 // Any value defined outside the loop does not need to be copied.
534 if (V->isDefinedOutsideLoopRegions())
535 continue;
536
537 // FIXME: Remove the single user restriction; it's here because we're
538 // starting with the simplest set of loops we can, and multiple
539 // users means needing to add PHI nodes in the transform.
540 if (V->getNumUsers() > 1)
541 return std::nullopt;
542
543 VPValue *Op1, *Op2;
544 // Walk back through recipes until we find at least one load from memory.
545 if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
546 Worklist.push_back(Op1);
547 Worklist.push_back(Op2);
548 Recipes.push_back(V->getDefiningRecipe());
549 } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
550 // Reject masked loads for the time being; they make the exit condition
551 // more complex.
552 if (Load->isMasked())
553 return std::nullopt;
554
555 VPValue *GEP = Load->getAddr();
557 return std::nullopt;
558
559 Recipes.push_back(Load);
560 Recipes.push_back(GEP->getDefiningRecipe());
561 GEPs.push_back(GEP->getDefiningRecipe());
562 } else
563 return std::nullopt;
564 }
565
566 return UncountableCondition;
567}
568
570 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
571 SmallVector<VPValue *> WideCanonicalIVs;
572 auto *FoundWidenCanonicalIVUser = find_if(
574 assert(count_if(LoopRegion->getCanonicalIV()->users(),
576 "Must have at most one VPWideCanonicalIVRecipe");
577 if (FoundWidenCanonicalIVUser !=
578 LoopRegion->getCanonicalIV()->users().end()) {
579 auto *WideCanonicalIV =
580 cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
581 WideCanonicalIVs.push_back(WideCanonicalIV);
582 }
583
584 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
585 // version of the canonical induction.
586 VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
587 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
588 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
589 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
590 WideCanonicalIVs.push_back(WidenOriginalIV);
591 }
592
593 // Walk users of wide canonical IVs and find the single compare of the form
594 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
595 VPSingleDefRecipe *HeaderMask = nullptr;
596 for (auto *Wide : WideCanonicalIVs) {
597 for (VPUser *U : Wide->users()) {
598 auto *VPI = dyn_cast<VPInstruction>(U);
599 if (!VPI || !vputils::isHeaderMask(VPI, Plan))
600 continue;
601
602 assert(VPI->getOperand(0) == Wide &&
603 "WidenCanonicalIV must be the first operand of the compare");
604 assert(!HeaderMask && "Multiple header masks found?");
605 HeaderMask = VPI;
606 }
607 }
608 return HeaderMask;
609}
610
612 const VPDominatorTree &VPDT) {
613 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
614 if (!VPBB)
615 return false;
616
617 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
618 // VPBB as its entry, i.e., free of predecessors.
619 if (auto *R = VPBB->getParent())
620 return !R->isReplicator() && !VPBB->hasPredecessors();
621
622 // A header dominates its second predecessor (the latch), with the other
623 // predecessor being the preheader
624 return VPB->getPredecessors().size() == 2 &&
625 VPDT.dominates(VPB, VPB->getPredecessors()[1]);
626}
627
629 const VPDominatorTree &VPDT) {
630 // A latch has a header as its second successor, with its other successor
631 // leaving the loop. A preheader OTOH has a header as its first (and only)
632 // successor.
633 return VPB->getNumSuccessors() == 2 &&
634 VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
635}
636
637std::optional<MemoryLocation>
639 auto *M = dyn_cast<VPIRMetadata>(&R);
640 if (!M)
641 return std::nullopt;
643 // Populate noalias metadata from VPIRMetadata.
644 if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
645 Loc.AATags.NoAlias = NoAliasMD;
646 if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
647 Loc.AATags.Scope = AliasScopeMD;
648 return Loc;
649}
650
651/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
652/// inserted for predicated reductions or tail folding.
654 VPValue *BackedgeVal = PhiR->getBackedgeValue();
656 BackedgeVal))
657 return Res;
658
659 // Look through selects inserted for tail folding or predicated reductions.
661 BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
662 if (!SelR)
663 return nullptr;
666}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Hexagon Common GEP
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R)
Returns true if R propagates poison from any operand to its result.
static bool preservesUniformity(unsigned Opcode)
Returns true if Opcode preserves uniformity, i.e., if all operands are uniform, the result will also ...
static bool poisonGuaranteesUB(const VPValue *V)
Returns true if V being poison is guaranteed to trigger UB because it propagates to the address of a ...
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
bool isCast() const
bool isBinaryOp() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Representation for a specific memory location.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getPredicatedSCEV(const SCEV *Expr)
Returns the rewritten SCEV for Expr in the context of the current SCEV predicate.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getURemExpr(SCEVUse LHS, SCEVUse RHS)
Represents an unsigned remainder expression based on unsigned division.
LLVM_ABI const SCEV * getSMinExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getSMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getGEPExpr(GEPOperator *GEP, ArrayRef< SCEVUse > IndexExprs)
Returns an expression for a GEP.
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
Definition TypeSwitch.h:89
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
Definition TypeSwitch.h:98
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4239
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4314
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4327
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:598
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:82
size_t getNumSuccessors() const
Definition VPlan.h:225
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:210
VPlan * getPlan()
Definition VPlan.cpp:177
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:199
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3801
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3971
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Recipe to expand a SCEV expression.
Definition VPlan.h:3763
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2318
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1195
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1291
unsigned getOpcode() const
Definition VPlan.h:1375
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:390
bool isScalarCast() const
Return true if the recipe is a scalar cast.
A recipe for handling reduction phis.
Definition VPlan.h:2670
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4427
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4495
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4525
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3187
unsigned getOpcode() const
Definition VPlan.h:3257
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4043
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:591
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:297
operand_range operands()
Definition VPlanValue.h:365
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:336
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
user_range users()
Definition VPlanValue.h:150
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
Definition VPlan.h:2124
A recipe to compute the pointers for widened memory accesses of SourceElementTy.
Definition VPlan.h:2197
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1810
A recipe for handling GEP instructions.
Definition VPlan.h:2060
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2424
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1754
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4557
VPBasicBlock * getEntry()
Definition VPlan.h:4649
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4707
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4733
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4813
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1038
bool hasScalarVFOnly() const
Definition VPlan.h:4781
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4654
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4739
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
An efficient, type-erasing, non-owning reference to a callable.
IteratorT end() const
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
VPScalarIVSteps_match< Op0_t, Op1_t, Op2_t > m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:132
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(VPlan &Plan, SmallVectorImpl< VPRecipeBase * > &Recipes, SmallVectorImpl< VPRecipeBase * > &GEPs)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866