LLVM 23.0.0git
VPlanUtils.cpp
Go to the documentation of this file.
1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
11#include "VPlanAnalysis.h"
12#include "VPlanCFG.h"
13#include "VPlanDominatorTree.h"
14#include "VPlanPatternMatch.h"
15#include "llvm/ADT/TypeSwitch.h"
19#include "llvm/IR/Dominators.h"
21
22using namespace llvm;
23using namespace llvm::VPlanPatternMatch;
24using namespace llvm::SCEVPatternMatch;
25
27 return all_of(Def->users(),
28 [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
29}
30
32 return all_of(Def->users(),
33 [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
34}
35
37 return all_of(Def->users(),
38 [Def](const VPUser *U) { return U->usesScalars(Def); });
39}
40
42 if (auto *E = dyn_cast<SCEVConstant>(Expr))
43 return Plan.getOrAddLiveIn(E->getValue());
44 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
45 // value. Otherwise the value may be defined in a loop and using it directly
46 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
47 // form.
48 auto *U = dyn_cast<SCEVUnknown>(Expr);
49 if (U && !isa<Instruction>(U->getValue()))
50 return Plan.getOrAddLiveIn(U->getValue());
51 auto *Expanded = new VPExpandSCEVRecipe(Expr);
52 VPBasicBlock *EntryVPBB = Plan.getEntry();
53 auto Iter = EntryVPBB->getFirstNonPhi();
54 while (Iter != EntryVPBB->end() && isa<VPIRInstruction>(*Iter))
55 ++Iter;
56 EntryVPBB->insert(Expanded, Iter);
57 return Expanded;
58}
59
60bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
62 return true;
63
64 VPValue *A, *B;
65
66 auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
69 m_One(), m_Specific(&Plan.getVF()));
70
71 // A wide canonical IV is either an explicit VPWidenCanonicalIVRecipe or a
72 // canonical VPWidenIntOrFpInductionRecipe.
73 auto m_WideCanonicalIV =
75
77 return B == Plan.getTripCount() &&
78 (match(A, m_CanonicalScalarIVSteps) || match(A, m_WideCanonicalIV));
79
80 // For scalar plans, the header mask uses the scalar steps.
81 if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
83 assert(Plan.hasScalarVFOnly() &&
84 "Non-scalar VF using scalar IV steps for header mask?");
85 return true;
86 }
87
88 auto MaskMatch = m_ICmp(m_VPValue(A), m_VPValue(B));
89 if (match(V, m_CombineOr(MaskMatch, m_Reverse(MaskMatch))))
90 return match(A, m_WideCanonicalIV) && B == Plan.getBackedgeTakenCount();
91
92 return match(
95}
96
97/// Returns true if \p R propagates poison from any operand to its result.
101 [](const VPRecipeBase *) { return true; })
102 .Case([](const VPReplicateRecipe *Rep) {
103 // GEP and casts propagate poison from all operands.
104 unsigned Opcode = Rep->getOpcode();
105 return Opcode == Instruction::GetElementPtr ||
106 Instruction::isCast(Opcode);
107 })
108 .Default([](const VPRecipeBase *) { return false; });
109}
110
111/// Returns true if \p V being poison is guaranteed to trigger UB because it
112/// propagates to the address of a memory recipe.
113static bool poisonGuaranteesUB(const VPValue *V) {
116
117 Worklist.push_back(V);
118
119 while (!Worklist.empty()) {
120 const VPValue *Current = Worklist.pop_back_val();
121 if (!Visited.insert(Current).second)
122 continue;
123
124 for (VPUser *U : Current->users()) {
125 // Check if Current is used as an address operand for load/store.
127 if (MemR->getAddr() == Current)
128 return true;
129 continue;
130 }
131 if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
132 unsigned Opcode = Rep->getOpcode();
133 if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
134 (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
135 return true;
136 }
137
138 // Check if poison propagates through this recipe to any of its users.
139 auto *R = cast<VPRecipeBase>(U);
140 for (const VPValue *Op : R->operands()) {
141 if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
142 Worklist.push_back(R->getVPSingleValue());
143 break;
144 }
145 }
146 }
147 }
148
149 return false;
150}
151
153 // Like IR stripPointerCasts, look through GEPs with all-zero indices and
154 // casts to find a root GEP VPInstruction.
155 while (auto *PtrVPI = dyn_cast<VPInstruction>(Ptr)) {
156 unsigned Opcode = PtrVPI->getOpcode();
157 if (Opcode == Instruction::GetElementPtr) {
158 if (any_of(drop_begin(PtrVPI->operands()),
159 [](VPValue *Op) { return !match(Op, m_ZeroInt()); }))
160 return PtrVPI->getGEPNoWrapFlags();
161 Ptr = PtrVPI->getOperand(0);
162 continue;
163 }
164 if (Opcode != Instruction::BitCast && Opcode != Instruction::AddrSpaceCast)
165 break;
166 Ptr = PtrVPI->getOperand(0);
167 }
168 return GEPNoWrapFlags::none();
169}
170
173 const Loop *L) {
174 ScalarEvolution &SE = *PSE.getSE();
176 Value *LiveIn = V->getUnderlyingValue();
177 if (LiveIn && SE.isSCEVable(LiveIn->getType()))
178 return SE.getSCEV(LiveIn);
179 return SE.getCouldNotCompute();
180 }
181
182 if (auto *RV = dyn_cast<VPRegionValue>(V)) {
183 assert(RV == RV->getDefiningRegion()->getCanonicalIV() &&
184 "RegionValue must be canonical IV");
185 if (!L)
186 return SE.getCouldNotCompute();
187 return SE.getAddRecExpr(SE.getZero(RV->getType()), SE.getOne(RV->getType()),
189 }
190
191 // Helper to create SCEVs for binary and unary operations.
192 auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
193 function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
194 -> const SCEV * {
196 for (VPValue *Op : Ops) {
197 const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
199 return SE.getCouldNotCompute();
200 SCEVOps.push_back(S);
201 }
202 return PSE.getPredicatedSCEV(CreateFn(SCEVOps));
203 };
204
205 VPValue *LHSVal, *RHSVal;
206 if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
207 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
208 return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
209 });
210 if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
211 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
212 return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
213 });
214 if (match(V, m_Not(m_VPValue(LHSVal)))) {
215 // not X = xor X, -1 = -1 - X
216 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
217 return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
218 });
219 }
220 if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
221 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
222 return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
223 });
224 // Handle shl by constant: x << c is equivalent to x * (1 << c).
225 uint64_t ShiftAmt;
226 if (match(V, m_Shl(m_VPValue(LHSVal), m_ConstantInt(ShiftAmt))))
227 return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) {
228 return SE.getMulExpr(Ops[0],
229 SE.getPowerOfTwo(Ops[0]->getType(), ShiftAmt));
230 });
231 if (match(V, m_LShr(m_VPValue(LHSVal), m_ConstantInt(ShiftAmt)))) {
232 Type *Ty = V->getScalarType();
233 if (ShiftAmt < SE.getTypeSizeInBits(Ty))
234 return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) {
235 return SE.getUDivExpr(Ops[0], SE.getPowerOfTwo(Ty, ShiftAmt));
236 });
237 }
238 if (match(V, m_UDiv(m_VPValue(LHSVal), m_VPValue(RHSVal))))
239 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
240 return SE.getUDivExpr(Ops[0], Ops[1]);
241 });
242 if (match(V, m_URem(m_VPValue(LHSVal), m_VPValue(RHSVal))))
243 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
244 return SE.getURemExpr(Ops[0], Ops[1]);
245 });
246 // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
247 const APInt *Mask;
248 if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
249 (*Mask + 1).isPowerOf2())
250 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
251 return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
252 });
253 if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
254 Type *DestTy = V->getScalarType();
255 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
256 return SE.getTruncateExpr(Ops[0], DestTy);
257 });
258 }
259 if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
260 Type *DestTy = V->getScalarType();
261 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
262 return SE.getZeroExtendExpr(Ops[0], DestTy);
263 });
264 }
265 if (match(V, m_SExt(m_VPValue(LHSVal)))) {
266 Type *DestTy = V->getScalarType();
267
268 // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
269 // onto the operands before computing the subtraction.
270 VPValue *SubLHS, *SubRHS;
271 auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
272 if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
273 SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
274 const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
275 const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
277 return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
278 SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
279 }
280
281 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
282 return SE.getSignExtendExpr(Ops[0], DestTy);
283 });
284 }
285 if (match(V,
287 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
288 return SE.getUMaxExpr(Ops[0], Ops[1]);
289 });
290 if (match(V,
292 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
293 return SE.getSMaxExpr(Ops[0], Ops[1]);
294 });
295 if (match(V,
297 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
298 return SE.getUMinExpr(Ops[0], Ops[1]);
299 });
300 if (match(V,
302 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
303 return SE.getSMinExpr(Ops[0], Ops[1]);
304 });
306 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
307 // is_int_min_poison is local to this intrinsic: poison on INT_MIN is
308 // not proof that the input is never INT_MIN, nor that poison reaches
309 // UB. Do not translate it to SCEV's global IsNSW flag.
310 return SE.getAbsExpr(Ops[0], /*IsNSW=*/false);
311 });
312
314 Type *SourceElementType;
315 if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
316 return CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
317 return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
318 });
319 }
320
321 // TODO: Support constructing SCEVs for more recipes as needed.
322 const VPRecipeBase *DefR = V->getDefiningRecipe();
323 const SCEV *Expr =
325 .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
326 .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
327 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
328 if (!L || isa<SCEVCouldNotCompute>(Step))
329 return SE.getCouldNotCompute();
330 const SCEV *Start =
331 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
332 const SCEV *AddRec =
333 SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
334 if (R->getTruncInst())
335 return SE.getTruncateExpr(AddRec, R->getScalarType());
336 return AddRec;
337 })
338 .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
339 const SCEV *Start =
340 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
341 if (!L || isa<SCEVCouldNotCompute>(Start))
342 return SE.getCouldNotCompute();
343 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
344 if (isa<SCEVCouldNotCompute>(Step))
345 return SE.getCouldNotCompute();
346 return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
347 })
348 .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
349 const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
350 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
351 const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
352 if (any_of(ArrayRef({Start, IV, Scale}),
354 return SE.getCouldNotCompute();
355
356 return SE.getAddExpr(
357 SE.getTruncateOrSignExtend(Start, IV->getType()),
358 SE.getMulExpr(
359 IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
360 })
361 .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
362 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
363 const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
365 return SE.getCouldNotCompute();
366 return SE.getTruncateOrSignExtend(IV, Step->getType());
367 })
368 .Default(
369 [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
370
371 return PSE.getPredicatedSCEV(Expr);
372}
373
375 const Loop *L) {
376 // If address is an SCEVAddExpr, we require that all operands must be either
377 // be invariant or a (possibly sign-extend) affine AddRec.
378 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
379 return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
380 return SE.isLoopInvariant(Op, L) ||
381 match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
382 match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
383 });
384 }
385
386 // Otherwise, check if address is loop invariant or an affine add recurrence.
387 return SE.isLoopInvariant(Addr, L) ||
389}
390
391/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
392/// uniform, the result will also be uniform.
393static bool preservesUniformity(unsigned Opcode) {
394 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
395 return true;
396 switch (Opcode) {
397 case Instruction::Freeze:
398 case Instruction::GetElementPtr:
399 case Instruction::ICmp:
400 case Instruction::FCmp:
401 case Instruction::Select:
406 return true;
407 default:
408 return false;
409 }
410}
411
413 // Live-in, symbolic and region-values represent single-scalar values.
415 return true;
416
417 if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
418 const VPRegionBlock *RegionOfR = Rep->getRegion();
419 // Don't consider recipes in replicate regions as uniform yet; their first
420 // lane cannot be accessed when executing the replicate region for other
421 // lanes.
422 if (RegionOfR && RegionOfR->isReplicator())
423 return false;
424 return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
425 all_of(Rep->operands(), isSingleScalar));
426 }
429 if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
430 return preservesUniformity(WidenR->getOpcode()) &&
431 all_of(WidenR->operands(), isSingleScalar);
432 }
433 if (auto *VPI = dyn_cast<VPInstruction>(VPV))
434 return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
435 (preservesUniformity(VPI->getOpcode()) &&
436 all_of(VPI->operands(), isSingleScalar));
437 if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
438 return !RR->isPartialReduction();
440 VPV))
441 return true;
442 if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
443 return Expr->isVectorToScalar();
444
445 // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
446 return isa<VPExpandSCEVRecipe>(VPV);
447}
448
450 // Live-ins and region values are uniform.
452 return true;
453
454 const VPRecipeBase *R = V->getDefiningRecipe();
455 const VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
456 const VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
457 if (VPBB) {
458 if ((VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
459 if (match(V->getDefiningRecipe(),
461 return false;
462 return all_of(R->operands(), isUniformAcrossVFsAndUFs);
463 }
464 }
465
467 .Case([](const VPDerivedIVRecipe *R) { return true; })
468 .Case([](const VPReplicateRecipe *R) {
469 // Be conservative about side-effects, except for the
470 // known-side-effecting assumes and stores, which we know will be
471 // uniform.
472 return R->isSingleScalar() &&
473 (!R->mayHaveSideEffects() ||
474 isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
475 all_of(R->operands(), isUniformAcrossVFsAndUFs);
476 })
477 .Case([](const VPWidenRecipe *R) {
478 return preservesUniformity(R->getOpcode()) &&
479 all_of(R->operands(), isUniformAcrossVFsAndUFs);
480 })
481 .Case([](const VPPhi *) {
482 // Bail out on VPPhi, as we can end up in infinite cycles.
483 return false;
484 })
485 .Case([](const VPInstruction *VPI) {
486 return (VPI->isSingleScalar() || VPI->isVectorToScalar() ||
489 })
490 .Case([](const VPWidenCastRecipe *R) {
491 // A cast is uniform according to its operand.
492 return isUniformAcrossVFsAndUFs(R->getOperand(0));
493 })
494 .Default([](const VPRecipeBase *) { // A value is considered non-uniform
495 // unless proven otherwise.
496 return false;
497 });
498}
499
501 auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
502 auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
503 return VPBlockUtils::isHeader(VPB, VPDT);
504 });
505 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
506}
507
509 if (!R)
510 return 1;
511 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
512 return RR->getVFScaleFactor();
513 if (auto *RR = dyn_cast<VPReductionRecipe>(R))
514 return RR->getVFScaleFactor();
515 if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
516 return ER->getVFScaleFactor();
517 assert(
520 "getting scaling factor of reduction-start-vector not implemented yet");
521 return 1;
522}
523
524bool vputils::cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking) {
525 // Assumes don't alias anything or throw; as long as they're guaranteed to
526 // execute, they're safe to hoist. They should however not be sunk, as it
527 // would destroy information.
529 return Sinking;
530 // TODO: Relax checks in the future, e.g. we could also hoist reads, if their
531 // memory location is not modified in the vector loop.
532 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
533 return true;
534 // Allocas cannot be hoisted.
535 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
536 return RepR && RepR->getOpcode() == Instruction::Alloca;
537}
538
539std::optional<VPValue *>
542 VPBasicBlock *LatchVPBB) {
543 // Given a plain CFG VPlan loop with countable latch exiting block
544 // \p LatchVPBB, we're looking to match the recipes contributing to the
545 // uncountable exit condition comparison (here, vp<%4>) back to either
546 // live-ins or the address nodes for the load used as part of the uncountable
547 // exit comparison so that we can either move them within the loop, or copy
548 // them to the preheader depending on the chosen method for dealing with
549 // stores in uncountable exit loops.
550 //
551 // Currently, the address of the load is restricted to a GEP with 2 operands
552 // and a live-in base address. This constraint may be relaxed later.
553 //
554 // VPlan ' for UF>=1' {
555 // Live-in vp<%0> = VF * UF
556 // Live-in vp<%1> = vector-trip-count
557 // Live-in ir<20> = original trip-count
558 //
559 // ir-bb<entry>:
560 // Successor(s): scalar.ph, vector.ph
561 //
562 // vector.ph:
563 // Successor(s): for.body
564 //
565 // for.body:
566 // EMIT vp<%2> = phi ir<0>, vp<%index.next>
567 // EMIT-SCALAR ir<%iv> = phi [ ir<0>, vector.ph ], [ ir<%iv.next>, for.inc ]
568 // EMIT ir<%uncountable.addr> = getelementptr inbounds nuw ir<%pred>,ir<%iv>
569 // EMIT ir<%uncountable.val> = load ir<%uncountable.addr>
570 // EMIT ir<%uncountable.cond> = icmp sgt ir<%uncountable.val>, ir<500>
571 // EMIT vp<%3> = masked-cond ir<%uncountable.cond>
572 // Successor(s): for.inc
573 //
574 // for.inc:
575 // EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
576 // EMIT ir<%countable.cond> = icmp eq ir<%iv.next>, ir<20>
577 // EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
578 // EMIT vp<%4> = any-of ir<%3>
579 // EMIT vp<%5> = icmp eq vp<%index.next>, vp<%1>
580 // EMIT branch-on-two-conds vp<%4>, vp<%5>
581 // Successor(s): middle.block, middle.block, for.body
582 //
583 // middle.block:
584 // Successor(s): ir-bb<exit>, scalar.ph
585 //
586 // ir-bb<exit>:
587 // No successors
588 //
589 // scalar.ph:
590 // }
591
592 // Find the uncountable loop exit condition.
593 VPValue *UncountableCondition = nullptr;
594 if (!match(LatchVPBB->getTerminator(),
595 m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
596 m_VPValue())))
597 return std::nullopt;
598
600 Worklist.push_back(UncountableCondition);
601 while (!Worklist.empty()) {
602 VPValue *V = Worklist.pop_back_val();
603
604 // Any value defined outside the loop does not need to be copied.
605 if (V->isDefinedOutsideLoopRegions())
606 continue;
607
608 // FIXME: Remove the single user restriction; it's here because we're
609 // starting with the simplest set of loops we can, and multiple
610 // users means needing to add PHI nodes in the transform.
611 if (V->getNumUsers() > 1)
612 return std::nullopt;
613
614 VPValue *Op1, *Op2;
615 // Walk back through recipes until we find at least one load from memory.
616 if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
617 Worklist.push_back(Op1);
618 Worklist.push_back(Op2);
619 Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
621 VPRecipeBase *GepR = Op1->getDefiningRecipe();
622 // Only matching base + single offset term for now.
623 if (GepR->getNumOperands() != 2)
624 return std::nullopt;
625 // Matching a GEP with a loop-invariant base ptr.
627 m_LiveIn(), m_VPValue())))
628 return std::nullopt;
629 Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
630 Recipes.push_back(cast<VPInstruction>(GepR));
631 GEPs.push_back(cast<VPInstruction>(GepR));
633 m_VPValue(Op1)))) {
634 Worklist.push_back(Op1);
635 Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
636 } else
637 return std::nullopt;
638 }
639
640 // If we couldn't match anything, don't return the condition. It may be
641 // defined outside the loop.
642 if (Recipes.empty() || GEPs.empty())
643 return std::nullopt;
644
645 return UncountableCondition;
646}
647
649 if (VPValue *AliasMask = findIncomingAliasMask(Plan)) {
650 assert(match(AliasMask->getSingleUser(),
651 m_c_BinaryAnd(m_VPValue(), m_Specific(AliasMask))) &&
652 "AliasMask must only be used with the original header mask");
653 return cast<VPSingleDefRecipe>(AliasMask->getSingleUser());
654 }
655
656 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
657 SmallVector<VPValue *> WideCanonicalIVs;
658 auto *WideCanonicalIV =
660 assert(count_if(LoopRegion->getCanonicalIV()->users(),
662 "Must have at most one VPWideCanonicalIVRecipe");
663 if (WideCanonicalIV)
664 WideCanonicalIVs.push_back(WideCanonicalIV);
665
666 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
667 // version of the canonical induction.
668 VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
669 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
671 if (match(&Phi, m_CanonicalWidenIV(WidenIV)))
672 WideCanonicalIVs.push_back(WidenIV);
673 }
674
675 // Walk users of wide canonical IVs and find the single compare of the form
676 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
677 VPSingleDefRecipe *HeaderMask = nullptr;
678 for (auto *Wide : WideCanonicalIVs) {
679 for (VPUser *U : Wide->users()) {
680 auto *VPI = dyn_cast<VPInstruction>(U);
681 if (!VPI || !vputils::isHeaderMask(VPI, Plan))
682 continue;
683
684 assert(VPI->getOperand(0) == Wide &&
685 "WidenCanonicalIV must be the first operand of the compare");
686 assert(!HeaderMask && "Multiple header masks found?");
687 HeaderMask = VPI;
688 }
689 }
690
691 for (VPRecipeBase &R : LoopRegion->getEntryBasicBlock()->phis()) {
692 auto *Def = cast<VPSingleDefRecipe>(&R);
693 if (vputils::isHeaderMask(Def, Plan)) {
694 assert(!HeaderMask && "Multiple header masks found?");
695 HeaderMask = Def;
696 }
697 }
698
699 return HeaderMask;
700}
701
704 VPBasicBlock *LastBB) {
705 assert(FirstBB->getParent() == LastBB->getParent() &&
706 "FirstBB and LastBB from different regions");
707#ifndef NDEBUG
708 bool InSingleSuccChain = false;
709 for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
710 InSingleSuccChain |= (Succ == LastBB);
711 assert(InSingleSuccChain &&
712 "LastBB unreachable from FirstBB in single-successor chain");
713#endif
714 auto Blocks = to_vector(
716 auto *LastIt = find(Blocks, LastBB);
717 assert(LastIt != Blocks.end() &&
718 "LastBB unreachable from FirstBB in depth-first traversal");
719 Blocks.erase(std::next(LastIt), Blocks.end());
720 return Blocks;
721}
722
724 for (VPRecipeBase &R : *Plan.getVectorPreheader())
726 return cast<VPInstruction>(&R);
727 return nullptr;
728}
729
731 const VPDominatorTree &VPDT) {
732 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
733 if (!VPBB)
734 return false;
735
736 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
737 // VPBB as its entry, i.e., free of predecessors.
738 if (auto *R = VPBB->getParent())
739 return !R->isReplicator() && !VPBB->hasPredecessors();
740
741 // A header dominates its second predecessor (the latch), with the other
742 // predecessor being the preheader
743 return VPB->getPredecessors().size() == 2 &&
744 VPDT.dominates(VPB, VPB->getPredecessors()[1]);
745}
746
748 const VPDominatorTree &VPDT) {
749 // A latch has a header as its last successor, with its other successors
750 // leaving the loop. A preheader OTOH has a header as its first (and only)
751 // successor.
752 return VPB->getNumSuccessors() >= 2 &&
754}
755
756std::pair<VPBasicBlock *, VPBasicBlock *>
758 auto *Header = cast<VPBasicBlock>(
759 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
760 auto *Latch = cast<VPBasicBlock>(Header->getPredecessors()[1]);
761 return {Header, Latch};
762}
763
767
768std::optional<MemoryLocation>
770 auto *M = dyn_cast<VPIRMetadata>(&R);
771 if (!M)
772 return std::nullopt;
774 // Populate noalias metadata from VPIRMetadata.
775 if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
776 Loc.AATags.NoAlias = NoAliasMD;
777 if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
778 Loc.AATags.Scope = AliasScopeMD;
779 return Loc;
780}
781
783 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
784 VPRegionValue *CanIV = LoopRegion->getCanonicalIV();
785 assert(CanIV && "Expected loop region to have a canonical IV");
786
787 VPSymbolicValue &VFxUF = Plan.getVFxUF();
788
789 // Check if \p Step matches the expected increment step, accounting for
790 // materialization of VFxUF and UF.
791 auto IsIncrementStep = [&](VPValue *Step) -> bool {
792 if (!VFxUF.isMaterialized())
793 return Step == &VFxUF;
794
795 VPSymbolicValue &UF = Plan.getUF();
796 if (!UF.isMaterialized())
797 return Step == &UF ||
798 match(Step, m_c_Mul(m_Specific(&Plan.getUF()),
800
801 // Alias masking: step is number of active lanes of a dependence mask.
802 if (match(Step, m_ZExtOrTruncOrSelf(
804 return true;
805
806 unsigned ConcreteUF = Plan.getConcreteUF();
807 // Fixed VF: step is just the concrete UF.
808 if (match(Step, m_SpecificInt(ConcreteUF)))
809 return true;
810
811 // Scalable VF: step involves VScale.
812 if (ConcreteUF == 1)
814 if (match(Step, m_c_Mul(m_SpecificInt(ConcreteUF),
816 return true;
817 // mul(VScale, ConcreteUF) may have been simplified to
818 // shl(VScale, log2(ConcreteUF)) when ConcreteUF is a power of 2.
819 return isPowerOf2_32(ConcreteUF) &&
821 m_SpecificInt(Log2_32(ConcreteUF))));
822 };
823
824 VPInstruction *Increment = nullptr;
825 for (VPUser *U : CanIV->users()) {
826 VPValue *Step;
827 if (isa<VPInstruction>(U) &&
828 match(U, m_c_Add(m_Specific(CanIV), m_VPValue(Step))) &&
829 IsIncrementStep(Step)) {
830 assert(!Increment && "There must be a unique increment");
832 }
833 }
834
835 assert((!VFxUF.isMaterialized() || Increment) &&
836 "After materializing VFxUF, an increment must exist");
837 assert((!Increment ||
838 LoopRegion->hasCanonicalIVNUW() == Increment->hasNoUnsignedWrap()) &&
839 "NUW flag in region and increment must match");
840 return Increment;
841}
842
843/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
844/// inserted for predicated reductions or tail folding.
846 VPValue *BackedgeVal = PhiR->getBackedgeValue();
847 if (auto *Res =
849 return Res;
850
851 // Look through selects inserted for tail folding or predicated reductions.
852 VPRecipeBase *SelR =
853 findUserOf(BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
854 if (!SelR)
855 return nullptr;
858}
859
862 SmallVector<const VPValue *> WorkList = {V};
863
864 while (!WorkList.empty()) {
865 const VPValue *Cur = WorkList.pop_back_val();
866 if (!Seen.insert(Cur).second)
867 continue;
868
869 auto *Blend = dyn_cast<VPBlendRecipe>(Cur);
870 // Skip blends that use V only through a compare by checking if any incoming
871 // value was already visited.
872 if (Blend && none_of(seq<unsigned>(0, Blend->getNumIncomingValues()),
873 [&](unsigned I) {
874 return Seen.contains(Blend->getIncomingValue(I));
875 }))
876 continue;
877
878 for (VPUser *U : Cur->users()) {
879 if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
880 if (InterleaveR->getAddr() == Cur)
881 return true;
882 if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
883 if (RepR->getOpcode() == Instruction::Load &&
884 RepR->getOperand(0) == Cur)
885 return true;
886 if (RepR->getOpcode() == Instruction::Store &&
887 RepR->getOperand(1) == Cur)
888 return true;
889 }
891 if (MemR->getAddr() == Cur && MemR->isConsecutive())
892 return true;
893 }
894 }
895
896 // The legacy cost model only supports scalarization loads/stores with phi
897 // addresses, if the phi is directly used as load/store address. Don't
898 // traverse further for Blends.
899 if (Blend)
900 continue;
901
902 // Only traverse further through users that also define a value (and can
903 // thus have their own users walked).
904 for (VPUser *U : Cur->users())
905 if (auto *SDR = dyn_cast<VPSingleDefRecipe>(U))
906 WorkList.push_back(SDR);
907 }
908 return false;
909}
910
911/// Try to find a loop-invariant IR value for \p S in the plan's entry block
912/// that can be reused. Returns the corresponding live-in VPValue, or nullptr
913/// if no reusable IR value is found.
914VPValue *VPSCEVExpander::tryToReuseIRValue(const SCEV *S) {
916 return nullptr;
917 VPlan &Plan = Builder.getPlan();
918 BasicBlock *PH = cast<VPIRBasicBlock>(Plan.getEntry())->getIRBasicBlock();
919 for (Value *V : SE.getSCEVValues(S)) {
920 // Only reuse instructions in the plan's entry block, or, when a
921 // DominatorTree is available, any instruction that dominates it.
922 // Instructions in sibling branches may not dominate the entry block.
923 auto *I = dyn_cast<Instruction>(V);
924 if (!I)
925 return Plan.getOrAddLiveIn(V);
926 if (!SE.DT.dominates(I->getParent(), PH))
927 continue;
928 SmallVector<Instruction *> DropPoisonGeneratingInsts;
929 if (!SE.canReuseInstruction(S, I, DropPoisonGeneratingInsts))
930 continue;
931 for (Instruction *DropI : DropPoisonGeneratingInsts)
933 return Plan.getOrAddLiveIn(V);
934 }
935 return nullptr;
936}
937
939 if (VPValue *V = tryToReuseIRValue(S))
940 return V;
941
942 switch (S->getSCEVType()) {
943 case scConstant:
944 return Builder.getPlan().getOrAddLiveIn(cast<SCEVConstant>(S)->getValue());
945 case scUnknown:
946 return Builder.getPlan().getOrAddLiveIn(cast<SCEVUnknown>(S)->getValue());
947 case scVScale:
948 return Builder.createNaryOp(VPInstruction::VScale, {}, S->getType());
949 case scAddExpr:
950 case scMulExpr: {
951 if (S->getType()->isPointerTy())
952 return nullptr;
953 auto *NAry = cast<SCEVNAryExpr>(S);
954 unsigned Opcode =
955 S->getSCEVType() == scAddExpr ? Instruction::Add : Instruction::Mul;
956 // Iterate in reverse so that constants are emitted last.
958 for (const SCEVUse &Op : reverse(NAry->operands())) {
959 VPValue *OpV = tryToExpand(Op);
960 if (!OpV)
961 return nullptr;
962 Ops.push_back(OpV);
963 }
964 VPIRFlags::WrapFlagsTy WrapFlags(NAry->hasNoUnsignedWrap(),
965 NAry->hasNoSignedWrap());
966 VPValue *Result = Ops.front();
967 for (VPValue *Op : drop_begin(Ops))
968 Result = Builder.createOverflowingOp(Opcode, {Result, Op}, WrapFlags, DL);
969 return Result;
970 }
971 default:
972 return nullptr;
973 }
974}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static constexpr Value * getValue(Ty &ValueOrUse)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R)
Returns true if R propagates poison from any operand to its result.
static bool preservesUniformity(unsigned Opcode)
Returns true if Opcode preserves uniformity, i.e., if all operands are uniform, the result will also ...
static bool poisonGuaranteesUB(const VPValue *V)
Returns true if V being poison is guaranteed to trigger UB because it propagates to the address of a ...
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
bool isCast() const
bool isBinaryOp() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
Representation for a specific memory location.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getPredicatedSCEV(const SCEV *Expr)
Returns the rewritten SCEV for Expr in the context of the current SCEV predicate.
static LLVM_ABI void dropPoisonGeneratingAnnotationsAndReinfer(ScalarEvolution &SE, Instruction *I)
Drop poison-generating flags from I, then try re-infer via SCEV.
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
static constexpr auto FlagNSW
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI const SCEV * getURemExpr(SCEVUse LHS, SCEVUse RHS)
Represents an unsigned remainder expression based on unsigned division.
LLVM_ABI const SCEV * getSMinExpr(SCEVUse LHS, SCEVUse RHS)
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getPowerOfTwo(Type *Ty, unsigned Power)
Return a SCEV for the constant Power of two.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getSMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
LLVM_ABI const SCEV * getGEPExpr(GEPOperator *GEP, ArrayRef< SCEVUse > IndexExprs)
Returns an expression for a GEP.
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
Definition TypeSwitch.h:89
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
Definition TypeSwitch.h:98
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4401
iterator end()
Definition VPlan.h:4438
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4489
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:639
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4467
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
VPRegionBlock * getParent()
Definition VPlan.h:186
size_t getNumSuccessors() const
Definition VPlan.h:237
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
VPlan * getPlan()
Definition VPlan.cpp:211
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:216
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static VPBasicBlock * getPlainCFGMiddleBlock(const VPlan &Plan)
Returns the middle block of Plan in plain CFG form (before regions are formed).
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:324
static std::pair< VPBasicBlock *, VPBasicBlock * > getPlainCFGHeaderAndLatch(const VPlan &Plan)
Returns the header and latch of the outermost loop of Plan in plain CFG form (before regions are form...
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4182
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Recipe to expand a SCEV expression.
Definition VPlan.h:4014
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2483
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1315
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
unsigned getOpcode() const
Definition VPlan.h:1417
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
A recipe for handling reduction phis.
Definition VPlan.h:2864
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4611
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4687
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4736
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4723
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3398
unsigned getOpcode() const
Definition VPlan.h:3485
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4249
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
operand_range operands()
Definition VPlanValue.h:457
unsigned getNumOperands() const
Definition VPlanValue.h:424
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
user_range users()
Definition VPlanValue.h:157
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
A recipe for handling GEP instructions.
Definition VPlan.h:2206
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2623
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1817
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4759
VPBasicBlock * getEntry()
Definition VPlan.h:4855
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4918
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4958
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4945
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5032
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:5010
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4860
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4955
bool hasScalarVFOnly() const
Definition VPlan.h:5000
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4898
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4951
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
auto m_ZExtOrTruncOrSelf(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
canonical_iv_match m_CanonicalIV()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
auto m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool isUsedByLoadStoreAddress(const VPValue *V)
Returns true if V is used as part of the address of another load or store.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(SmallVectorImpl< VPInstruction * > &Recipes, SmallVectorImpl< VPInstruction * > &GEPs, VPBasicBlock *LatchVPBB)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
GEPNoWrapFlags getGEPFlagsForPtr(VPValue *Ptr)
Returns the GEP nowrap flags for Ptr, looking through pointer casts mirroring Value::stripPointerCast...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:288
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
SCEVUseT< const SCEV * > SCEVUse
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
Definition VPlanValue.h:297