LLVM 23.0.0git
LoopAccessAnalysis.cpp
Go to the documentation of this file.
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence that was originally
10// developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallSet.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/PassManager.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
55#include "llvm/IR/ValueHandle.h"
58#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>
68
69using namespace llvm;
70using namespace llvm::SCEVPatternMatch;
71
72#define DEBUG_TYPE "loop-accesses"
73
75 VectorizationFactor("force-vector-width", cl::Hidden,
76 cl::desc("Sets the SIMD width. Zero is autoselect."),
79
81VectorizationInterleave("force-vector-interleave", cl::Hidden,
82 cl::desc("Sets the vectorization interleave count. "
83 "Zero is autoselect."),
87
89 "runtime-memory-check-threshold", cl::Hidden,
90 cl::desc("When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
94
95/// The maximum iterations used to merge memory checks
97 "memory-check-merge-threshold", cl::Hidden,
98 cl::desc("Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
100 cl::init(100));
101
102/// Maximum SIMD width.
103const unsigned VectorizerParams::MaxVectorWidth = 64;
104
105/// We collect dependences up to this threshold.
107 MaxDependences("max-dependences", cl::Hidden,
108 cl::desc("Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
110 cl::init(100));
111
112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114/// for (i = 0; i < N; ++i)
115/// A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118/// if (Stride1 == 1 && Stride2 == 1) {
119/// for (i = 0; i < N; i+=4)
120/// A[i:i+3] += ...
121/// } else
122/// ...
124 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125 cl::desc("Enable symbolic stride memory access versioning"));
126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
130 "store-to-load-forwarding-conflict-detection", cl::Hidden,
131 cl::desc("Enable conflict detection in loop-access analysis"),
132 cl::init(true));
133
135 "max-forked-scev-depth", cl::Hidden,
136 cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137 cl::init(5));
138
140 "laa-speculate-unit-stride", cl::Hidden,
141 cl::desc("Speculate that non-constant strides are unit in LAA"),
142 cl::init(true));
143
145 "hoist-runtime-checks", cl::Hidden,
146 cl::desc(
147 "Hoist inner loop runtime memory checks to outer loop if possible"),
150
152 return ::VectorizationInterleave.getNumOccurrences() > 0;
153}
154
156 const DenseMap<Value *, const SCEV *> &PtrToStride,
157 Value *Ptr) {
158 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
159
160 // If there is an entry in the map return the SCEV of the pointer with the
161 // symbolic stride replaced by one.
162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
163 if (!StrideSCEV)
164 // For a non-symbolic stride, just return the original expression.
165 return OrigSCEV;
166
167 // Note: This assert is both overly strong and overly weak. The actual
168 // invariant here is that StrideSCEV should be loop invariant. The only
169 // such invariant strides we happen to speculate right now are unknowns
170 // and thus this is a reasonable proxy of the actual invariant.
171 assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
172
173 ScalarEvolution *SE = PSE.getSE();
174 const SCEV *CT = SE->getOne(StrideSCEV->getType());
175 PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
176 const SCEV *Expr = PSE.getSCEV(Ptr);
177
178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
179 << " by: " << *Expr << "\n");
180 return Expr;
181}
182
184 unsigned Index, const RuntimePointerChecking &RtCheck)
185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
186 AddressSpace(RtCheck.Pointers[Index]
187 .PointerValue->getType()
189 NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
190 Members.push_back(Index);
191}
192
193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194/// return nullptr. \p A and \p B must have the same type.
195static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196 ScalarEvolution &SE) {
197 if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
198 return nullptr;
199 return SE.getAddExpr(A, B);
200}
201
202/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203/// return nullptr. \p A and \p B must have the same type.
204static const SCEV *mulSCEVNoOverflow(const SCEV *A, const SCEV *B,
205 ScalarEvolution &SE) {
206 if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
207 return nullptr;
208 return SE.getMulExpr(A, B);
209}
210
211/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
212/// \p MaxBTC is guaranteed inbounds of the accessed object.
214 const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
216 AssumptionCache *AC,
217 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
218 auto *PointerBase = SE.getPointerBase(AR->getStart());
219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
220 if (!StartPtr)
221 return false;
222 const Loop *L = AR->getLoop();
223 bool CheckForNonNull;
224 Value *StartPtrV = StartPtr->getValue();
225 // We can ignore frees, as the fact that an object of a certain size existed
226 // at the location *at some point* is sufficient to derive the nowrap fact.
227 uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
228 DL, CheckForNonNull, /*CanBeFreed=*/nullptr);
229
230 if (DerefBytes && CheckForNonNull)
231 return false;
232
233 const SCEV *Step = AR->getStepRecurrence(SE);
234 Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
235 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
236
237 // Check if we have a suitable dereferencable assumption we can use.
238 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
239 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
240 if (isa<UncondBrInst, CondBrInst>(LoopPred->getTerminator()))
241 CtxI = LoopPred->getTerminator();
242 }
244 StartPtrV, Attribute::Dereferenceable, *AC,
245 [&](RetainedKnowledge RK, Instruction *Assume, auto) {
246 if (!isValidAssumeForContext(Assume, CtxI, DT))
247 return false;
248 const SCEV *DerefRKSCEV = SE.getSCEV(RK.IRArgValue);
249 Type *CommonTy =
250 SE.getWiderType(DerefBytesSCEV->getType(), DerefRKSCEV->getType());
251 DerefBytesSCEV = SE.getNoopOrZeroExtend(DerefBytesSCEV, CommonTy);
252 DerefRKSCEV = SE.getNoopOrZeroExtend(DerefRKSCEV, CommonTy);
253 DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV, DerefRKSCEV);
254 // Continue with other assumptions.
255 return false;
256 });
257
258 if (DerefBytesSCEV->isZero())
259 return false;
260
261 bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
262 if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
263 return false;
264
265 Step = SE.getNoopOrSignExtend(Step, WiderTy);
266 MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
267
268 // For the computations below, make sure they don't unsigned wrap.
269 if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
270 return false;
271 const SCEV *StartOffset = SE.getNoopOrZeroExtend(
272 SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
273
274 if (!LoopGuards)
275 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
276 MaxBTC = SE.applyLoopGuards(MaxBTC, *LoopGuards);
277
278 const SCEV *OffsetAtLastIter =
279 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
280 if (!OffsetAtLastIter) {
281 // Re-try with constant max backedge-taken count if using the symbolic one
282 // failed.
283 MaxBTC = SE.getConstantMaxBackedgeTakenCount(AR->getLoop());
284 if (isa<SCEVCouldNotCompute>(MaxBTC))
285 return false;
286 MaxBTC = SE.getNoopOrZeroExtend(
287 MaxBTC, WiderTy);
288 OffsetAtLastIter =
289 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
290 if (!OffsetAtLastIter)
291 return false;
292 }
293
294 const SCEV *OffsetEndBytes = addSCEVNoOverflow(
295 OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
296 if (!OffsetEndBytes)
297 return false;
298
299 if (IsKnownNonNegative) {
300 // For positive steps, check if
301 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
302 // while making sure none of the computations unsigned wrap themselves.
303 const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
304 if (!EndBytes)
305 return false;
306
307 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
308 return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
309 }
310
311 // For negative steps check if
312 // * StartOffset >= (MaxBTC * Step + EltSize)
313 // * StartOffset <= DerefBytes.
314 assert(SE.isKnownNegative(Step) && "must be known negative");
315 return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
316 SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
317}
318
319std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
320 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
321 const SCEV *MaxBTC, ScalarEvolution *SE,
322 DenseMap<std::pair<const SCEV *, const SCEV *>,
323 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
325 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
326 auto &DL = Lp->getHeader()->getDataLayout();
327 Type *IdxTy = DL.getIndexType(PtrExpr->getType());
328 const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
329
330 // Delegate to the SCEV-based overload, passing through the cache.
331 return getStartAndEndForAccess(Lp, PtrExpr, EltSizeSCEV, BTC, MaxBTC, SE,
332 PointerBounds, DT, AC, LoopGuards);
333}
334
335std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
336 const Loop *Lp, const SCEV *PtrExpr, const SCEV *EltSizeSCEV,
337 const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE,
338 DenseMap<std::pair<const SCEV *, const SCEV *>,
339 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
341 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
342 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
343 if (PointerBounds) {
344 auto [Iter, Ins] = PointerBounds->insert(
345 {{PtrExpr, EltSizeSCEV},
346 {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
347 if (!Ins)
348 return Iter->second;
349 PtrBoundsPair = &Iter->second;
350 }
351
352 const SCEV *ScStart;
353 const SCEV *ScEnd;
354
355 auto &DL = Lp->getHeader()->getDataLayout();
356 if (SE->isLoopInvariant(PtrExpr, Lp)) {
357 ScStart = ScEnd = PtrExpr;
358 } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
359 ScStart = AR->getStart();
360 if (!isa<SCEVCouldNotCompute>(BTC))
361 // Evaluating AR at an exact BTC is safe: LAA separately checks that
362 // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
363 // the loop either triggers UB when executing a memory access with a
364 // poison pointer or the wrapping/poisoned pointer is not used.
365 ScEnd = AR->evaluateAtIteration(BTC, *SE);
366 else {
367 // Evaluating AR at MaxBTC may wrap and create an expression that is less
368 // than the start of the AddRec due to wrapping (for example consider
369 // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
370 // will get incremented by EltSize before returning, so this effectively
371 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
372 // separately checks that accesses cannot not wrap, so unsigned max
373 // represents an upper bound.
374 if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
375 DT, AC, LoopGuards)) {
376 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
377 } else {
378 ScEnd = SE->getAddExpr(
379 SE->getNegativeSCEV(EltSizeSCEV),
382 AR->getType())));
383 }
384 }
385 const SCEV *Step = AR->getStepRecurrence(*SE);
386
387 // For expressions with negative step, the upper bound is ScStart and the
388 // lower bound is ScEnd.
389 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
390 if (CStep->getValue()->isNegative())
391 std::swap(ScStart, ScEnd);
392 } else {
393 // Fallback case: the step is not constant, but we can still
394 // get the upper and lower bounds of the interval by using min/max
395 // expressions.
396 ScStart = SE->getUMinExpr(ScStart, ScEnd);
397 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
398 }
399 } else
400 return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
401
402 assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
403 assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
404
405 // Add the size of the pointed element to ScEnd.
406 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
407
408 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
409 if (PointerBounds)
410 *PtrBoundsPair = Res;
411 return Res;
412}
413
414/// Calculate Start and End points of memory access using
415/// getStartAndEndForAccess.
416void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
417 Type *AccessTy, bool WritePtr,
418 unsigned DepSetId, unsigned ASId,
420 bool NeedsFreeze) {
421 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
422 const SCEV *BTC = PSE.getBackedgeTakenCount();
423 const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
424 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
425 &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
427 !isa<SCEVCouldNotCompute>(ScEnd) &&
428 "must be able to compute both start and end expressions");
429 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
430 NeedsFreeze);
431}
432
433bool RuntimePointerChecking::tryToCreateDiffCheck(
434 const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
435 // If either group contains multiple different pointers, bail out.
436 // TODO: Support multiple pointers by using the minimum or maximum pointer,
437 // depending on src & sink.
438 if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
439 return false;
440
441 const PointerInfo *Src = &Pointers[CGI.Members[0]];
442 const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
443
444 // If either pointer is read and written, multiple checks may be needed. Bail
445 // out.
446 if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
447 !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
448 return false;
449
450 ArrayRef<unsigned> AccSrc =
451 DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
452 ArrayRef<unsigned> AccSink =
453 DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
454 // If either pointer is accessed multiple times, there may not be a clear
455 // src/sink relation. Bail out for now.
456 if (AccSrc.size() != 1 || AccSink.size() != 1)
457 return false;
458
459 // If the sink is accessed before src, swap src/sink.
460 if (AccSink[0] < AccSrc[0])
461 std::swap(Src, Sink);
462
463 const SCEVConstant *Step;
464 const SCEV *SrcStart;
465 const SCEV *SinkStart;
466 const Loop *InnerLoop = DC.getInnermostLoop();
467 if (!match(Src->Expr,
469 m_SpecificLoop(InnerLoop))) ||
470 !match(Sink->Expr,
472 m_SpecificLoop(InnerLoop))))
473 return false;
474
476 DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
478 DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
479 Type *SrcTy = getLoadStoreType(SrcInsts[0]);
480 Type *DstTy = getLoadStoreType(SinkInsts[0]);
482 return false;
483
484 const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
485 unsigned AllocSize =
486 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
487
488 // Only matching constant steps matching the AllocSize are supported at the
489 // moment. This simplifies the difference computation. Can be extended in the
490 // future.
491 if (Step->getAPInt().abs() != AllocSize)
492 return false;
493
494 // When counting down, the dependence distance needs to be swapped.
495 if (Step->getValue()->isNegative())
496 std::swap(SinkStart, SrcStart);
497
498 const SCEV *SinkStartInt = SE->getPtrToAddrExpr(SinkStart);
499 const SCEV *SrcStartInt = SE->getPtrToAddrExpr(SrcStart);
500 if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
501 isa<SCEVCouldNotCompute>(SrcStartInt))
502 return false;
503
504 // If the start values for both Src and Sink also vary according to an outer
505 // loop, then it's probably better to avoid creating diff checks because
506 // they may not be hoisted. We should instead let llvm::addRuntimeChecks
507 // do the expanded full range overlap checks, which can be hoisted.
508 if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
509 isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
510 auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
511 auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
512 const Loop *StartARLoop = SrcStartAR->getLoop();
513 if (StartARLoop == SinkStartAR->getLoop() &&
514 StartARLoop == InnerLoop->getParentLoop() &&
515 // If the diff check would already be loop invariant (due to the
516 // recurrences being the same), then we prefer to keep the diff checks
517 // because they are cheaper.
518 SrcStartAR->getStepRecurrence(*SE) !=
519 SinkStartAR->getStepRecurrence(*SE)) {
520 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
521 "cannot be hoisted out of the outer loop\n");
522 return false;
523 }
524 }
525
526 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
527 << "SrcStart: " << *SrcStartInt << '\n'
528 << "SinkStartInt: " << *SinkStartInt << '\n');
529 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
530 Src->NeedsFreeze || Sink->NeedsFreeze);
531 return true;
532}
533
535 SmallVector<RuntimePointerCheck, 4> Checks;
536
537 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
538 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
541
542 if (needsChecking(CGI, CGJ)) {
543 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
544 Checks.emplace_back(&CGI, &CGJ);
545 }
546 }
547 }
548 return Checks;
549}
550
553 assert(Checks.empty() && "Checks is not empty");
554 groupChecks(DepCands);
555 Checks = generateChecks();
556}
557
559 const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
560 for (const auto &I : M.Members)
561 for (const auto &J : N.Members)
562 if (needsChecking(I, J))
563 return true;
564 return false;
565}
566
567/// Compare \p I and \p J and return the minimum.
568/// Return nullptr in case we couldn't find an answer.
569static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
570 ScalarEvolution *SE) {
571 std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
572 if (!Diff)
573 return nullptr;
574 return Diff->isNegative() ? J : I;
575}
576
578 unsigned Index, const RuntimePointerChecking &RtCheck) {
579 return addPointer(
580 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
581 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
582 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
583}
584
585bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
586 const SCEV *End, unsigned AS,
587 bool NeedsFreeze,
588 ScalarEvolution &SE) {
589 assert(AddressSpace == AS &&
590 "all pointers in a checking group must be in the same address space");
591
592 // Compare the starts and ends with the known minimum and maximum
593 // of this set. We need to know how we compare against the min/max
594 // of the set in order to be able to emit memchecks.
595 const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
596 if (!Min0)
597 return false;
598
599 const SCEV *Min1 = getMinFromExprs(End, High, &SE);
600 if (!Min1)
601 return false;
602
603 // Update the low bound expression if we've found a new min value.
604 if (Min0 == Start)
605 Low = Start;
606
607 // Update the high bound expression if we've found a new max value.
608 if (Min1 != End)
609 High = End;
610
611 Members.push_back(Index);
612 this->NeedsFreeze |= NeedsFreeze;
613 return true;
614}
615
616void RuntimePointerChecking::groupChecks(
618 // We build the groups from dependency candidates equivalence classes
619 // because:
620 // - We know that pointers in the same equivalence class share
621 // the same underlying object and therefore there is a chance
622 // that we can compare pointers
623 // - We wouldn't be able to merge two pointers for which we need
624 // to emit a memcheck. The classes in DepCands are already
625 // conveniently built such that no two pointers in the same
626 // class need checking against each other.
627
628 // We use the following (greedy) algorithm to construct the groups
629 // For every pointer in the equivalence class:
630 // For each existing group:
631 // - if the difference between this pointer and the min/max bounds
632 // of the group is a constant, then make the pointer part of the
633 // group and update the min/max bounds of that group as required.
634
635 CheckingGroups.clear();
636
637 // If we need to check two pointers to the same underlying object
638 // with a non-constant difference, we shouldn't perform any pointer
639 // grouping with those pointers. This is because we can easily get
640 // into cases where the resulting check would return false, even when
641 // the accesses are safe.
642 //
643 // The following example shows this:
644 // for (i = 0; i < 1000; ++i)
645 // a[5000 + i * m] = a[i] + a[i + 9000]
646 //
647 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
648 // (0, 10000) which is always false. However, if m is 1, there is no
649 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
650 // us to perform an accurate check in this case.
651 //
652 // In the above case, we have a non-constant distance and an Unknown
653 // dependence between accesses to the same underlying object, and could retry
654 // with runtime checks without dependency information being available. In this
655 // case we will use the fallback path and create separate checking groups for
656 // accesses not present in DepCands.
657
658 unsigned TotalComparisons = 0;
659
661 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
662 PositionMap[Pointers[Index].PointerValue].push_back(Index);
663
664 // We need to keep track of what pointers we've already seen so we
665 // don't process them twice.
667
668 // Go through all equivalence classes, get the "pointer check groups"
669 // and add them to the overall solution. We use the order in which accesses
670 // appear in 'Pointers' to enforce determinism.
671 for (unsigned I = 0; I < Pointers.size(); ++I) {
672 // We've seen this pointer before, and therefore already processed
673 // its equivalence class.
674 if (Seen.contains(I))
675 continue;
676
678 Pointers[I].IsWritePtr);
679
680 // If there is no entry in the dependency partition, there are no potential
681 // accesses to merge; simply add a new pointer checking group.
682 if (!DepCands.contains(Access)) {
683 CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
684 continue;
685 }
686
688
689 // Because DepCands is constructed by visiting accesses in the order in
690 // which they appear in alias sets (which is deterministic) and the
691 // iteration order within an equivalence class member is only dependent on
692 // the order in which unions and insertions are performed on the
693 // equivalence class, the iteration order is deterministic.
694 for (auto M : DepCands.members(Access)) {
695 auto PointerI = PositionMap.find(M.getPointer());
696 // If we can't find the pointer in PositionMap that means we can't
697 // generate a memcheck for it.
698 if (PointerI == PositionMap.end())
699 continue;
700 for (unsigned Pointer : PointerI->second) {
701 bool Merged = false;
702 // Mark this pointer as seen.
703 Seen.insert(Pointer);
704
705 // Go through all the existing sets and see if we can find one
706 // which can include this pointer.
707 for (RuntimeCheckingPtrGroup &Group : Groups) {
708 // Don't perform more than a certain amount of comparisons.
709 // This should limit the cost of grouping the pointers to something
710 // reasonable. If we do end up hitting this threshold, the algorithm
711 // will create separate groups for all remaining pointers.
712 if (TotalComparisons > MemoryCheckMergeThreshold)
713 break;
714
715 TotalComparisons++;
716
717 if (Group.addPointer(Pointer, *this)) {
718 Merged = true;
719 break;
720 }
721 }
722
723 if (!Merged)
724 // We couldn't add this pointer to any existing set or the threshold
725 // for the number of comparisons has been reached. Create a new group
726 // to hold the current pointer.
727 Groups.emplace_back(Pointer, *this);
728 }
729 }
730
731 // We've computed the grouped checks for this partition.
732 // Save the results and continue with the next one.
734 }
735}
736
738 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
739 unsigned PtrIdx2) {
740 return (PtrToPartition[PtrIdx1] != -1 &&
741 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
742}
743
744bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
745 const PointerInfo &PointerI = Pointers[I];
746 const PointerInfo &PointerJ = Pointers[J];
747
748 // No need to check if two readonly pointers intersect.
749 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
750 return false;
751
752 // Only need to check pointers between two different dependency sets.
753 if (PointerI.DependencySetId == PointerJ.DependencySetId)
754 return false;
755
756 // Only need to check pointers in the same alias set.
757 return PointerI.AliasSetId == PointerJ.AliasSetId;
758}
759
760/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
764 for (const auto &[Idx, CG] : enumerate(CheckingGroups))
765 PtrIndices[&CG] = Idx;
766 return PtrIndices;
767}
768
771 unsigned Depth) const {
772 unsigned N = 0;
773 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
774 for (const auto &[Check1, Check2] : Checks) {
775 const auto &First = Check1->Members, &Second = Check2->Members;
776 OS.indent(Depth) << "Check " << N++ << ":\n";
777 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
778 << ":\n";
779 for (unsigned K : First)
780 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
781 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
782 << ":\n";
783 for (unsigned K : Second)
784 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
785 }
786}
787
789
790 OS.indent(Depth) << "Run-time memory checks:\n";
791 printChecks(OS, Checks, Depth);
792
793 OS.indent(Depth) << "Grouped accesses:\n";
794 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
795 for (const auto &CG : CheckingGroups) {
796 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
797 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
798 << ")\n";
799 for (unsigned Member : CG.Members) {
800 OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
801 }
802 }
803}
804
805namespace {
806
807/// Analyses memory accesses in a loop.
808///
809/// Checks whether run time pointer checks are needed and builds sets for data
810/// dependence checking.
811class AccessAnalysis {
812public:
813 using MemAccessInfo =
814 PointerIntPair<Value * /* AccessPtr */, 1, bool /* IsWrite */>;
815
816 AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
819 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
820 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
821 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
822 // We're analyzing dependences across loop iterations.
823 BAA.enableCrossIterationMode();
824 }
825
826 /// Register a load and whether it is only read from.
827 void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
828 Value *Ptr = const_cast<Value *>(Loc.Ptr);
829 AST.add(adjustLoc(Loc));
830 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
831 if (IsReadOnly)
832 ReadOnlyPtr.insert(Ptr);
833 }
834
835 /// Register a store.
836 void addStore(const MemoryLocation &Loc, Type *AccessTy) {
837 Value *Ptr = const_cast<Value *>(Loc.Ptr);
838 AST.add(adjustLoc(Loc));
839 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
840 }
841
842 /// Check if we can emit a run-time no-alias check for \p Access.
843 ///
844 /// Returns true if we can emit a run-time no alias check for \p Access.
845 /// If we can check this access, this also adds it to a dependence set and
846 /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
847 /// we will attempt to use additional run-time checks in order to get
848 /// the bounds of the pointer.
849 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
850 MemAccessInfo Access, Type *AccessTy,
851 const DenseMap<Value *, const SCEV *> &Strides,
852 DenseMap<Value *, unsigned> &DepSetId,
853 Loop *TheLoop, unsigned &RunningDepId,
854 unsigned ASId, bool Assume);
855
856 /// Check whether we can check the pointers at runtime for
857 /// non-intersection.
858 ///
859 /// Returns true if we need no check or if we do and we can generate them
860 /// (i.e. the pointers have computable bounds). A return value of false means
861 /// we couldn't analyze and generate runtime checks for all pointers in the
862 /// loop, but if \p AllowPartial is set then we will have checks for those
863 /// pointers we could analyze. \p DepChecker is used to remove unknown
864 /// dependences from DepCands.
865 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
866 const DenseMap<Value *, const SCEV *> &Strides,
867 Value *&UncomputablePtr, bool AllowPartial,
868 const MemoryDepChecker &DepChecker);
869
870 /// Goes over all memory accesses, checks whether a RT check is needed
871 /// and builds sets of dependent accesses.
872 void buildDependenceSets();
873
874 /// Initial processing of memory accesses determined that we need to
875 /// perform dependency checking.
876 ///
877 /// Note that this can later be cleared if we retry memcheck analysis without
878 /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
879 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
880
881 /// We decided that no dependence analysis would be used. Reset the state.
882 void resetDepChecks(MemoryDepChecker &DepChecker) {
883 CheckDeps.clear();
884 DepChecker.clearDependences();
885 }
886
887 ArrayRef<MemAccessInfo> getDependenciesToCheck() const { return CheckDeps; }
888
889private:
890 using PtrAccessMap = MapVector<MemAccessInfo, SmallSetVector<Type *, 1>>;
891
892 /// Adjust the MemoryLocation so that it represents accesses to this
893 /// location across all iterations, rather than a single one.
894 MemoryLocation adjustLoc(MemoryLocation Loc) const {
895 // The accessed location varies within the loop, but remains within the
896 // underlying object.
898 Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
899 Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
900 return Loc;
901 }
902
903 /// Drop alias scopes that are only valid within a single loop iteration.
904 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
905 if (!ScopeList)
906 return nullptr;
907
908 // For the sake of simplicity, drop the whole scope list if any scope is
909 // iteration-local.
910 if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
911 return LoopAliasScopes.contains(cast<MDNode>(Scope));
912 }))
913 return nullptr;
914
915 return ScopeList;
916 }
917
918 /// Map of all accesses. Values are the types used to access memory pointed to
919 /// by the pointer.
920 PtrAccessMap Accesses;
921
922 /// The loop being checked.
923 const Loop *TheLoop;
924
925 /// List of accesses that need a further dependence check.
927
928 /// Set of pointers that are read only.
929 SmallPtrSet<Value*, 16> ReadOnlyPtr;
930
931 /// Batched alias analysis results.
932 BatchAAResults BAA;
933
934 /// An alias set tracker to partition the access set by underlying object and
935 //intrinsic property (such as TBAA metadata).
936 AliasSetTracker AST;
937
938 /// The LoopInfo of the loop being checked.
939 const LoopInfo *LI;
940
941 /// The dominator tree of the function.
942 DominatorTree &DT;
943
944 /// Sets of potentially dependent accesses - members of one set share an
945 /// underlying pointer. The set "CheckDeps" identfies which sets really need a
946 /// dependence check.
948
949 /// Initial processing of memory accesses determined that we may need
950 /// to add memchecks. Perform the analysis to determine the necessary checks.
951 ///
952 /// Note that, this is different from isDependencyCheckNeeded. When we retry
953 /// memcheck analysis without dependency checking
954 /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
955 /// cleared while this remains set if we have potentially dependent accesses.
956 bool IsRTCheckAnalysisNeeded = false;
957
958 /// The SCEV predicate containing all the SCEV-related assumptions.
959 PredicatedScalarEvolution &PSE;
960
961 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
962
963 /// Alias scopes that are declared inside the loop, and as such not valid
964 /// across iterations.
965 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
966};
967
968} // end anonymous namespace
969
970std::optional<int64_t>
972 Type *AccessTy, Value *Ptr,
974 if (isa<ScalableVectorType>(AccessTy)) {
975 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
976 << "\n");
977 return std::nullopt;
978 }
979
980 // The access function must stride over the innermost loop.
981 if (Lp != AR->getLoop()) {
982 LLVM_DEBUG({
983 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
984 if (Ptr)
985 dbgs() << *Ptr << " ";
986
987 dbgs() << "SCEV: " << *AR << "\n";
988 });
989 return std::nullopt;
990 }
991
992 // Check the step is constant.
993 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
994
995 // Calculate the pointer stride and check if it is constant.
996 const APInt *APStepVal;
997 if (!match(Step, m_scev_APInt(APStepVal))) {
998 LLVM_DEBUG({
999 dbgs() << "LAA: Bad stride - Not a constant strided ";
1000 if (Ptr)
1001 dbgs() << *Ptr << " ";
1002 dbgs() << "SCEV: " << *AR << "\n";
1003 });
1004 return std::nullopt;
1005 }
1006
1007 const auto &DL = Lp->getHeader()->getDataLayout();
1008 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1009 int64_t Size = AllocSize.getFixedValue();
1010
1011 // Huge step value - give up.
1012 std::optional<int64_t> StepVal = APStepVal->trySExtValue();
1013 if (!StepVal)
1014 return std::nullopt;
1015
1016 // Strided access.
1017 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
1018}
1019
1020/// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr, use
1021/// information from the IR pointer value to determine no-wrap. If \p Predicates
1022/// is not nullptr add no-wrap assumptions if needed.
1023static bool
1025 Type *AccessTy, const Loop *L, const DominatorTree &DT,
1026 std::optional<int64_t> Stride = std::nullopt,
1027 SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr) {
1028 // FIXME: This should probably only return true for NUW.
1029 if (any(AR->getNoWrapFlags(SCEV::NoWrapMask)))
1030 return true;
1031
1033 return true;
1034
1035 // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1036 // the distance between the previously accessed location and the wrapped
1037 // location will be larger than half the pointer index type space. In that
1038 // case, the GEP would be poison and any memory access dependent on it would
1039 // be immediate UB when executed.
1041 GEP && GEP->hasNoUnsignedSignedWrap()) {
1042 // For the above reasoning to apply, the pointer must be dereferenced in
1043 // every iteration.
1044 if (L->getHeader() == L->getLoopLatch() ||
1045 any_of(GEP->users(), [L, &DT, GEP](User *U) {
1046 if (getLoadStorePointerOperand(U) != GEP)
1047 return false;
1048 BasicBlock *UserBB = cast<Instruction>(U)->getParent();
1049 if (!L->contains(UserBB))
1050 return false;
1051 return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
1052 }))
1053 return true;
1054 }
1055
1056 if (!Stride)
1057 Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
1058 if (Stride) {
1059 // If the null pointer is undefined, then a access sequence which would
1060 // otherwise access it can be assumed not to unsigned wrap. Note that this
1061 // assumes the object in memory is aligned to the natural alignment.
1062 unsigned AddrSpace = AR->getType()->getPointerAddressSpace();
1063 if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
1064 (Stride == 1 || Stride == -1))
1065 return true;
1066 }
1067
1068 if (Ptr && Predicates) {
1069 ScalarEvolution &SE = *PSE.getSE();
1073 Predicates->push_back(SE.getWrapPredicate(AR, Flags));
1074 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1075 << "LAA: Pointer: " << *Ptr << "\n"
1076 << "LAA: SCEV: " << *AR << "\n"
1077 << "LAA: Added an overflow assumption\n");
1078 return true;
1079 }
1080
1081 return false;
1082}
1083
1084static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1085 function_ref<void(Value *)> AddPointer) {
1087 SmallVector<Value *> WorkList;
1088 WorkList.push_back(StartPtr);
1089
1090 while (!WorkList.empty()) {
1091 Value *Ptr = WorkList.pop_back_val();
1092 if (!Visited.insert(Ptr).second)
1093 continue;
1094 auto *PN = dyn_cast<PHINode>(Ptr);
1095 // SCEV does not look through non-header PHIs inside the loop. Such phis
1096 // can be analyzed by adding separate accesses for each incoming pointer
1097 // value.
1098 if (PN && InnermostLoop.contains(PN->getParent()) &&
1099 PN->getParent() != InnermostLoop.getHeader()) {
1100 llvm::append_range(WorkList, PN->incoming_values());
1101 } else
1102 AddPointer(Ptr);
1103 }
1104}
1105
1106// Walk back through the IR for a pointer, looking for a select like the
1107// following:
1108//
1109// %offset = select i1 %cmp, i64 %a, i64 %b
1110// %addr = getelementptr double, double* %base, i64 %offset
1111// %ld = load double, double* %addr, align 8
1112//
1113// We won't be able to form a single SCEVAddRecExpr from this since the
1114// address for each loop iteration depends on %cmp. We could potentially
1115// produce multiple valid SCEVAddRecExprs, though, and check all of them for
1116// memory safety/aliasing if needed.
1117//
1118// If we encounter some IR we don't yet handle, or something obviously fine
1119// like a constant, then we just add the SCEV for that term to the list passed
1120// in by the caller. If we have a node that may potentially yield a valid
1121// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
1122// ourselves before adding to the list.
1124 ScalarEvolution *SE, const Loop *L, Value *Ptr,
1126 unsigned Depth) {
1127 // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
1128 // we've exceeded our limit on recursion, just return whatever we have
1129 // regardless of whether it can be used for a forked pointer or not, along
1130 // with an indication of whether it might be a poison or undef value.
1131 const SCEV *Scev = SE->getSCEV(Ptr);
1132 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
1133 !isa<Instruction>(Ptr) || Depth == 0) {
1134 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1135 return;
1136 }
1137
1138 Depth--;
1139
1140 auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
1141 return get<1>(S);
1142 };
1143
1144 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
1145 switch (Opcode) {
1146 case Instruction::Add:
1147 return SE->getAddExpr(L, R);
1148 case Instruction::Sub:
1149 return SE->getMinusSCEV(L, R);
1150 default:
1151 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
1152 }
1153 };
1154
1156 unsigned Opcode = I->getOpcode();
1157 switch (Opcode) {
1158 case Instruction::GetElementPtr: {
1159 auto *GEP = cast<GetElementPtrInst>(I);
1160 Type *SourceTy = GEP->getSourceElementType();
1161 // We only handle base + single offset GEPs here for now.
1162 // Not dealing with preexisting gathers yet, so no vectors.
1163 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
1164 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
1165 break;
1166 }
1169 findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
1170 findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
1171
1172 // See if we need to freeze our fork...
1173 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
1174 any_of(OffsetScevs, UndefPoisonCheck);
1175
1176 // Check that we only have a single fork, on either the base or the offset.
1177 // Copy the SCEV across for the one without a fork in order to generate
1178 // the full SCEV for both sides of the GEP.
1179 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
1180 BaseScevs.push_back(BaseScevs[0]);
1181 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
1182 OffsetScevs.push_back(OffsetScevs[0]);
1183 else {
1184 ScevList.emplace_back(Scev, NeedsFreeze);
1185 break;
1186 }
1187
1188 Type *IntPtrTy = SE->getEffectiveSCEVType(GEP->getPointerOperandType());
1189
1190 // Find the size of the type being pointed to. We only have a single
1191 // index term (guarded above) so we don't need to index into arrays or
1192 // structures, just get the size of the scalar value.
1193 const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
1194
1195 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
1196 const SCEV *Base = get<0>(B);
1197 const SCEV *Offset = get<0>(O);
1198
1199 // Scale up the offsets by the size of the type, then add to the bases.
1200 const SCEV *Scaled =
1202 ScevList.emplace_back(SE->getAddExpr(Base, Scaled), NeedsFreeze);
1203 }
1204 break;
1205 }
1206 case Instruction::Select: {
1208 // A select means we've found a forked pointer, but we currently only
1209 // support a single select per pointer so if there's another behind this
1210 // then we just bail out and return the generic SCEV.
1211 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1212 findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
1213 if (ChildScevs.size() == 2)
1214 append_range(ScevList, ChildScevs);
1215 else
1216 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1217 break;
1218 }
1219 case Instruction::PHI: {
1221 // A phi means we've found a forked pointer, but we currently only
1222 // support a single phi per pointer so if there's another behind this
1223 // then we just bail out and return the generic SCEV.
1224 if (I->getNumOperands() == 2) {
1225 findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
1226 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1227 }
1228 if (ChildScevs.size() == 2)
1229 append_range(ScevList, ChildScevs);
1230 else
1231 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1232 break;
1233 }
1234 case Instruction::Add:
1235 case Instruction::Sub: {
1238 findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
1239 findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
1240
1241 // See if we need to freeze our fork...
1242 bool NeedsFreeze =
1243 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
1244
1245 // Check that we only have a single fork, on either the left or right side.
1246 // Copy the SCEV across for the one without a fork in order to generate
1247 // the full SCEV for both sides of the BinOp.
1248 if (LScevs.size() == 2 && RScevs.size() == 1)
1249 RScevs.push_back(RScevs[0]);
1250 else if (RScevs.size() == 2 && LScevs.size() == 1)
1251 LScevs.push_back(LScevs[0]);
1252 else {
1253 ScevList.emplace_back(Scev, NeedsFreeze);
1254 break;
1255 }
1256
1257 for (auto [L, R] : zip(LScevs, RScevs))
1258 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
1259 NeedsFreeze);
1260 break;
1261 }
1262 default:
1263 // Just return the current SCEV if we haven't handled the instruction yet.
1264 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1265 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1266 break;
1267 }
1268}
1269
1270bool AccessAnalysis::createCheckForAccess(
1271 RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
1272 const DenseMap<Value *, const SCEV *> &StridesMap,
1273 DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
1274 unsigned &RunningDepId, unsigned ASId, bool Assume) {
1275 Value *Ptr = Access.getPointer();
1276 ScalarEvolution *SE = PSE.getSE();
1277 assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1278
1280 findForkedSCEVs(SE, TheLoop, Ptr, RTCheckPtrs, MaxForkedSCEVDepth);
1281 assert(!RTCheckPtrs.empty() &&
1282 "Must have some runtime-check pointer candidates");
1283
1284 // RTCheckPtrs must have size 2 if there are forked pointers. Otherwise, there
1285 // are no forked pointers; replaceSymbolicStridesSCEV in this case.
1286 auto IsLoopInvariantOrAR =
1287 [&SE, &TheLoop](const PointerIntPair<const SCEV *, 1, bool> &P) {
1288 return SE->isLoopInvariant(P.getPointer(), TheLoop) ||
1289 isa<SCEVAddRecExpr>(P.getPointer());
1290 };
1291 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {
1292 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";
1293 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()
1294 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
1295 } else {
1296 RTCheckPtrs = {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1297 }
1298
1299 /// Check whether all pointers can participate in a runtime bounds check. They
1300 /// must either be invariant or non-wrapping affine AddRecs.
1302 for (auto &P : RTCheckPtrs) {
1303 // The bounds for loop-invariant pointer is trivial.
1304 if (SE->isLoopInvariant(P.getPointer(), TheLoop))
1305 continue;
1306
1307 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
1308 if (!AR && Assume)
1309 AR = PSE.getAsAddRec(Ptr, &Predicates);
1310 if (!AR || !AR->isAffine())
1311 return false;
1312
1313 // If there's only one option for Ptr, commit the predicates collected by
1314 // getAsAddRec and look Ptr up again afterwards: the lookup below reads the
1315 // assumptions back from PSE, so they need to be committed first.
1316 if (RTCheckPtrs.size() == 1) {
1317 PSE.addPredicates(Predicates);
1318 Predicates.clear();
1319 AR =
1320 cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
1321 P.setPointer(AR);
1322 }
1323
1324 if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
1325 TheLoop, DT, /*Stride=*/std::nullopt,
1326 Assume ? &Predicates : nullptr))
1327 return false;
1328 }
1329 PSE.addPredicates(Predicates);
1330
1331 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {
1332 // The id of the dependence set.
1333 unsigned DepId;
1334
1335 if (DepCands.contains(Access)) {
1336 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1337 unsigned &LeaderId = DepSetId[Leader];
1338 if (!LeaderId)
1339 LeaderId = RunningDepId++;
1340 DepId = LeaderId;
1341 } else
1342 // Each access has its own dependence set.
1343 DepId = RunningDepId++;
1344
1345 bool IsWrite = Access.getInt();
1346 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1347 NeedsFreeze);
1348 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1349 }
1350
1351 return true;
1352}
1353
1354bool AccessAnalysis::canCheckPtrAtRT(
1355 RuntimePointerChecking &RtCheck, Loop *TheLoop,
1356 const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1357 bool AllowPartial, const MemoryDepChecker &DepChecker) {
1358 // Find pointers with computable bounds. We are going to use this information
1359 // to place a runtime bound check.
1360 bool CanDoRT = true;
1361
1362 bool MayNeedRTCheck = false;
1363 if (!IsRTCheckAnalysisNeeded) return true;
1364
1365 if (auto *Deps = DepChecker.getDependences()) {
1366 // If there are unknown dependences, this means runtime checks are needed to
1367 // ensure there's no overlap between accesses to the same underlying object.
1368 // Remove the equivalence classes containing both source and destination
1369 // accesses from DepCands. This ensures runtime checks will be generated
1370 // between those accesses and prevents them from being grouped together.
1371 for (const auto &Dep : *Deps) {
1372 if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
1375 "Should only skip safe dependences");
1376 continue;
1377 }
1378 Instruction *Src = Dep.getSource(DepChecker);
1379 Instruction *Dst = Dep.getDestination(DepChecker);
1380 DepCands.eraseClass({getPointerOperand(Src), Src->mayWriteToMemory()});
1381 DepCands.eraseClass({getPointerOperand(Dst), Dst->mayWriteToMemory()});
1382 }
1383 } else {
1384 CheckDeps.clear();
1385 DepCands = {};
1386 }
1387
1388 // We assign a consecutive id to access from different alias sets.
1389 // Accesses between different groups doesn't need to be checked.
1390 unsigned ASId = 0;
1391 for (const auto &AS : AST) {
1392 int NumReadPtrChecks = 0;
1393 int NumWritePtrChecks = 0;
1394 bool CanDoAliasSetRT = true;
1395 ++ASId;
1396 auto ASPointers = AS.getPointers();
1397
1398 // We assign consecutive id to access from different dependence sets.
1399 // Accesses within the same set don't need a runtime check.
1400 unsigned RunningDepId = 1;
1402
1404
1405 // First, count how many write and read accesses are in the alias set. Also
1406 // collect MemAccessInfos for later.
1408 for (const Value *ConstPtr : ASPointers) {
1409 Value *Ptr = const_cast<Value *>(ConstPtr);
1410 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
1411 if (IsWrite)
1412 ++NumWritePtrChecks;
1413 else
1414 ++NumReadPtrChecks;
1415 AccessInfos.emplace_back(Ptr, IsWrite);
1416 }
1417
1418 // We do not need runtime checks for this alias set, if there are no writes
1419 // or a single write and no reads.
1420 if (NumWritePtrChecks == 0 ||
1421 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1422 assert((ASPointers.size() <= 1 ||
1423 all_of(ASPointers,
1424 [this](const Value *Ptr) {
1425 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1426 true);
1427 return !DepCands.contains(AccessWrite);
1428 })) &&
1429 "Can only skip updating CanDoRT below, if all entries in AS "
1430 "are reads or there is at most 1 entry");
1431 continue;
1432 }
1433
1434 for (auto &Access : AccessInfos) {
1435 for (const auto &AccessTy : Accesses[Access]) {
1436 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1437 DepSetId, TheLoop, RunningDepId, ASId,
1438 false)) {
1439 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1440 << *Access.getPointer() << '\n');
1441 Retries.emplace_back(Access, AccessTy);
1442 CanDoAliasSetRT = false;
1443 }
1444 }
1445 }
1446
1447 // Note that this function computes CanDoRT and MayNeedRTCheck
1448 // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1449 // we have a pointer for which we couldn't find the bounds but we don't
1450 // actually need to emit any checks so it does not matter.
1451 //
1452 // We need runtime checks for this alias set, if there are at least 2
1453 // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1454 // any bound checks (because in that case the number of dependence sets is
1455 // incomplete).
1456 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1457
1458 // We need to perform run-time alias checks, but some pointers had bounds
1459 // that couldn't be checked.
1460 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1461 // Reset the CanDoSetRt flag and retry all accesses that have failed.
1462 // We know that we need these checks, so we can now be more aggressive
1463 // and add further checks if required (overflow checks).
1464 CanDoAliasSetRT = true;
1465 for (const auto &[Access, AccessTy] : Retries) {
1466 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1467 DepSetId, TheLoop, RunningDepId, ASId,
1468 /*Assume=*/true)) {
1469 CanDoAliasSetRT = false;
1470 UncomputablePtr = Access.getPointer();
1471 if (!AllowPartial)
1472 break;
1473 }
1474 }
1475 }
1476
1477 CanDoRT &= CanDoAliasSetRT;
1478 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1479 ++ASId;
1480 }
1481
1482 // If the pointers that we would use for the bounds comparison have different
1483 // address spaces, assume the values aren't directly comparable, so we can't
1484 // use them for the runtime check. We also have to assume they could
1485 // overlap. In the future there should be metadata for whether address spaces
1486 // are disjoint.
1487 unsigned NumPointers = RtCheck.Pointers.size();
1488 for (unsigned i = 0; i < NumPointers; ++i) {
1489 for (unsigned j = i + 1; j < NumPointers; ++j) {
1490 // Only need to check pointers between two different dependency sets.
1491 if (RtCheck.Pointers[i].DependencySetId ==
1492 RtCheck.Pointers[j].DependencySetId)
1493 continue;
1494 // Only need to check pointers in the same alias set.
1495 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1496 continue;
1497
1498 Value *PtrI = RtCheck.Pointers[i].PointerValue;
1499 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1500
1501 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1502 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1503 if (ASi != ASj) {
1504 LLVM_DEBUG(
1505 dbgs() << "LAA: Runtime check would require comparison between"
1506 " different address spaces\n");
1507 return false;
1508 }
1509 }
1510 }
1511
1512 if (MayNeedRTCheck && (CanDoRT || AllowPartial))
1513 RtCheck.generateChecks(DepCands);
1514
1515 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1516 << " pointer comparisons.\n");
1517
1518 // If we can do run-time checks, but there are no checks, no runtime checks
1519 // are needed. This can happen when all pointers point to the same underlying
1520 // object for example.
1521 RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1522
1523 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1524 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
1525 "CanDoRTIfNeeded depends on RtCheck.Need");
1526 if (!CanDoRTIfNeeded && !AllowPartial)
1527 RtCheck.reset();
1528 return CanDoRTIfNeeded;
1529}
1530
1531void AccessAnalysis::buildDependenceSets() {
1532 // We process the set twice: first we process read-write pointers, last we
1533 // process read-only pointers. This allows us to skip dependence tests for
1534 // read-only pointers.
1535
1536 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1537 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1538 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1539 LLVM_DEBUG({
1540 for (const auto &[A, _] : Accesses)
1541 dbgs() << "\t" << *A.getPointer() << " ("
1542 << (A.getInt()
1543 ? "write"
1544 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
1545 : "read"))
1546 << ")\n";
1547 });
1548
1549 // The AliasSetTracker has nicely partitioned our pointers by metadata
1550 // compatibility and potential for underlying-object overlap. As a result, we
1551 // only need to check for potential pointer dependencies within each alias
1552 // set.
1553 for (const auto &AS : AST) {
1554 bool AliasSetHasWrite = false;
1555
1556 // Map of (pointer to underlying objects, accessed address space) to last
1557 // access encountered.
1558 using UnderlyingObjToAccessMap =
1560 UnderlyingObjToAccessMap ObjToLastAccess;
1561
1562 // Set of access to check after all writes have been processed.
1563 PtrAccessMap DeferredAccesses;
1564
1565 // Iterate over each alias set twice, once to process read/write pointers,
1566 // and then to process read-only pointers.
1567
1568 auto ProcessAccesses = [&](bool UseDeferred) {
1569 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1570
1571 // Note that both the alias-set tracker and the alias sets themselves used
1572 // ordered collections internally and so the iteration order here is
1573 // deterministic.
1574 for (const Value *ConstPtr : AS.getPointers()) {
1575 Value *Ptr = const_cast<Value *>(ConstPtr);
1576
1577 // For a single memory access in AliasSetTracker, Accesses may contain
1578 // both read and write, and they both need to be handled for CheckDeps.
1579 for (auto [AccessPtr, IsWrite] : S.keys()) {
1580 if (AccessPtr != Ptr)
1581 continue;
1582
1583 // If we're using the deferred access set, then it contains only
1584 // reads.
1585 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
1586 if (UseDeferred && !IsReadOnlyPtr)
1587 continue;
1588 // Otherwise, the pointer must be in the PtrAccessSet, either as a
1589 // read or a write.
1590 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1591 S.contains(MemAccessInfo(Ptr, false))) &&
1592 "Alias-set pointer not in the access set?");
1593
1594 MemAccessInfo Access(Ptr, IsWrite);
1595 DepCands.insert(Access);
1596
1597 // Memorize read-only pointers for later processing and skip them in
1598 // the first round (they need to be checked after we have seen all
1599 // write pointers). Note: we also mark pointer that are not
1600 // consecutive as "read-only" pointers (so that we check
1601 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1602 if (!UseDeferred && IsReadOnlyPtr) {
1603 // We only use the pointer keys, the types vector values don't
1604 // matter.
1605 DeferredAccesses.insert({Access, {}});
1606 continue;
1607 }
1608
1609 // If this is a write - check other reads and writes for conflicts. If
1610 // this is a read only check other writes for conflicts (but only if
1611 // there is no other write to the ptr - this is an optimization to
1612 // catch "a[i] = a[i] + " without having to do a dependence check).
1613 if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
1614 CheckDeps.push_back(Access);
1615 IsRTCheckAnalysisNeeded = true;
1616 }
1617
1618 if (IsWrite)
1619 AliasSetHasWrite = true;
1620
1621 // Create sets of pointers connected by a shared alias set and
1622 // underlying object.
1623 SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1624 UOs = {};
1625 ::getUnderlyingObjects(Ptr, UOs, LI);
1627 << "Underlying objects for pointer " << *Ptr << "\n");
1628 for (const Value *UnderlyingObj : UOs) {
1629 // nullptr never alias, don't join sets for pointer that have "null"
1630 // in their UnderlyingObjects list.
1631 if (isa<ConstantPointerNull>(UnderlyingObj) &&
1633 TheLoop->getHeader()->getParent(),
1634 UnderlyingObj->getType()->getPointerAddressSpace()))
1635 continue;
1636
1637 auto [It, Inserted] = ObjToLastAccess.try_emplace(
1638 {UnderlyingObj,
1639 cast<PointerType>(Ptr->getType())->getAddressSpace()},
1640 Access);
1641 if (!Inserted) {
1642 DepCands.unionSets(Access, It->second);
1643 It->second = Access;
1644 }
1645
1646 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1647 }
1648 }
1649 }
1650 };
1651
1652 ProcessAccesses(false);
1653 ProcessAccesses(true);
1654 }
1655}
1656
1657/// Check whether the access through \p Ptr has a constant stride.
1658std::optional<int64_t> llvm::getPtrStride(
1659 PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp,
1660 const DominatorTree &DT, const DenseMap<Value *, const SCEV *> &StridesMap,
1661 bool ShouldCheckWrap, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
1662 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1663 if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1664 return 0;
1665
1666 assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
1667
1668 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1669 if (Predicates && !AR) {
1670 AR = PSE.getSE()->convertSCEVToAddRecWithPredicates(PtrScev, Lp,
1671 *Predicates);
1672 }
1673
1674 if (!AR) {
1675 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1676 << " SCEV: " << *PtrScev << "\n");
1677 return std::nullopt;
1678 }
1679
1680 std::optional<int64_t> Stride =
1681 getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
1682 if (!ShouldCheckWrap || !Stride)
1683 return Stride;
1684
1685 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, DT, Stride, Predicates))
1686 return Stride;
1687
1688 LLVM_DEBUG(
1689 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1690 << *Ptr << " SCEV: " << *AR << "\n");
1691 return std::nullopt;
1692}
1693
1694/// Check whether the access through \p Ptr has a constant stride.
1695std::optional<int64_t>
1697 const Loop *Lp, const DominatorTree &DT,
1698 const DenseMap<Value *, const SCEV *> &StridesMap,
1699 bool Assume, bool ShouldCheckWrap) {
1701 std::optional<int64_t> Stride =
1702 getPtrStride(PSE, AccessTy, Ptr, Lp, DT, StridesMap, ShouldCheckWrap,
1703 Assume ? &Predicates : nullptr);
1704 PSE.addPredicates(Predicates);
1705 return Stride;
1706}
1707
1708std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1709 Type *ElemTyB, Value *PtrB,
1710 const DataLayout &DL,
1711 ScalarEvolution &SE,
1712 bool StrictCheck, bool CheckType) {
1713 assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1714
1715 // Make sure that A and B are different pointers.
1716 if (PtrA == PtrB)
1717 return 0;
1718
1719 // Make sure that the element types are the same if required.
1720 if (CheckType && ElemTyA != ElemTyB)
1721 return std::nullopt;
1722
1723 unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1724 unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1725
1726 // Check that the address spaces match.
1727 if (ASA != ASB)
1728 return std::nullopt;
1729 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1730
1731 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1732 const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1733 DL, OffsetA, /*AllowNonInbounds=*/true);
1734 const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1735 DL, OffsetB, /*AllowNonInbounds=*/true);
1736
1737 std::optional<int64_t> Val;
1738 if (PtrA1 == PtrB1) {
1739 // Retrieve the address space again as pointer stripping now tracks through
1740 // `addrspacecast`.
1741 ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1742 ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1743 // Check that the address spaces match and that the pointers are valid.
1744 if (ASA != ASB)
1745 return std::nullopt;
1746
1747 IdxWidth = DL.getIndexSizeInBits(ASA);
1748 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1749 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1750
1751 OffsetB -= OffsetA;
1752 Val = OffsetB.trySExtValue();
1753 } else {
1754 // Otherwise compute the distance with SCEV between the base pointers.
1755 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1756 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1757 std::optional<APInt> Diff =
1758 SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1759 if (!Diff)
1760 return std::nullopt;
1761 Val = Diff->trySExtValue();
1762 }
1763
1764 if (!Val)
1765 return std::nullopt;
1766
1767 int64_t Size = DL.getTypeStoreSize(ElemTyA);
1768 int64_t Dist = *Val / Size;
1769
1770 // Ensure that the calculated distance matches the type-based one after all
1771 // the bitcasts removal in the provided pointers.
1772 if (!StrictCheck || Dist * Size == Val)
1773 return Dist;
1774 return std::nullopt;
1775}
1776
1778 const DataLayout &DL, ScalarEvolution &SE,
1779 SmallVectorImpl<unsigned> &SortedIndices) {
1781 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1782 "Expected list of pointer operands.");
1783 // Walk over the pointers, and map each of them to an offset relative to
1784 // first pointer in the array.
1785 Value *Ptr0 = VL[0];
1786
1787 using DistOrdPair = std::pair<int64_t, unsigned>;
1788 auto Compare = llvm::less_first();
1789 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1790 Offsets.emplace(0, 0);
1791 bool IsConsecutive = true;
1792 for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1793 std::optional<int64_t> Diff =
1794 getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1795 /*StrictCheck=*/true);
1796 if (!Diff)
1797 return false;
1798
1799 // Check if the pointer with the same offset is found.
1800 int64_t Offset = *Diff;
1801 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1802 if (!IsInserted)
1803 return false;
1804 // Consecutive order if the inserted element is the last one.
1805 IsConsecutive &= std::next(It) == Offsets.end();
1806 }
1807 SortedIndices.clear();
1808 if (!IsConsecutive) {
1809 // Fill SortedIndices array only if it is non-consecutive.
1810 SortedIndices.resize(VL.size());
1811 for (auto [Idx, Off] : enumerate(Offsets))
1812 SortedIndices[Idx] = Off.second;
1813 }
1814 return true;
1815}
1816
1817/// Returns true if the memory operations \p A and \p B are consecutive.
1819 ScalarEvolution &SE, bool CheckType) {
1822 if (!PtrA || !PtrB)
1823 return false;
1824 Type *ElemTyA = getLoadStoreType(A);
1825 Type *ElemTyB = getLoadStoreType(B);
1826 std::optional<int64_t> Diff =
1827 getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1828 /*StrictCheck=*/true, CheckType);
1829 return Diff == 1;
1830}
1831
1833 visitPointers(SI->getPointerOperand(), *InnermostLoop,
1834 [this, SI](Value *Ptr) {
1835 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1836 InstMap.push_back(SI);
1837 ++AccessIdx;
1838 });
1839}
1840
1842 visitPointers(LI->getPointerOperand(), *InnermostLoop,
1843 [this, LI](Value *Ptr) {
1844 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1845 InstMap.push_back(LI);
1846 ++AccessIdx;
1847 });
1848}
1849
1869
1871 switch (Type) {
1872 case NoDep:
1873 case Forward:
1875 case Unknown:
1876 case IndirectUnsafe:
1877 case InvariantUnsafe:
1878 return false;
1879
1881 case Backward:
1883 return true;
1884 }
1885 llvm_unreachable("unexpected DepType!");
1886}
1887
1892
1894 switch (Type) {
1895 case Forward:
1897 return true;
1898
1899 case NoDep:
1900 case Unknown:
1902 case Backward:
1904 case IndirectUnsafe:
1905 case InvariantUnsafe:
1906 return false;
1907 }
1908 llvm_unreachable("unexpected DepType!");
1909}
1910
1911bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1912 uint64_t TypeByteSize,
1913 unsigned CommonStride) {
1914 // If loads occur at a distance that is not a multiple of a feasible vector
1915 // factor store-load forwarding does not take place.
1916 // Positive dependences might cause troubles because vectorizing them might
1917 // prevent store-load forwarding making vectorized code run a lot slower.
1918 // a[i] = a[i-3] ^ a[i-8];
1919 // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1920 // hence on your typical architecture store-load forwarding does not take
1921 // place. Vectorizing in such cases does not make sense.
1922 // Store-load forwarding distance.
1923
1924 // After this many iterations store-to-load forwarding conflicts should not
1925 // cause any slowdowns.
1926 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1927 // Maximum vector factor.
1928 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1929 std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
1930 MaxStoreLoadForwardSafeDistanceInBits);
1931
1932 // Compute the smallest VF at which the store and load would be misaligned.
1933 for (uint64_t VF = 2 * TypeByteSize;
1934 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
1935 // If the number of vector iteration between the store and the load are
1936 // small we could incur conflicts.
1937 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1938 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
1939 break;
1940 }
1941 }
1942
1943 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
1944 LLVM_DEBUG(
1945 dbgs() << "LAA: Distance " << Distance
1946 << " that could cause a store-load forwarding conflict\n");
1947 return true;
1948 }
1949
1950 if (CommonStride &&
1951 MaxVFWithoutSLForwardIssuesPowerOf2 <
1952 MaxStoreLoadForwardSafeDistanceInBits &&
1953 MaxVFWithoutSLForwardIssuesPowerOf2 !=
1954 VectorizerParams::MaxVectorWidth * TypeByteSize) {
1955 uint64_t MaxVF =
1956 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
1957 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1958 MaxStoreLoadForwardSafeDistanceInBits =
1959 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1960 }
1961 return false;
1962}
1963
1964void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1965 if (Status < S)
1966 Status = S;
1967}
1968
1969/// Given a dependence-distance \p Dist between two memory accesses, that have
1970/// strides in the same direction whose absolute value of the maximum stride is
1971/// given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1972/// MaxBTC, check if it is possible to prove statically that the dependence
1973/// distance is larger than the range that the accesses will travel through the
1974/// execution of the loop. If so, return true; false otherwise. This is useful
1975/// for example in loops such as the following (PR31098):
1976///
1977/// for (i = 0; i < D; ++i) {
1978/// = out[i];
1979/// out[i+D] =
1980/// }
1982 const SCEV &MaxBTC, const SCEV &Dist,
1983 uint64_t MaxStride) {
1984
1985 // If we can prove that
1986 // (**) |Dist| > MaxBTC * Step
1987 // where Step is the absolute stride of the memory accesses in bytes,
1988 // then there is no dependence.
1989 //
1990 // Rationale:
1991 // We basically want to check if the absolute distance (|Dist/Step|)
1992 // is >= the loop iteration count (or > MaxBTC).
1993 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1994 // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1995 // that the dependence distance is >= VF; This is checked elsewhere.
1996 // But in some cases we can prune dependence distances early, and
1997 // even before selecting the VF, and without a runtime test, by comparing
1998 // the distance against the loop iteration count. Since the vectorized code
1999 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
2000 // also guarantees that distance >= VF.
2001 //
2002 const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
2003 const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
2004
2005 const SCEV *CastedDist = &Dist;
2006 const SCEV *CastedProduct = Product;
2007 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
2008 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
2009
2010 // The dependence distance can be positive/negative, so we sign extend Dist;
2011 // The multiplication of the absolute stride in bytes and the
2012 // backedgeTakenCount is non-negative, so we zero extend Product.
2013 if (DistTypeSizeBits > ProductTypeSizeBits)
2014 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
2015 else
2016 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
2017
2018 // Is Dist - (MaxBTC * Step) > 0 ?
2019 // (If so, then we have proven (**) because |Dist| >= Dist)
2020 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
2021 if (SE.isKnownPositive(Minus))
2022 return true;
2023
2024 // Second try: Is -Dist - (MaxBTC * Step) > 0 ?
2025 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
2026 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
2027 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
2028 return SE.isKnownPositive(Minus);
2029}
2030
2031/// Check the dependence for two accesses with the same stride \p Stride.
2032/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
2033/// size in bytes.
2034///
2035/// \returns true if they are independent.
2037 uint64_t TypeByteSize) {
2038 assert(Stride > 1 && "The stride must be greater than 1");
2039 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
2040 assert(Distance > 0 && "The distance must be non-zero");
2041
2042 // Skip if the distance is not multiple of type byte size.
2043 if (Distance % TypeByteSize)
2044 return false;
2045
2046 // No dependence if the distance is not multiple of the stride.
2047 // E.g.
2048 // for (i = 0; i < 1024 ; i += 4)
2049 // A[i+2] = A[i] + 1;
2050 //
2051 // Two accesses in memory (distance is 2, stride is 4):
2052 // | A[0] | | | | A[4] | | | |
2053 // | | | A[2] | | | | A[6] | |
2054 //
2055 // E.g.
2056 // for (i = 0; i < 1024 ; i += 3)
2057 // A[i+4] = A[i] + 1;
2058 //
2059 // Two accesses in memory (distance is 4, stride is 3):
2060 // | A[0] | | | A[3] | | | A[6] | | |
2061 // | | | | | A[4] | | | A[7] | |
2062 return Distance % Stride;
2063}
2064
2065bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
2066 Type *SrcTy,
2067 const SCEV *Sink,
2068 Type *SinkTy) {
2069 const SCEV *BTC = PSE.getBackedgeTakenCount();
2070 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
2071 ScalarEvolution &SE = *PSE.getSE();
2072 const auto &[SrcStart_, SrcEnd_] =
2073 getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
2074 &SE, &PointerBounds, DT, AC, LoopGuards);
2075 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
2076 return false;
2077
2078 const auto &[SinkStart_, SinkEnd_] =
2079 getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
2080 &SE, &PointerBounds, DT, AC, LoopGuards);
2081 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
2082 isa<SCEVCouldNotCompute>(SinkEnd_))
2083 return false;
2084
2085 if (!LoopGuards)
2086 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2087
2088 auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
2089 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
2090 if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
2091 return true;
2092
2093 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
2094 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
2095 return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2096}
2097
2099 MemoryDepChecker::DepDistanceStrideAndSizeInfo>
2100MemoryDepChecker::getDependenceDistanceStrideAndSize(
2101 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
2102 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
2103 const auto &DL = InnermostLoop->getHeader()->getDataLayout();
2104 auto &SE = *PSE.getSE();
2105 const auto &[APtr, AIsWrite] = A;
2106 const auto &[BPtr, BIsWrite] = B;
2107
2108 // Two reads are independent.
2109 if (!AIsWrite && !BIsWrite)
2111
2112 Type *ATy = getLoadStoreType(AInst);
2113 Type *BTy = getLoadStoreType(BInst);
2114
2115 // We cannot check pointers in different address spaces.
2116 if (APtr->getType()->getPointerAddressSpace() !=
2117 BPtr->getType()->getPointerAddressSpace())
2119
2121 std::optional<int64_t> StrideAPtr =
2122 getPtrStride(PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides,
2123 /*ShouldCheckWrap=*/true, &Predicates);
2124 std::optional<int64_t> StrideBPtr =
2125 getPtrStride(PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides,
2126 /*ShouldCheckWrap=*/true, &Predicates);
2127 PSE.addPredicates(Predicates);
2128
2129 const SCEV *Src = PSE.getSCEV(APtr);
2130 const SCEV *Sink = PSE.getSCEV(BPtr);
2131
2132 // If the induction step is negative we have to invert source and sink of the
2133 // dependence when measuring the distance between them. We should not swap
2134 // AIsWrite with BIsWrite, as their uses expect them in program order.
2135 if (StrideAPtr && *StrideAPtr < 0) {
2136 std::swap(Src, Sink);
2137 std::swap(AInst, BInst);
2138 std::swap(ATy, BTy);
2139 std::swap(StrideAPtr, StrideBPtr);
2140 }
2141
2142 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
2143
2144 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
2145 << "\n");
2146 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
2147 << ": " << *Dist << "\n");
2148
2149 // Need accesses with constant strides and the same direction for further
2150 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
2151 // similar code or pointer arithmetic that could wrap in the address space.
2152
2153 // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
2154 // not loop-invariant (stride will be 0 in that case), we cannot analyze the
2155 // dependence further and also cannot generate runtime checks.
2156 if (!StrideAPtr || !StrideBPtr) {
2157 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2159 }
2160
2161 int64_t StrideAPtrInt = *StrideAPtr;
2162 int64_t StrideBPtrInt = *StrideBPtr;
2163 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
2164 << " Sink induction step: " << StrideBPtrInt << "\n");
2165 // At least Src or Sink are loop invariant and the other is strided or
2166 // invariant.
2167 if (!StrideAPtrInt || !StrideBPtrInt) {
2168 // If both are loop-invariant and access the same location, we cannot
2169 // vectorize.
2170 if (!StrideAPtrInt && !StrideBPtrInt && Dist->isZero())
2172 // Otherwise, we can generate a runtime check to disambiguate the accesses.
2174 }
2175
2176 // Both Src and Sink have a constant stride, check if they are in the same
2177 // direction.
2178 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
2179 LLVM_DEBUG(
2180 dbgs() << "Pointer access with strides in different directions\n");
2182 }
2183
2184 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
2185 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
2186
2187 // If store sizes are not the same, set TypeByteSize to zero, so we can check
2188 // it in the caller isDependent.
2189 uint64_t ASz = DL.getTypeAllocSize(ATy);
2190 uint64_t BSz = DL.getTypeAllocSize(BTy);
2191 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
2192
2193 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
2194 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
2195
2196 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
2197
2198 std::optional<uint64_t> CommonStride;
2199 if (StrideAScaled == StrideBScaled)
2200 CommonStride = StrideAScaled;
2201
2202 // TODO: Historically, we didn't retry with runtime checks when (unscaled)
2203 // strides were different but there is no inherent reason to.
2204 if (!isa<SCEVConstant>(Dist))
2205 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
2206
2207 // If distance is a SCEVCouldNotCompute, return Unknown immediately.
2208 if (isa<SCEVCouldNotCompute>(Dist)) {
2209 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
2210 return Dependence::Unknown;
2211 }
2212
2213 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2214 TypeByteSize, AIsWrite, BIsWrite);
2215}
2216
2218MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2219 const MemAccessInfo &B, unsigned BIdx) {
2220 assert(AIdx < BIdx && "Must pass arguments in program order");
2221
2222 // Check if we can prove that Sink only accesses memory after Src's end or
2223 // vice versa. The helper is used to perform the checks only on the exit paths
2224 // where it helps to improve the analysis result.
2225 auto CheckCompletelyBeforeOrAfter = [&]() {
2226 auto *APtr = A.getPointer();
2227 auto *BPtr = B.getPointer();
2228 Type *ATy = getLoadStoreType(InstMap[AIdx]);
2229 Type *BTy = getLoadStoreType(InstMap[BIdx]);
2230 const SCEV *Src = PSE.getSCEV(APtr);
2231 const SCEV *Sink = PSE.getSCEV(BPtr);
2232 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2233 };
2234
2235 // Get the dependence distance, stride, type size and what access writes for
2236 // the dependence between A and B.
2237 auto Res =
2238 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2239 if (std::holds_alternative<Dependence::DepType>(Res)) {
2240 if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2241 CheckCompletelyBeforeOrAfter())
2242 return Dependence::NoDep;
2243 return std::get<Dependence::DepType>(Res);
2244 }
2245
2246 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
2247 std::get<DepDistanceStrideAndSizeInfo>(Res);
2248 bool HasSameSize = TypeByteSize > 0;
2249
2250 ScalarEvolution &SE = *PSE.getSE();
2251 auto &DL = InnermostLoop->getHeader()->getDataLayout();
2252
2253 // If the distance between the acecsses is larger than their maximum absolute
2254 // stride multiplied by the symbolic maximum backedge taken count (which is an
2255 // upper bound of the number of iterations), the accesses are independet, i.e.
2256 // they are far enough appart that accesses won't access the same location
2257 // across all loop ierations.
2258 if (HasSameSize &&
2260 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
2261 return Dependence::NoDep;
2262
2263 // The rest of this function relies on ConstDist being at most 64-bits, which
2264 // is checked earlier. Will assert if the calling code changes.
2265 const APInt *APDist = nullptr;
2266 uint64_t ConstDist =
2267 match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
2268
2269 // Attempt to prove strided accesses independent.
2270 if (APDist) {
2271 // If the distance between accesses and their strides are known constants,
2272 // check whether the accesses interlace each other.
2273 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2274 areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
2275 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2276 return Dependence::NoDep;
2277 }
2278 } else {
2279 if (!LoopGuards)
2280 LoopGuards.emplace(
2281 ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2282 Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2283 }
2284
2285 // Negative distances are not plausible dependencies.
2286 if (SE.isKnownNonPositive(Dist)) {
2287 if (SE.isKnownNonNegative(Dist)) {
2288 if (HasSameSize) {
2289 // Write to the same location with the same size.
2290 return Dependence::Forward;
2291 }
2292 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2293 "different type sizes\n");
2294 return Dependence::Unknown;
2295 }
2296
2297 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2298 // Check if the first access writes to a location that is read in a later
2299 // iteration, where the distance between them is not a multiple of a vector
2300 // factor and relatively small.
2301 //
2302 // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2303 // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2304 // forward dependency will allow vectorization using any width.
2305
2306 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2307 if (!ConstDist) {
2308 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2310 }
2311 if (!HasSameSize ||
2312 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
2313 LLVM_DEBUG(
2314 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2316 }
2317 }
2318
2319 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2320 return Dependence::Forward;
2321 }
2322
2323 int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2324 // Below we only handle strictly positive distances.
2325 if (MinDistance <= 0) {
2326 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2328 }
2329
2330 if (!HasSameSize) {
2331 if (CheckCompletelyBeforeOrAfter())
2332 return Dependence::NoDep;
2333 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2334 "different type sizes\n");
2335 return Dependence::Unknown;
2336 }
2337 // Bail out early if passed-in parameters make vectorization not feasible.
2338 unsigned MinForcedFactor =
2339 std::max(1U, VectorizerParams::VectorizationFactor.getKnownMinValue());
2340 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2342 // The minimum number of iterations for a vectorized/unrolled version.
2343 unsigned MinNumIter = std::max(MinForcedFactor * ForcedUnroll, 2U);
2344
2345 // It's not vectorizable if the distance is smaller than the minimum distance
2346 // needed for a vectroized/unrolled version. Vectorizing one iteration in
2347 // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
2348 // (No need to plus the last gap distance).
2349 //
2350 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2351 // foo(int *A) {
2352 // int *B = (int *)((char *)A + 14);
2353 // for (i = 0 ; i < 1024 ; i += 2)
2354 // B[i] = A[i] + 1;
2355 // }
2356 //
2357 // Two accesses in memory (stride is 4 * 2):
2358 // | A[0] | | A[2] | | A[4] | | A[6] | |
2359 // | B[0] | | B[2] | | B[4] |
2360 //
2361 // MinDistance needs for vectorizing iterations except the last iteration:
2362 // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
2363 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2364 //
2365 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2366 // 12, which is less than distance.
2367 //
2368 // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2369 // the minimum distance needed is 28, which is greater than distance. It is
2370 // not safe to do vectorization.
2371 //
2372 // We use MaxStride (maximum of src and sink strides) to get a conservative
2373 // lower bound on the MinDistanceNeeded in case of different strides.
2374
2375 // We know that Dist is positive, but it may not be constant. Use the signed
2376 // minimum for computations below, as this ensures we compute the closest
2377 // possible dependence distance.
2378 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
2379 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2380 if (!ConstDist) {
2381 // For non-constant distances, we checked the lower bound of the
2382 // dependence distance and the distance may be larger at runtime (and safe
2383 // for vectorization). Classify it as Unknown, so we re-try with runtime
2384 // checks, unless we can prove both accesses cannot overlap.
2385 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2387 }
2388 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2389 << MinDistance << '\n');
2390 return Dependence::Backward;
2391 }
2392
2393 // Unsafe if the minimum distance needed is greater than smallest dependence
2394 // distance distance.
2395 if (MinDistanceNeeded > MinDepDistBytes) {
2396 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2397 << MinDistanceNeeded << " size in bytes\n");
2398 return Dependence::Backward;
2399 }
2400
2401 MinDepDistBytes =
2402 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2403
2404 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2405 if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2406 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
2408
2409 uint64_t MaxVF = MinDepDistBytes / MaxStride;
2410 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2411 << " with max VF = " << MaxVF << '\n');
2412
2413 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2414 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2415 // For non-constant distances, we checked the lower bound of the dependence
2416 // distance and the distance may be larger at runtime (and safe for
2417 // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2418 // unless we can prove both accesses cannot overlap.
2419 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2421 }
2422
2423 if (CheckCompletelyBeforeOrAfter())
2424 return Dependence::NoDep;
2425
2426 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2428}
2429
2431 ArrayRef<MemAccessInfo> CheckDeps) {
2432
2433 MinDepDistBytes = -1;
2435 for (MemAccessInfo CurAccess : CheckDeps) {
2436 if (Visited.contains(CurAccess))
2437 continue;
2438
2439 // Check accesses within this set.
2441 DepCands.findLeader(CurAccess);
2443 DepCands.member_end();
2444
2445 // Check every access pair.
2446 while (AI != AE) {
2447 Visited.insert(*AI);
2448 bool AIIsWrite = AI->getInt();
2449 // Reads from the same pointer don't create extra hazards, but multiple
2450 // stores do (WAW), so start from AI for writes and next(AI) for reads.
2452 (AIIsWrite ? AI : std::next(AI));
2453 while (OI != AE) {
2454 // Check every accessing instruction pair in program order.
2455 auto &Acc = Accesses[*AI];
2456 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();
2457 I1 != I1E; ++I1)
2458 // When checking for WAW (OI == AI) caused by multiple writes to the
2459 // same pointer, start I2 at the next access past I1 to avoid
2460 // self-comparison.
2461 for (std::vector<unsigned>::iterator
2462 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2463 I2E = (OI == AI ? I1E : Accesses[*OI].end());
2464 I2 != I2E; ++I2) {
2465 auto A = std::make_pair(&*AI, *I1);
2466 auto B = std::make_pair(&*OI, *I2);
2467
2468 assert(*I1 != *I2);
2469 if (*I1 > *I2)
2470 std::swap(A, B);
2471
2473 isDependent(*A.first, A.second, *B.first, B.second);
2475
2476 // Gather dependences unless we accumulated MaxDependences
2477 // dependences. In that case return as soon as we find the first
2478 // unsafe dependence. This puts a limit on this quadratic
2479 // algorithm.
2480 if (RecordDependences) {
2481 if (Type != Dependence::NoDep)
2482 Dependences.emplace_back(A.second, B.second, Type);
2483
2484 if (Dependences.size() >= MaxDependences) {
2485 RecordDependences = false;
2486 Dependences.clear();
2488 << "Too many dependences, stopped recording\n");
2489 }
2490 }
2491 if (!RecordDependences && !isSafeForVectorization())
2492 return false;
2493 }
2494 ++OI;
2495 }
2496 ++AI;
2497 }
2498 }
2499
2500 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2501 return isSafeForVectorization();
2502}
2503
2506 MemAccessInfo Access(Ptr, IsWrite);
2507 auto I = Accesses.find(Access);
2509 if (I != Accesses.end()) {
2510 transform(I->second, std::back_inserter(Insts),
2511 [&](unsigned Idx) { return this->InstMap[Idx]; });
2512 }
2513
2514 return Insts;
2515}
2516
2518 "NoDep",
2519 "Unknown",
2520 "IndirectUnsafe",
2521 "InvariantUnsafe",
2522 "Forward",
2523 "ForwardButPreventsForwarding",
2524 "Backward",
2525 "BackwardVectorizable",
2526 "BackwardVectorizableButPreventsForwarding"};
2527
2529 raw_ostream &OS, unsigned Depth,
2530 const SmallVectorImpl<Instruction *> &Instrs) const {
2531 OS.indent(Depth) << DepName[Type] << ":\n";
2532 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2533 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2534}
2535
2536bool LoopAccessInfo::canAnalyzeLoop() {
2537 // We need to have a loop header.
2538 LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2539 << TheLoop->getHeader()->getParent()->getName() << "' from "
2540 << TheLoop->getLocStr() << "\n");
2541
2542 // We can only analyze innermost loops.
2543 if (!TheLoop->isInnermost()) {
2544 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2545 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2546 return false;
2547 }
2548
2549 // We must have a single backedge.
2550 if (TheLoop->getNumBackEdges() != 1) {
2551 LLVM_DEBUG(
2552 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2553 recordAnalysis("CFGNotUnderstood")
2554 << "loop control flow is not understood by analyzer";
2555 return false;
2556 }
2557
2558 // ScalarEvolution needs to be able to find the symbolic max backedge taken
2559 // count, which is an upper bound on the number of loop iterations. The loop
2560 // may execute fewer iterations, if it exits via an uncountable exit.
2561 const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2562 if (isa<SCEVCouldNotCompute>(ExitCount)) {
2563 recordAnalysis("CantComputeNumberOfIterations")
2564 << "could not determine number of loop iterations";
2565 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2566 return false;
2567 }
2568
2569 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2570 << TheLoop->getHeader()->getName() << "\n");
2571 return true;
2572}
2573
2574bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2575 const TargetLibraryInfo *TLI,
2576 DominatorTree *DT) {
2577 // Holds the Load and Store instructions.
2580 SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2581
2582 // Holds all the different accesses in the loop.
2583 unsigned NumReads = 0;
2584 unsigned NumReadWrites = 0;
2585
2586 bool HasComplexMemInst = false;
2587
2588 // A runtime check is only legal to insert if there are no convergent calls.
2589 HasConvergentOp = false;
2590
2591 PtrRtChecking->Pointers.clear();
2592 PtrRtChecking->Need = false;
2593
2594 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2595
2596 const bool EnableMemAccessVersioningOfLoop =
2598 !TheLoop->getHeader()->getParent()->hasOptSize();
2599
2600 // Traverse blocks in fixed RPOT order, regardless of their storage in the
2601 // loop info, as it may be arbitrary.
2602 LoopBlocksRPO RPOT(TheLoop);
2603 RPOT.perform(LI);
2604
2605 // Don't return early as soon as we found a memory access that cannot be
2606 // vectorize - HasConvergentOp must still be computed as it is part of LAI's
2607 // public API (used by LoopDistribute).
2608 for (BasicBlock *BB : RPOT) {
2609 // Scan the BB and collect legal loads and stores. Also detect any
2610 // convergent instructions.
2611 for (Instruction &I : *BB) {
2612 if (auto *Call = dyn_cast<CallBase>(&I)) {
2613 if (Call->isConvergent())
2614 HasConvergentOp = true;
2615 }
2616
2617 // Unsafe to vectorize and we already found a convergent operation, can
2618 // early return now.
2619 if (HasComplexMemInst && HasConvergentOp)
2620 return false;
2621
2622 // Already unsafe to vectorize; keep scanning for convergent ops.
2623 if (HasComplexMemInst)
2624 continue;
2625
2626 // Record alias scopes defined inside the loop.
2627 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2628 for (Metadata *Op : Decl->getScopeList()->operands())
2629 LoopAliasScopes.insert(cast<MDNode>(Op));
2630
2631 // Many math library functions read the rounding mode. We will only
2632 // vectorize a loop if it contains known function calls that don't set
2633 // the flag. Therefore, it is safe to ignore this read from memory.
2634 auto *Call = dyn_cast<CallInst>(&I);
2636 continue;
2637
2638 // If this is a load, save it. If this instruction can read from memory
2639 // but is not a load, we only allow it if it's a call to a function with a
2640 // vector mapping and no pointer arguments.
2641 if (I.mayReadFromMemory()) {
2642 auto hasPointerArgs = [](CallBase *CB) {
2643 return any_of(CB->args(), [](Value const *Arg) {
2644 return Arg->getType()->isPointerTy();
2645 });
2646 };
2647
2648 // If the function has an explicit vectorized counterpart, and does not
2649 // take output/input pointers, we can safely assume that it can be
2650 // vectorized.
2651 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2652 !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2653 continue;
2654
2655 auto *Ld = dyn_cast<LoadInst>(&I);
2656 if (!Ld) {
2657 recordAnalysis("CantVectorizeInstruction", &I)
2658 << "instruction cannot be vectorized";
2659 HasComplexMemInst = true;
2660 continue;
2661 }
2662 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2663 recordAnalysis("NonSimpleLoad", Ld)
2664 << "read with atomic ordering or volatile read";
2665 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2666 HasComplexMemInst = true;
2667 continue;
2668 }
2669 NumLoads++;
2670 Loads.push_back(Ld);
2671 DepChecker->addAccess(Ld);
2672 if (EnableMemAccessVersioningOfLoop)
2673 collectStridedAccess(Ld);
2674 continue;
2675 }
2676
2677 // Save 'store' instructions. Abort if other instructions write to memory.
2678 if (I.mayWriteToMemory()) {
2679 auto *St = dyn_cast<StoreInst>(&I);
2680 if (!St) {
2681 recordAnalysis("CantVectorizeInstruction", &I)
2682 << "instruction cannot be vectorized";
2683 HasComplexMemInst = true;
2684 continue;
2685 }
2686 if (!St->isSimple() && !IsAnnotatedParallel) {
2687 recordAnalysis("NonSimpleStore", St)
2688 << "write with atomic ordering or volatile write";
2689 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2690 HasComplexMemInst = true;
2691 continue;
2692 }
2693 NumStores++;
2694 Stores.push_back(St);
2695 DepChecker->addAccess(St);
2696 if (EnableMemAccessVersioningOfLoop)
2697 collectStridedAccess(St);
2698 }
2699 } // Next instr.
2700 } // Next block.
2701
2702 if (HasComplexMemInst)
2703 return false;
2704
2705 // Now we have two lists that hold the loads and the stores.
2706 // Next, we find the pointers that they use.
2707
2708 // Check if we see any stores. If there are no stores, then we don't
2709 // care if the pointers are *restrict*.
2710 if (!Stores.size()) {
2711 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2712 return true;
2713 }
2714
2716 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
2717 LoopAliasScopes);
2718
2719 // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2720 // multiple times on the same object. If the ptr is accessed twice, once
2721 // for read and once for write, it will only appear once (on the write
2722 // list). This is okay, since we are going to check for conflicts between
2723 // writes and between reads and writes, but not between reads and reads.
2724 SmallSet<std::pair<Value *, Type *>, 16> Seen;
2725
2726 // Record uniform store addresses to identify if we have multiple stores
2727 // to the same address.
2728 SmallPtrSet<Value *, 16> UniformStores;
2729
2730 for (StoreInst *ST : Stores) {
2731 Value *Ptr = ST->getPointerOperand();
2732
2733 if (isInvariant(Ptr)) {
2734 // Record store instructions to loop invariant addresses
2735 StoresToInvariantAddresses.push_back(ST);
2736 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2737 !UniformStores.insert(Ptr).second;
2738 }
2739
2740 // If we did *not* see this pointer before, insert it to the read-write
2741 // list. At this phase it is only a 'write' list.
2742 Type *AccessTy = getLoadStoreType(ST);
2743 if (Seen.insert({Ptr, AccessTy}).second) {
2744 ++NumReadWrites;
2745
2746 MemoryLocation Loc = MemoryLocation::get(ST);
2747 // The TBAA metadata could have a control dependency on the predication
2748 // condition, so we cannot rely on it when determining whether or not we
2749 // need runtime pointer checks.
2750 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2751 Loc.AATags.TBAA = nullptr;
2752
2753 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2754 // all alternatives.
2755 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2756 [&Accesses, AccessTy, Loc](Value *Ptr) {
2757 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2758 Accesses.addStore(NewLoc, AccessTy);
2759 });
2760 }
2761 }
2762
2763 if (IsAnnotatedParallel) {
2764 LLVM_DEBUG(
2765 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2766 << "checks.\n");
2767 return true;
2768 }
2769
2770 for (LoadInst *LD : Loads) {
2771 Value *Ptr = LD->getPointerOperand();
2772 // If we did *not* see this pointer before, insert it to the read list. If
2773 // we *did* see it before, then it is already in the read-write list. This
2774 // allows us to vectorize expressions such as A[i] += x; Because the address
2775 // of A[i] is a read-write pointer. This only works if the index of A[i] is
2776 // strictly monotonic, which we approximate (conservatively) via
2777 // getPtrStride. If the address is unknown (e.g. A[B[i]]) then we may read,
2778 // modify, and write overlapping words. Note that "zero stride" is unsafe
2779 // and is being handled below.
2780 bool IsReadOnlyPtr = false;
2781 Type *AccessTy = getLoadStoreType(LD);
2782 if (Seen.insert({Ptr, AccessTy}).second ||
2783 !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
2784 true)) {
2785 ++NumReads;
2786 IsReadOnlyPtr = true;
2787 }
2788
2789 // See if there is an unsafe dependency between a load to a uniform address and
2790 // store to the same uniform address.
2791 if (UniformStores.contains(Ptr)) {
2792 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2793 "load and uniform store to the same address!\n");
2794 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2795 }
2796
2797 MemoryLocation Loc = MemoryLocation::get(LD);
2798 // The TBAA metadata could have a control dependency on the predication
2799 // condition, so we cannot rely on it when determining whether or not we
2800 // need runtime pointer checks.
2801 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2802 Loc.AATags.TBAA = nullptr;
2803
2804 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2805 // all alternatives.
2806 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2807 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2808 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2809 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2810 });
2811 }
2812
2813 // If we write (or read-write) to a single destination and there are no other
2814 // reads in this loop then is it safe to vectorize: the vectorized stores
2815 // preserve ordering via replication or order-preserving @llvm.masked.scatter.
2816 if (NumReadWrites == 1 && NumReads == 0) {
2817 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2818 return true;
2819 }
2820
2821 // Build dependence sets and check whether we need a runtime pointer bounds
2822 // check.
2823 Accesses.buildDependenceSets();
2824
2825 // Find pointers with computable bounds. We are going to use this information
2826 // to place a runtime bound check.
2827 Value *UncomputablePtr = nullptr;
2828 HasCompletePtrRtChecking =
2829 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2830 UncomputablePtr, AllowPartial, getDepChecker());
2831 if (!HasCompletePtrRtChecking) {
2832 const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2833 recordAnalysis("CantIdentifyArrayBounds", I)
2834 << "cannot identify array bounds";
2835 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2836 << "the array bounds.\n");
2837 return false;
2838 }
2839
2840 LLVM_DEBUG(
2841 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2842
2843 bool DepsAreSafe = true;
2844 if (Accesses.isDependencyCheckNeeded()) {
2845 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2846 DepsAreSafe =
2847 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
2848
2849 if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
2850 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2851
2852 PtrRtChecking->reset();
2853 PtrRtChecking->Need = true;
2854
2855 UncomputablePtr = nullptr;
2856 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2857 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr,
2858 AllowPartial, getDepChecker());
2859
2860 // Check that we found the bounds for the pointer.
2861 if (!HasCompletePtrRtChecking) {
2862 auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2863 recordAnalysis("CantCheckMemDepsAtRunTime", I)
2864 << "cannot check memory dependencies at runtime";
2865 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2866 return false;
2867 }
2868
2869 // Clear the dependency checks. They are no longer needed.
2870 Accesses.resetDepChecks(*DepChecker);
2871
2872 DepsAreSafe = true;
2873 }
2874 }
2875
2876 // Update the invariant address dependence flags based on dependences found
2877 // by the dep checker. Even if dependences were not recorded (too many to
2878 // track), any InvariantUnsafe dep would still have set the status to Unsafe
2879 if (const auto *Deps = DepChecker->getDependences()) {
2880 for (const auto &Dep : *Deps) {
2882 continue;
2883 Instruction *Src = Dep.getSource(*DepChecker);
2884 Instruction *Dst = Dep.getDestination(*DepChecker);
2885 if (isa<LoadInst>(Src) != isa<LoadInst>(Dst)) {
2886 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2887 } else {
2888 assert(isa<StoreInst>(Src) && isa<StoreInst>(Dst) &&
2889 "Expected both to be stores");
2890 HasStoreStoreDependenceInvolvingLoopInvariantAddress = true;
2891 }
2892 }
2893 }
2894
2895 if (HasConvergentOp) {
2896 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2897 << "cannot add control dependency to convergent operation";
2898 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2899 "would be needed with a convergent operation\n");
2900 return false;
2901 }
2902
2903 if (DepsAreSafe) {
2904 LLVM_DEBUG(
2905 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2906 << (PtrRtChecking->Need ? "" : " don't")
2907 << " need runtime memory checks.\n");
2908 return true;
2909 }
2910
2911 emitUnsafeDependenceRemark();
2912 return false;
2913}
2914
2915void LoopAccessInfo::emitUnsafeDependenceRemark() {
2916 const auto *Deps = getDepChecker().getDependences();
2917 if (!Deps)
2918 return;
2919 const auto *Found =
2920 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2923 });
2924 if (Found == Deps->end())
2925 return;
2926 MemoryDepChecker::Dependence Dep = *Found;
2927
2928 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2929
2930 // Emit remark for first unsafe dependence
2931 bool HasForcedDistribution = false;
2932 std::optional<const MDOperand *> Value =
2933 findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2934 if (Value) {
2935 const MDOperand *Op = *Value;
2936 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2937 HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2938 }
2939
2940 const std::string Info =
2941 HasForcedDistribution
2942 ? "unsafe dependent memory operations in loop."
2943 : "unsafe dependent memory operations in loop. Use "
2944 "#pragma clang loop distribute(enable) to allow loop distribution "
2945 "to attempt to isolate the offending operations into a separate "
2946 "loop";
2947 OptimizationRemarkAnalysis &R =
2948 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2949
2950 switch (Dep.Type) {
2954 llvm_unreachable("Unexpected dependence");
2956 R << "\nBackward loop carried data dependence.";
2957 break;
2959 R << "\nForward loop carried data dependence that prevents "
2960 "store-to-load forwarding.";
2961 break;
2963 R << "\nBackward loop carried data dependence that prevents "
2964 "store-to-load forwarding.";
2965 break;
2967 R << "\nUnsafe indirect dependence.";
2968 break;
2970 R << "\nUnsafe dependence on loop-invariant address.";
2971 break;
2973 R << "\nUnknown data dependence.";
2974 break;
2975 }
2976
2977 if (Instruction *I = Dep.getSource(getDepChecker())) {
2978 DebugLoc SourceLoc = I->getDebugLoc();
2980 SourceLoc = DD->getDebugLoc();
2981 if (SourceLoc)
2982 R << " Memory location is the same as accessed at "
2983 << ore::NV("Location", SourceLoc);
2984 }
2985}
2986
2988 const Loop *TheLoop,
2989 const DominatorTree *DT) {
2990 assert(TheLoop->contains(BB) && "Unknown block used");
2991
2992 // Blocks that do not dominate the latch need predication.
2993 const BasicBlock *Latch = TheLoop->getLoopLatch();
2994 assert(Latch && "Loop expected to have a single latch.");
2995 return !DT->dominates(BB, Latch);
2996}
2997
2999LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
3000 assert(!Report && "Multiple reports generated");
3001
3002 const BasicBlock *CodeRegion = TheLoop->getHeader();
3003 DebugLoc DL = TheLoop->getStartLoc();
3004
3005 if (I) {
3006 CodeRegion = I->getParent();
3007 // If there is no debug location attached to the instruction, revert back to
3008 // using the loop's.
3009 if (I->getDebugLoc())
3010 DL = I->getDebugLoc();
3011 }
3012
3013 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
3014 DL, CodeRegion);
3015 return *Report;
3016}
3017
3019 auto *SE = PSE->getSE();
3020 if (TheLoop->isLoopInvariant(V))
3021 return true;
3022 if (!SE->isSCEVable(V->getType()))
3023 return false;
3024 const SCEV *S = SE->getSCEV(V);
3025 return SE->isLoopInvariant(S, TheLoop);
3026}
3027
3028/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
3029/// Otherwise, return \p Ptr.
3031 Loop *Lp) {
3032 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3033 if (!GEP)
3034 return Ptr;
3035
3036 Value *V = Ptr;
3037 for (const Use &U : GEP->operands()) {
3038 if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
3039 if (V == Ptr)
3040 V = U;
3041 else
3042 // There must be exactly one loop-variant operand.
3043 return Ptr;
3044 }
3045 }
3046 return V;
3047}
3048
3049/// Get the stride of a pointer access in a loop. Looks for symbolic
3050/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
3051static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
3052 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
3053 if (!PtrTy)
3054 return nullptr;
3055
3056 // Try to remove a gep instruction to make the pointer (actually index at this
3057 // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
3058 // pointer, otherwise, we are analyzing the index.
3059 Value *OrigPtr = Ptr;
3060
3061 Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
3062 const SCEV *V = SE->getSCEV(Ptr);
3063
3064 if (Ptr != OrigPtr)
3065 // Strip off casts.
3066 while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3067 V = C->getOperand();
3068
3070 return nullptr;
3071
3072 // Note that the restriction after this loop invariant check are only
3073 // profitability restrictions.
3074 if (!SE->isLoopInvariant(V, Lp))
3075 return nullptr;
3076
3077 // Look for the loop invariant symbolic value.
3078 if (isa<SCEVUnknown>(V))
3079 return V;
3080
3081 // Look through multiplies that scale a stride by a constant.
3083 if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3084 if (isa<SCEVUnknown>(C->getOperand()))
3085 return V;
3086
3087 return nullptr;
3088}
3089
3090void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
3091 Value *Ptr = getLoadStorePointerOperand(MemAccess);
3092 if (!Ptr)
3093 return;
3094
3095 // Note: getStrideFromPointer is a *profitability* heuristic. We
3096 // could broaden the scope of values returned here - to anything
3097 // which happens to be loop invariant and contributes to the
3098 // computation of an interesting IV - but we chose not to as we
3099 // don't have a cost model here, and broadening the scope exposes
3100 // far too many unprofitable cases.
3101 const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
3102 if (!StrideExpr)
3103 return;
3104
3105 if (match(StrideExpr, m_scev_UndefOrPoison()))
3106 return;
3107
3108 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
3109 "versioning:");
3110 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
3111
3112 if (!SpeculateUnitStride) {
3113 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
3114 return;
3115 }
3116
3117 // Avoid adding the "Stride == 1" predicate when we know that
3118 // Stride >= Trip-Count. Such a predicate will effectively optimize a single
3119 // or zero iteration loop, as Trip-Count <= Stride == 1.
3120 //
3121 // TODO: We are currently not making a very informed decision on when it is
3122 // beneficial to apply stride versioning. It might make more sense that the
3123 // users of this analysis (such as the vectorizer) will trigger it, based on
3124 // their specific cost considerations; For example, in cases where stride
3125 // versioning does not help resolving memory accesses/dependences, the
3126 // vectorizer should evaluate the cost of the runtime test, and the benefit
3127 // of various possible stride specializations, considering the alternatives
3128 // of using gather/scatters (if available).
3129
3130 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
3131
3132 // Match the types so we can compare the stride and the MaxBTC.
3133 // The Stride can be positive/negative, so we sign extend Stride;
3134 // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
3135 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
3136 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
3137 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
3138 const SCEV *CastedStride = StrideExpr;
3139 const SCEV *CastedBECount = MaxBTC;
3140 ScalarEvolution *SE = PSE->getSE();
3141 if (BETypeSizeBits >= StrideTypeSizeBits)
3142 CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
3143 else
3144 CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
3145 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
3146 // Since TripCount == BackEdgeTakenCount + 1, checking:
3147 // "Stride >= TripCount" is equivalent to checking:
3148 // Stride - MaxBTC> 0
3149 if (SE->isKnownPositive(StrideMinusBETaken)) {
3150 LLVM_DEBUG(
3151 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
3152 "Stride==1 predicate will imply that the loop executes "
3153 "at most once.\n");
3154 return;
3155 }
3156 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
3157
3158 // Strip back off the integer cast, and check that our result is a
3159 // SCEVUnknown as we expect.
3160 const SCEV *StrideBase = StrideExpr;
3161 if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
3162 StrideBase = C->getOperand();
3163 SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
3164}
3165
3167 const TargetTransformInfo *TTI,
3168 const TargetLibraryInfo *TLI, AAResults *AA,
3169 DominatorTree *DT, LoopInfo *LI,
3170 AssumptionCache *AC, bool AllowPartial)
3171 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3172 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
3173 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3174 if (TTI && !TTI->enableScalableVectorization())
3175 // Scale the vector width by 2 as rough estimate to also consider
3176 // interleaving.
3177 MaxTargetVectorWidthInBits =
3178 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
3179
3180 DepChecker = std::make_unique<MemoryDepChecker>(
3181 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3182 PtrRtChecking =
3183 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3184 if (canAnalyzeLoop())
3185 CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3186}
3187
3188void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3189 if (CanVecMem) {
3190 OS.indent(Depth) << "Memory dependences are safe";
3191 const MemoryDepChecker &DC = getDepChecker();
3192 if (!DC.isSafeForAnyVectorWidth())
3193 OS << " with a maximum safe vector width of "
3194 << DC.getMaxSafeVectorWidthInBits() << " bits";
3197 OS << ", with a maximum safe store-load forward width of " << SLDist
3198 << " bits";
3199 }
3200 if (PtrRtChecking->Need)
3201 OS << " with run-time checks";
3202 OS << "\n";
3203 }
3204
3205 if (HasConvergentOp)
3206 OS.indent(Depth) << "Has convergent operation in loop\n";
3207
3208 if (Report)
3209 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3210
3211 if (auto *Dependences = DepChecker->getDependences()) {
3212 OS.indent(Depth) << "Dependences:\n";
3213 for (const auto &Dep : *Dependences) {
3214 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3215 OS << "\n";
3216 }
3217 } else
3218 OS.indent(Depth) << "Too many dependences, not recorded\n";
3219
3220 // List the pair of accesses need run-time checks to prove independence.
3221 PtrRtChecking->print(OS, Depth);
3222 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
3223 OS.indent(Depth) << "Generated run-time checks are incomplete\n";
3224 OS << "\n";
3225
3226 OS.indent(Depth)
3227 << "Non vectorizable stores to invariant address were "
3228 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3229 HasLoadStoreDependenceInvolvingLoopInvariantAddress
3230 ? ""
3231 : "not ")
3232 << "found in loop.\n";
3233
3234 OS.indent(Depth) << "SCEV assumptions:\n";
3235 PSE->getPredicate().print(OS, Depth);
3236
3237 OS << "\n";
3238
3239 OS.indent(Depth) << "Expressions re-written:\n";
3240 PSE->print(OS, Depth);
3241}
3242
3244 bool AllowPartial) {
3245 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
3246
3247 // We need to create the LoopAccessInfo if either we don't already have one,
3248 // or if it was created with a different value of AllowPartial.
3249 if (Inserted || It->second->hasAllowPartial() != AllowPartial)
3250 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3251 &LI, AC, AllowPartial);
3252
3253 return *It->second;
3254}
3256 // Collect LoopAccessInfo entries that may keep references to IR outside the
3257 // analyzed loop or SCEVs that may have been modified or invalidated. At the
3258 // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3259 // SCEVs, e.g. for pointer expressions.
3260 LoopAccessInfoMap.remove_if([](const auto &Entry) {
3261 const auto &LAI = Entry.second;
3262 return !(LAI->getRuntimePointerChecking()->getChecks().empty() &&
3263 LAI->getPSE().getPredicate().isAlwaysTrue());
3264 });
3265}
3266
3268 Function &F, const PreservedAnalyses &PA,
3269 FunctionAnalysisManager::Invalidator &Inv) {
3270 // Check whether our analysis is preserved.
3271 auto PAC = PA.getChecker<LoopAccessAnalysis>();
3272 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3273 // If not, give up now.
3274 return true;
3275
3276 // Check whether the analyses we depend on became invalid for any reason.
3277 // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3278 // invalid.
3279 return Inv.invalidate<AAManager>(F, PA) ||
3280 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3281 Inv.invalidate<LoopAnalysis>(F, PA) ||
3282 Inv.invalidate<DominatorTreeAnalysis>(F, PA);
3283}
3284
3287 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
3288 auto &AA = FAM.getResult<AAManager>(F);
3289 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3290 auto &LI = FAM.getResult<LoopAnalysis>(F);
3291 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3292 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3293 auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3294 return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
3295}
3296
3297AnalysisKey LoopAccessAnalysis::Key;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
DXIL Resource Access
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
Hexagon Common GEP
#define _
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
static const SCEV * mulSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A * B, if it is guaranteed not to unsigned wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Check whether AR is a non-wrapping AddRec.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
static cl::opt< ElementCount, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If Ptr is a GEP, which has a loop-variant operand, return that operand.
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A + B, if it is guaranteed not to unsigned wrap.
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
#define P(N)
FunctionAnalysisManager FAM
This file defines the PointerIntPair class.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const X86InstrFMA3Group Groups[]
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isConvergent() const
Determine if the invoke is convergent.
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
bool isNegative() const
Definition Constants.h:214
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:126
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< member_iterator > members(const ECValue &ECV) const
bool contains(const ElemTy &V) const
Returns true if V is contained an equivalence class.
const ECValue & insert(const ElemTy &Data)
Insert a new value into the union/find set, ignoring the request if the value already exists.
member_iterator member_end() const
const ElemTy & getLeaderValue(const ElemTy &V) const
Return the leader for the specified value that is in the set.
member_iterator findLeader(const ElemTy &V) const
Given a value in the set, return a member iterator for the equivalence class it is in.
void eraseClass(const ElemTy &V)
Erase the class containing V, i.e.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
Merge the two equivalence sets for the specified values, inserting them if they do not already exist ...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:688
bool empty() const
Definition Function.h:833
PointerType * getType() const
Global values are always pointers.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
LLVM_ABI bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
std::string getLocStr() const
Return a string containing the debug location of the loop (file name + line number if present,...
Definition LoopInfo.cpp:694
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition LoopInfo.cpp:592
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1431
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there are no store-load forwarding dependencies.
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, ArrayRef< MemAccessInfo > CheckDeps)
Check whether the dependencies between the accesses are safe, and records the dependence information ...
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
PointerIntPair< Value *, 1, bool > MemAccessInfo
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
bool shouldRetryWithRuntimeChecks() const
In same cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
LLVM_ABI void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
uint64_t getStoreLoadForwardSafeDistanceInBits() const
Return safe power-of-2 number of elements, which do not prevent store-load forwarding,...
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've statically proved that V doesn't wrap.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V, SmallVectorImpl< const SCEVPredicate * > *WrapPredsAdded=nullptr)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI void addPredicates(ArrayRef< const SCEVPredicate * > Preds)
Adds all predicates in Preds.
LLVM_ABI const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands)
Generate the checks and store it.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
IncrementWrapFlags
Similar to SCEV::NoWrapFlags, but with slightly different semantics for FlagNUSW.
static SCEVWrapPredicate::IncrementWrapFlags clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OffFlags)
Convenient IncrementWrapFlags manipulation methods.
static SCEVWrapPredicate::IncrementWrapFlags getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Returns the set of SCEVWrapPredicate no wrap flags implied by a SCEVAddRecExpr.
This class represents an analyzed expression in the program.
static constexpr auto NoWrapMask
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?
LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags)
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getPtrToAddrExpr(const SCEV *Op)
LLVM_ABI const SCEVAddRecExpr * convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Preds)
Tries to convert the S expression to an AddRec expression, adding additional predicates to Preds as r...
LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:229
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool *CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:909
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
is_undef_or_poison m_scev_UndefOrPoison()
Match an SCEVUnknown wrapping undef or poison.
specificloop_ty m_SpecificLoop(const Loop *L)
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)
Check whether Metadata has a Value.
Definition Metadata.h:651
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, const SCEV * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Calculate Start and End points of memory access using exact backedge taken count BTC if computable or...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:573
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})
Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
unsigned getPointerAddressSpace(const Type *T)
Definition SPIRVUtils.h:389
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
IntPtrTy
Definition InstrProf.h:82
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
If AR is an affine AddRec for Lp with a constant step, return the step in units of AccessTy's allocat...
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
IR Values for the lower and upper bounds of a pointer evolution.
MDNode * Scope
The tag for alias scope specification (used with noalias).
Definition Metadata.h:786
MDNode * TBAA
The tag for type-based alias analysis.
Definition Metadata.h:780
MDNode * NoAlias
The tag specifying the noalias scope.
Definition Metadata.h:789
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
LLVM_ABI bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
LLVM_ABI bool isForward() const
Lexically forward dependence.
LLVM_ABI bool isBackward() const
Lexically backward dependence.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static LLVM_ABI const char * DepName[]
String version of the types.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Represent one information held inside an operand bundle of an llvm.assume.
unsigned AddressSpace
Address space of the involved pointers.
LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI ElementCount VectorizationFactor
VF as overridden by the user.
static LLVM_ABI bool HoistRuntimeChecks
Function object to check whether the first component of a container supported by std::get (like std::...
Definition STLExtras.h:1439