LLVM 23.0.0git
LoopAccessAnalysis.cpp
Go to the documentation of this file.
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence that was originally
10// developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallSet.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/PassManager.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
55#include "llvm/IR/ValueHandle.h"
58#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>
68
69using namespace llvm;
70using namespace llvm::SCEVPatternMatch;
71
72#define DEBUG_TYPE "loop-accesses"
73
75VectorizationFactor("force-vector-width", cl::Hidden,
76 cl::desc("Sets the SIMD width. Zero is autoselect."),
79
81VectorizationInterleave("force-vector-interleave", cl::Hidden,
82 cl::desc("Sets the vectorization interleave count. "
83 "Zero is autoselect."),
87
89 "runtime-memory-check-threshold", cl::Hidden,
90 cl::desc("When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
94
95/// The maximum iterations used to merge memory checks
97 "memory-check-merge-threshold", cl::Hidden,
98 cl::desc("Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
100 cl::init(100));
101
102/// Maximum SIMD width.
103const unsigned VectorizerParams::MaxVectorWidth = 64;
104
105/// We collect dependences up to this threshold.
107 MaxDependences("max-dependences", cl::Hidden,
108 cl::desc("Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
110 cl::init(100));
111
112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114/// for (i = 0; i < N; ++i)
115/// A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118/// if (Stride1 == 1 && Stride2 == 1) {
119/// for (i = 0; i < N; i+=4)
120/// A[i:i+3] += ...
121/// } else
122/// ...
124 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125 cl::desc("Enable symbolic stride memory access versioning"));
126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
130 "store-to-load-forwarding-conflict-detection", cl::Hidden,
131 cl::desc("Enable conflict detection in loop-access analysis"),
132 cl::init(true));
133
135 "max-forked-scev-depth", cl::Hidden,
136 cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137 cl::init(5));
138
140 "laa-speculate-unit-stride", cl::Hidden,
141 cl::desc("Speculate that non-constant strides are unit in LAA"),
142 cl::init(true));
143
145 "hoist-runtime-checks", cl::Hidden,
146 cl::desc(
147 "Hoist inner loop runtime memory checks to outer loop if possible"),
150
152 return ::VectorizationInterleave.getNumOccurrences() > 0;
153}
154
156 const DenseMap<Value *, const SCEV *> &PtrToStride,
157 Value *Ptr) {
158 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
159
160 // If there is an entry in the map return the SCEV of the pointer with the
161 // symbolic stride replaced by one.
162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
163 if (!StrideSCEV)
164 // For a non-symbolic stride, just return the original expression.
165 return OrigSCEV;
166
167 // Note: This assert is both overly strong and overly weak. The actual
168 // invariant here is that StrideSCEV should be loop invariant. The only
169 // such invariant strides we happen to speculate right now are unknowns
170 // and thus this is a reasonable proxy of the actual invariant.
171 assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
172
173 ScalarEvolution *SE = PSE.getSE();
174 const SCEV *CT = SE->getOne(StrideSCEV->getType());
175 PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
176 const SCEV *Expr = PSE.getSCEV(Ptr);
177
178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
179 << " by: " << *Expr << "\n");
180 return Expr;
181}
182
184 unsigned Index, const RuntimePointerChecking &RtCheck)
185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
186 AddressSpace(RtCheck.Pointers[Index]
187 .PointerValue->getType()
189 NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
190 Members.push_back(Index);
191}
192
193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194/// return nullptr. \p A and \p B must have the same type.
195static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196 ScalarEvolution &SE) {
197 if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
198 return nullptr;
199 return SE.getAddExpr(A, B);
200}
201
202/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203/// return nullptr. \p A and \p B must have the same type.
204static const SCEV *mulSCEVNoOverflow(const SCEV *A, const SCEV *B,
205 ScalarEvolution &SE) {
206 if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
207 return nullptr;
208 return SE.getMulExpr(A, B);
209}
210
211/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
212/// \p MaxBTC is guaranteed inbounds of the accessed object.
214 const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
216 AssumptionCache *AC,
217 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
218 auto *PointerBase = SE.getPointerBase(AR->getStart());
219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
220 if (!StartPtr)
221 return false;
222 const Loop *L = AR->getLoop();
223 bool CheckForNonNull, CheckForFreed;
224 Value *StartPtrV = StartPtr->getValue();
225 // We can ignore frees, as the fact that an object of a certain size existed
226 // at the location *at some point* is sufficient to derive the nowrap fact.
227 uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
228 DL, CheckForNonNull, CheckForFreed);
229
230 if (DerefBytes && CheckForNonNull)
231 return false;
232
233 const SCEV *Step = AR->getStepRecurrence(SE);
234 Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
235 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
236
237 // Check if we have a suitable dereferencable assumption we can use.
238 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
239 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
240 if (isa<UncondBrInst, CondBrInst>(LoopPred->getTerminator()))
241 CtxI = LoopPred->getTerminator();
242 }
243 RetainedKnowledge DerefRK;
244 getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
245 [&](RetainedKnowledge RK, Instruction *Assume, auto) {
246 if (!isValidAssumeForContext(Assume, CtxI, DT))
247 return false;
248 DerefRK = std::max(DerefRK, RK);
249 return true;
250 });
251 if (DerefRK) {
252 const SCEV *DerefRKSCEV = SE.getSCEV(DerefRK.IRArgValue);
253 Type *CommonTy =
254 SE.getWiderType(DerefBytesSCEV->getType(), DerefRKSCEV->getType());
255 DerefBytesSCEV = SE.getNoopOrZeroExtend(DerefBytesSCEV, CommonTy);
256 DerefRKSCEV = SE.getNoopOrZeroExtend(DerefRKSCEV, CommonTy);
257 DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV, DerefRKSCEV);
258 }
259
260 if (DerefBytesSCEV->isZero())
261 return false;
262
263 bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
264 if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
265 return false;
266
267 Step = SE.getNoopOrSignExtend(Step, WiderTy);
268 MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
269
270 // For the computations below, make sure they don't unsigned wrap.
271 if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
272 return false;
273 const SCEV *StartOffset = SE.getNoopOrZeroExtend(
274 SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
275
276 if (!LoopGuards)
277 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
278 MaxBTC = SE.applyLoopGuards(MaxBTC, *LoopGuards);
279
280 const SCEV *OffsetAtLastIter =
281 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
282 if (!OffsetAtLastIter) {
283 // Re-try with constant max backedge-taken count if using the symbolic one
284 // failed.
285 MaxBTC = SE.getConstantMaxBackedgeTakenCount(AR->getLoop());
286 if (isa<SCEVCouldNotCompute>(MaxBTC))
287 return false;
288 MaxBTC = SE.getNoopOrZeroExtend(
289 MaxBTC, WiderTy);
290 OffsetAtLastIter =
291 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
292 if (!OffsetAtLastIter)
293 return false;
294 }
295
296 const SCEV *OffsetEndBytes = addSCEVNoOverflow(
297 OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
298 if (!OffsetEndBytes)
299 return false;
300
301 if (IsKnownNonNegative) {
302 // For positive steps, check if
303 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
304 // while making sure none of the computations unsigned wrap themselves.
305 const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
306 if (!EndBytes)
307 return false;
308
309 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
310 return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
311 }
312
313 // For negative steps check if
314 // * StartOffset >= (MaxBTC * Step + EltSize)
315 // * StartOffset <= DerefBytes.
316 assert(SE.isKnownNegative(Step) && "must be known negative");
317 return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
318 SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
319}
320
321std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
322 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
323 const SCEV *MaxBTC, ScalarEvolution *SE,
324 DenseMap<std::pair<const SCEV *, const SCEV *>,
325 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
327 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
328 auto &DL = Lp->getHeader()->getDataLayout();
329 Type *IdxTy = DL.getIndexType(PtrExpr->getType());
330 const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
331
332 // Delegate to the SCEV-based overload, passing through the cache.
333 return getStartAndEndForAccess(Lp, PtrExpr, EltSizeSCEV, BTC, MaxBTC, SE,
334 PointerBounds, DT, AC, LoopGuards);
335}
336
337std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
338 const Loop *Lp, const SCEV *PtrExpr, const SCEV *EltSizeSCEV,
339 const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE,
340 DenseMap<std::pair<const SCEV *, const SCEV *>,
341 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
343 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
344 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
345 if (PointerBounds) {
346 auto [Iter, Ins] = PointerBounds->insert(
347 {{PtrExpr, EltSizeSCEV},
348 {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
349 if (!Ins)
350 return Iter->second;
351 PtrBoundsPair = &Iter->second;
352 }
353
354 const SCEV *ScStart;
355 const SCEV *ScEnd;
356
357 auto &DL = Lp->getHeader()->getDataLayout();
358 if (SE->isLoopInvariant(PtrExpr, Lp)) {
359 ScStart = ScEnd = PtrExpr;
360 } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
361 ScStart = AR->getStart();
362 if (!isa<SCEVCouldNotCompute>(BTC))
363 // Evaluating AR at an exact BTC is safe: LAA separately checks that
364 // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
365 // the loop either triggers UB when executing a memory access with a
366 // poison pointer or the wrapping/poisoned pointer is not used.
367 ScEnd = AR->evaluateAtIteration(BTC, *SE);
368 else {
369 // Evaluating AR at MaxBTC may wrap and create an expression that is less
370 // than the start of the AddRec due to wrapping (for example consider
371 // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
372 // will get incremented by EltSize before returning, so this effectively
373 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
374 // separately checks that accesses cannot not wrap, so unsigned max
375 // represents an upper bound.
376 if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
377 DT, AC, LoopGuards)) {
378 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
379 } else {
380 ScEnd = SE->getAddExpr(
381 SE->getNegativeSCEV(EltSizeSCEV),
384 AR->getType())));
385 }
386 }
387 const SCEV *Step = AR->getStepRecurrence(*SE);
388
389 // For expressions with negative step, the upper bound is ScStart and the
390 // lower bound is ScEnd.
391 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
392 if (CStep->getValue()->isNegative())
393 std::swap(ScStart, ScEnd);
394 } else {
395 // Fallback case: the step is not constant, but we can still
396 // get the upper and lower bounds of the interval by using min/max
397 // expressions.
398 ScStart = SE->getUMinExpr(ScStart, ScEnd);
399 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
400 }
401 } else
402 return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
403
404 assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
405 assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
406
407 // Add the size of the pointed element to ScEnd.
408 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
409
410 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
411 if (PointerBounds)
412 *PtrBoundsPair = Res;
413 return Res;
414}
415
416/// Calculate Start and End points of memory access using
417/// getStartAndEndForAccess.
418void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
419 Type *AccessTy, bool WritePtr,
420 unsigned DepSetId, unsigned ASId,
422 bool NeedsFreeze) {
423 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
424 const SCEV *BTC = PSE.getBackedgeTakenCount();
425 const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
426 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
427 &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
429 !isa<SCEVCouldNotCompute>(ScEnd) &&
430 "must be able to compute both start and end expressions");
431 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
432 NeedsFreeze);
433}
434
435bool RuntimePointerChecking::tryToCreateDiffCheck(
436 const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
437 // If either group contains multiple different pointers, bail out.
438 // TODO: Support multiple pointers by using the minimum or maximum pointer,
439 // depending on src & sink.
440 if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
441 return false;
442
443 const PointerInfo *Src = &Pointers[CGI.Members[0]];
444 const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
445
446 // If either pointer is read and written, multiple checks may be needed. Bail
447 // out.
448 if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
449 !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
450 return false;
451
452 ArrayRef<unsigned> AccSrc =
453 DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
454 ArrayRef<unsigned> AccSink =
455 DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
456 // If either pointer is accessed multiple times, there may not be a clear
457 // src/sink relation. Bail out for now.
458 if (AccSrc.size() != 1 || AccSink.size() != 1)
459 return false;
460
461 // If the sink is accessed before src, swap src/sink.
462 if (AccSink[0] < AccSrc[0])
463 std::swap(Src, Sink);
464
465 const SCEVConstant *Step;
466 const SCEV *SrcStart;
467 const SCEV *SinkStart;
468 const Loop *InnerLoop = DC.getInnermostLoop();
469 if (!match(Src->Expr,
471 m_SpecificLoop(InnerLoop))) ||
472 !match(Sink->Expr,
474 m_SpecificLoop(InnerLoop))))
475 return false;
476
478 DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
480 DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
481 Type *SrcTy = getLoadStoreType(SrcInsts[0]);
482 Type *DstTy = getLoadStoreType(SinkInsts[0]);
484 return false;
485
486 const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
487 unsigned AllocSize =
488 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
489
490 // Only matching constant steps matching the AllocSize are supported at the
491 // moment. This simplifies the difference computation. Can be extended in the
492 // future.
493 if (Step->getAPInt().abs() != AllocSize)
494 return false;
495
496 // When counting down, the dependence distance needs to be swapped.
497 if (Step->getValue()->isNegative())
498 std::swap(SinkStart, SrcStart);
499
500 const SCEV *SinkStartInt = SE->getPtrToAddrExpr(SinkStart);
501 const SCEV *SrcStartInt = SE->getPtrToAddrExpr(SrcStart);
502 if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
503 isa<SCEVCouldNotCompute>(SrcStartInt))
504 return false;
505
506 // If the start values for both Src and Sink also vary according to an outer
507 // loop, then it's probably better to avoid creating diff checks because
508 // they may not be hoisted. We should instead let llvm::addRuntimeChecks
509 // do the expanded full range overlap checks, which can be hoisted.
510 if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
511 isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
512 auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
513 auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
514 const Loop *StartARLoop = SrcStartAR->getLoop();
515 if (StartARLoop == SinkStartAR->getLoop() &&
516 StartARLoop == InnerLoop->getParentLoop() &&
517 // If the diff check would already be loop invariant (due to the
518 // recurrences being the same), then we prefer to keep the diff checks
519 // because they are cheaper.
520 SrcStartAR->getStepRecurrence(*SE) !=
521 SinkStartAR->getStepRecurrence(*SE)) {
522 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
523 "cannot be hoisted out of the outer loop\n");
524 return false;
525 }
526 }
527
528 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
529 << "SrcStart: " << *SrcStartInt << '\n'
530 << "SinkStartInt: " << *SinkStartInt << '\n');
531 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
532 Src->NeedsFreeze || Sink->NeedsFreeze);
533 return true;
534}
535
537 SmallVector<RuntimePointerCheck, 4> Checks;
538
539 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
540 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
543
544 if (needsChecking(CGI, CGJ)) {
545 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
546 Checks.emplace_back(&CGI, &CGJ);
547 }
548 }
549 }
550 return Checks;
551}
552
555 assert(Checks.empty() && "Checks is not empty");
556 groupChecks(DepCands);
557 Checks = generateChecks();
558}
559
561 const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
562 for (const auto &I : M.Members)
563 for (const auto &J : N.Members)
564 if (needsChecking(I, J))
565 return true;
566 return false;
567}
568
569/// Compare \p I and \p J and return the minimum.
570/// Return nullptr in case we couldn't find an answer.
571static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
572 ScalarEvolution *SE) {
573 std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
574 if (!Diff)
575 return nullptr;
576 return Diff->isNegative() ? J : I;
577}
578
580 unsigned Index, const RuntimePointerChecking &RtCheck) {
581 return addPointer(
582 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
583 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
584 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
585}
586
587bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
588 const SCEV *End, unsigned AS,
589 bool NeedsFreeze,
590 ScalarEvolution &SE) {
591 assert(AddressSpace == AS &&
592 "all pointers in a checking group must be in the same address space");
593
594 // Compare the starts and ends with the known minimum and maximum
595 // of this set. We need to know how we compare against the min/max
596 // of the set in order to be able to emit memchecks.
597 const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
598 if (!Min0)
599 return false;
600
601 const SCEV *Min1 = getMinFromExprs(End, High, &SE);
602 if (!Min1)
603 return false;
604
605 // Update the low bound expression if we've found a new min value.
606 if (Min0 == Start)
607 Low = Start;
608
609 // Update the high bound expression if we've found a new max value.
610 if (Min1 != End)
611 High = End;
612
613 Members.push_back(Index);
614 this->NeedsFreeze |= NeedsFreeze;
615 return true;
616}
617
618void RuntimePointerChecking::groupChecks(
620 // We build the groups from dependency candidates equivalence classes
621 // because:
622 // - We know that pointers in the same equivalence class share
623 // the same underlying object and therefore there is a chance
624 // that we can compare pointers
625 // - We wouldn't be able to merge two pointers for which we need
626 // to emit a memcheck. The classes in DepCands are already
627 // conveniently built such that no two pointers in the same
628 // class need checking against each other.
629
630 // We use the following (greedy) algorithm to construct the groups
631 // For every pointer in the equivalence class:
632 // For each existing group:
633 // - if the difference between this pointer and the min/max bounds
634 // of the group is a constant, then make the pointer part of the
635 // group and update the min/max bounds of that group as required.
636
637 CheckingGroups.clear();
638
639 // If we need to check two pointers to the same underlying object
640 // with a non-constant difference, we shouldn't perform any pointer
641 // grouping with those pointers. This is because we can easily get
642 // into cases where the resulting check would return false, even when
643 // the accesses are safe.
644 //
645 // The following example shows this:
646 // for (i = 0; i < 1000; ++i)
647 // a[5000 + i * m] = a[i] + a[i + 9000]
648 //
649 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
650 // (0, 10000) which is always false. However, if m is 1, there is no
651 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
652 // us to perform an accurate check in this case.
653 //
654 // In the above case, we have a non-constant distance and an Unknown
655 // dependence between accesses to the same underlying object, and could retry
656 // with runtime checks without dependency information being available. In this
657 // case we will use the fallback path and create separate checking groups for
658 // accesses not present in DepCands.
659
660 unsigned TotalComparisons = 0;
661
663 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
664 PositionMap[Pointers[Index].PointerValue].push_back(Index);
665
666 // We need to keep track of what pointers we've already seen so we
667 // don't process them twice.
669
670 // Go through all equivalence classes, get the "pointer check groups"
671 // and add them to the overall solution. We use the order in which accesses
672 // appear in 'Pointers' to enforce determinism.
673 for (unsigned I = 0; I < Pointers.size(); ++I) {
674 // We've seen this pointer before, and therefore already processed
675 // its equivalence class.
676 if (Seen.contains(I))
677 continue;
678
680 Pointers[I].IsWritePtr);
681
682 // If there is no entry in the dependency partition, there are no potential
683 // accesses to merge; simply add a new pointer checking group.
684 if (!DepCands.contains(Access)) {
685 CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
686 continue;
687 }
688
690
691 // Because DepCands is constructed by visiting accesses in the order in
692 // which they appear in alias sets (which is deterministic) and the
693 // iteration order within an equivalence class member is only dependent on
694 // the order in which unions and insertions are performed on the
695 // equivalence class, the iteration order is deterministic.
696 for (auto M : DepCands.members(Access)) {
697 auto PointerI = PositionMap.find(M.getPointer());
698 // If we can't find the pointer in PositionMap that means we can't
699 // generate a memcheck for it.
700 if (PointerI == PositionMap.end())
701 continue;
702 for (unsigned Pointer : PointerI->second) {
703 bool Merged = false;
704 // Mark this pointer as seen.
705 Seen.insert(Pointer);
706
707 // Go through all the existing sets and see if we can find one
708 // which can include this pointer.
709 for (RuntimeCheckingPtrGroup &Group : Groups) {
710 // Don't perform more than a certain amount of comparisons.
711 // This should limit the cost of grouping the pointers to something
712 // reasonable. If we do end up hitting this threshold, the algorithm
713 // will create separate groups for all remaining pointers.
714 if (TotalComparisons > MemoryCheckMergeThreshold)
715 break;
716
717 TotalComparisons++;
718
719 if (Group.addPointer(Pointer, *this)) {
720 Merged = true;
721 break;
722 }
723 }
724
725 if (!Merged)
726 // We couldn't add this pointer to any existing set or the threshold
727 // for the number of comparisons has been reached. Create a new group
728 // to hold the current pointer.
729 Groups.emplace_back(Pointer, *this);
730 }
731 }
732
733 // We've computed the grouped checks for this partition.
734 // Save the results and continue with the next one.
736 }
737}
738
740 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
741 unsigned PtrIdx2) {
742 return (PtrToPartition[PtrIdx1] != -1 &&
743 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
744}
745
746bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
747 const PointerInfo &PointerI = Pointers[I];
748 const PointerInfo &PointerJ = Pointers[J];
749
750 // No need to check if two readonly pointers intersect.
751 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
752 return false;
753
754 // Only need to check pointers between two different dependency sets.
755 if (PointerI.DependencySetId == PointerJ.DependencySetId)
756 return false;
757
758 // Only need to check pointers in the same alias set.
759 return PointerI.AliasSetId == PointerJ.AliasSetId;
760}
761
762/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
766 for (const auto &[Idx, CG] : enumerate(CheckingGroups))
767 PtrIndices[&CG] = Idx;
768 return PtrIndices;
769}
770
773 unsigned Depth) const {
774 unsigned N = 0;
775 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
776 for (const auto &[Check1, Check2] : Checks) {
777 const auto &First = Check1->Members, &Second = Check2->Members;
778 OS.indent(Depth) << "Check " << N++ << ":\n";
779 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
780 << ":\n";
781 for (unsigned K : First)
782 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
783 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
784 << ":\n";
785 for (unsigned K : Second)
786 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
787 }
788}
789
791
792 OS.indent(Depth) << "Run-time memory checks:\n";
793 printChecks(OS, Checks, Depth);
794
795 OS.indent(Depth) << "Grouped accesses:\n";
796 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
797 for (const auto &CG : CheckingGroups) {
798 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
799 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
800 << ")\n";
801 for (unsigned Member : CG.Members) {
802 OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
803 }
804 }
805}
806
807namespace {
808
809/// Analyses memory accesses in a loop.
810///
811/// Checks whether run time pointer checks are needed and builds sets for data
812/// dependence checking.
813class AccessAnalysis {
814public:
815 using MemAccessInfo =
816 PointerIntPair<Value * /* AccessPtr */, 1, bool /* IsWrite */>;
817
818 AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
821 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
822 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
823 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
824 // We're analyzing dependences across loop iterations.
825 BAA.enableCrossIterationMode();
826 }
827
828 /// Register a load and whether it is only read from.
829 void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
830 Value *Ptr = const_cast<Value *>(Loc.Ptr);
831 AST.add(adjustLoc(Loc));
832 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
833 if (IsReadOnly)
834 ReadOnlyPtr.insert(Ptr);
835 }
836
837 /// Register a store.
838 void addStore(const MemoryLocation &Loc, Type *AccessTy) {
839 Value *Ptr = const_cast<Value *>(Loc.Ptr);
840 AST.add(adjustLoc(Loc));
841 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
842 }
843
844 /// Check if we can emit a run-time no-alias check for \p Access.
845 ///
846 /// Returns true if we can emit a run-time no alias check for \p Access.
847 /// If we can check this access, this also adds it to a dependence set and
848 /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
849 /// we will attempt to use additional run-time checks in order to get
850 /// the bounds of the pointer.
851 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
852 MemAccessInfo Access, Type *AccessTy,
853 const DenseMap<Value *, const SCEV *> &Strides,
854 DenseMap<Value *, unsigned> &DepSetId,
855 Loop *TheLoop, unsigned &RunningDepId,
856 unsigned ASId, bool Assume);
857
858 /// Check whether we can check the pointers at runtime for
859 /// non-intersection.
860 ///
861 /// Returns true if we need no check or if we do and we can generate them
862 /// (i.e. the pointers have computable bounds). A return value of false means
863 /// we couldn't analyze and generate runtime checks for all pointers in the
864 /// loop, but if \p AllowPartial is set then we will have checks for those
865 /// pointers we could analyze. \p DepChecker is used to remove unknown
866 /// dependences from DepCands.
867 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
868 const DenseMap<Value *, const SCEV *> &Strides,
869 Value *&UncomputablePtr, bool AllowPartial,
870 const MemoryDepChecker &DepChecker);
871
872 /// Goes over all memory accesses, checks whether a RT check is needed
873 /// and builds sets of dependent accesses.
874 void buildDependenceSets();
875
876 /// Initial processing of memory accesses determined that we need to
877 /// perform dependency checking.
878 ///
879 /// Note that this can later be cleared if we retry memcheck analysis without
880 /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
881 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
882
883 /// We decided that no dependence analysis would be used. Reset the state.
884 void resetDepChecks(MemoryDepChecker &DepChecker) {
885 CheckDeps.clear();
886 DepChecker.clearDependences();
887 }
888
889 ArrayRef<MemAccessInfo> getDependenciesToCheck() const { return CheckDeps; }
890
891private:
892 using PtrAccessMap = MapVector<MemAccessInfo, SmallSetVector<Type *, 1>>;
893
894 /// Adjust the MemoryLocation so that it represents accesses to this
895 /// location across all iterations, rather than a single one.
896 MemoryLocation adjustLoc(MemoryLocation Loc) const {
897 // The accessed location varies within the loop, but remains within the
898 // underlying object.
900 Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
901 Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
902 return Loc;
903 }
904
905 /// Drop alias scopes that are only valid within a single loop iteration.
906 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
907 if (!ScopeList)
908 return nullptr;
909
910 // For the sake of simplicity, drop the whole scope list if any scope is
911 // iteration-local.
912 if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
913 return LoopAliasScopes.contains(cast<MDNode>(Scope));
914 }))
915 return nullptr;
916
917 return ScopeList;
918 }
919
920 /// Map of all accesses. Values are the types used to access memory pointed to
921 /// by the pointer.
922 PtrAccessMap Accesses;
923
924 /// The loop being checked.
925 const Loop *TheLoop;
926
927 /// List of accesses that need a further dependence check.
929
930 /// Set of pointers that are read only.
931 SmallPtrSet<Value*, 16> ReadOnlyPtr;
932
933 /// Batched alias analysis results.
934 BatchAAResults BAA;
935
936 /// An alias set tracker to partition the access set by underlying object and
937 //intrinsic property (such as TBAA metadata).
938 AliasSetTracker AST;
939
940 /// The LoopInfo of the loop being checked.
941 const LoopInfo *LI;
942
943 /// The dominator tree of the function.
944 DominatorTree &DT;
945
946 /// Sets of potentially dependent accesses - members of one set share an
947 /// underlying pointer. The set "CheckDeps" identfies which sets really need a
948 /// dependence check.
950
951 /// Initial processing of memory accesses determined that we may need
952 /// to add memchecks. Perform the analysis to determine the necessary checks.
953 ///
954 /// Note that, this is different from isDependencyCheckNeeded. When we retry
955 /// memcheck analysis without dependency checking
956 /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
957 /// cleared while this remains set if we have potentially dependent accesses.
958 bool IsRTCheckAnalysisNeeded = false;
959
960 /// The SCEV predicate containing all the SCEV-related assumptions.
961 PredicatedScalarEvolution &PSE;
962
963 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
964
965 /// Alias scopes that are declared inside the loop, and as such not valid
966 /// across iterations.
967 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
968};
969
970} // end anonymous namespace
971
972/// Try to compute a constant stride for \p AR. Used by getPtrStride and
973/// isNoWrap.
974static std::optional<int64_t>
975getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
976 Value *Ptr, PredicatedScalarEvolution &PSE) {
977 if (isa<ScalableVectorType>(AccessTy)) {
978 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
979 << "\n");
980 return std::nullopt;
981 }
982
983 // The access function must stride over the innermost loop.
984 if (Lp != AR->getLoop()) {
985 LLVM_DEBUG({
986 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
987 if (Ptr)
988 dbgs() << *Ptr << " ";
989
990 dbgs() << "SCEV: " << *AR << "\n";
991 });
992 return std::nullopt;
993 }
994
995 // Check the step is constant.
996 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
997
998 // Calculate the pointer stride and check if it is constant.
999 const APInt *APStepVal;
1000 if (!match(Step, m_scev_APInt(APStepVal))) {
1001 LLVM_DEBUG({
1002 dbgs() << "LAA: Bad stride - Not a constant strided ";
1003 if (Ptr)
1004 dbgs() << *Ptr << " ";
1005 dbgs() << "SCEV: " << *AR << "\n";
1006 });
1007 return std::nullopt;
1008 }
1009
1010 const auto &DL = Lp->getHeader()->getDataLayout();
1011 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1012 int64_t Size = AllocSize.getFixedValue();
1013
1014 // Huge step value - give up.
1015 std::optional<int64_t> StepVal = APStepVal->trySExtValue();
1016 if (!StepVal)
1017 return std::nullopt;
1018
1019 // Strided access.
1020 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
1021}
1022
1023/// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr, use
1024/// informating from the IR pointer value to determine no-wrap.
1026 Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
1027 const DominatorTree &DT,
1028 std::optional<int64_t> Stride = std::nullopt) {
1029 // FIXME: This should probably only return true for NUW.
1030 if (any(AR->getNoWrapFlags(SCEV::NoWrapMask)))
1031 return true;
1032
1034 return true;
1035
1036 // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1037 // the distance between the previously accessed location and the wrapped
1038 // location will be larger than half the pointer index type space. In that
1039 // case, the GEP would be poison and any memory access dependent on it would
1040 // be immediate UB when executed.
1042 GEP && GEP->hasNoUnsignedSignedWrap()) {
1043 // For the above reasoning to apply, the pointer must be dereferenced in
1044 // every iteration.
1045 if (L->getHeader() == L->getLoopLatch() ||
1046 any_of(GEP->users(), [L, &DT, GEP](User *U) {
1047 if (getLoadStorePointerOperand(U) != GEP)
1048 return false;
1049 BasicBlock *UserBB = cast<Instruction>(U)->getParent();
1050 if (!L->contains(UserBB))
1051 return false;
1052 return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
1053 }))
1054 return true;
1055 }
1056
1057 if (!Stride)
1058 Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
1059 if (Stride) {
1060 // If the null pointer is undefined, then a access sequence which would
1061 // otherwise access it can be assumed not to unsigned wrap. Note that this
1062 // assumes the object in memory is aligned to the natural alignment.
1063 unsigned AddrSpace = AR->getType()->getPointerAddressSpace();
1064 if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
1065 (Stride == 1 || Stride == -1))
1066 return true;
1067 }
1068
1069 if (Ptr && Assume) {
1071 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1072 << "LAA: Pointer: " << *Ptr << "\n"
1073 << "LAA: SCEV: " << *AR << "\n"
1074 << "LAA: Added an overflow assumption\n");
1075 return true;
1076 }
1077
1078 return false;
1079}
1080
1081static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1082 function_ref<void(Value *)> AddPointer) {
1084 SmallVector<Value *> WorkList;
1085 WorkList.push_back(StartPtr);
1086
1087 while (!WorkList.empty()) {
1088 Value *Ptr = WorkList.pop_back_val();
1089 if (!Visited.insert(Ptr).second)
1090 continue;
1091 auto *PN = dyn_cast<PHINode>(Ptr);
1092 // SCEV does not look through non-header PHIs inside the loop. Such phis
1093 // can be analyzed by adding separate accesses for each incoming pointer
1094 // value.
1095 if (PN && InnermostLoop.contains(PN->getParent()) &&
1096 PN->getParent() != InnermostLoop.getHeader()) {
1097 llvm::append_range(WorkList, PN->incoming_values());
1098 } else
1099 AddPointer(Ptr);
1100 }
1101}
1102
1103// Walk back through the IR for a pointer, looking for a select like the
1104// following:
1105//
1106// %offset = select i1 %cmp, i64 %a, i64 %b
1107// %addr = getelementptr double, double* %base, i64 %offset
1108// %ld = load double, double* %addr, align 8
1109//
1110// We won't be able to form a single SCEVAddRecExpr from this since the
1111// address for each loop iteration depends on %cmp. We could potentially
1112// produce multiple valid SCEVAddRecExprs, though, and check all of them for
1113// memory safety/aliasing if needed.
1114//
1115// If we encounter some IR we don't yet handle, or something obviously fine
1116// like a constant, then we just add the SCEV for that term to the list passed
1117// in by the caller. If we have a node that may potentially yield a valid
1118// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
1119// ourselves before adding to the list.
1121 ScalarEvolution *SE, const Loop *L, Value *Ptr,
1123 unsigned Depth) {
1124 // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
1125 // we've exceeded our limit on recursion, just return whatever we have
1126 // regardless of whether it can be used for a forked pointer or not, along
1127 // with an indication of whether it might be a poison or undef value.
1128 const SCEV *Scev = SE->getSCEV(Ptr);
1129 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
1130 !isa<Instruction>(Ptr) || Depth == 0) {
1131 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1132 return;
1133 }
1134
1135 Depth--;
1136
1137 auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
1138 return get<1>(S);
1139 };
1140
1141 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
1142 switch (Opcode) {
1143 case Instruction::Add:
1144 return SE->getAddExpr(L, R);
1145 case Instruction::Sub:
1146 return SE->getMinusSCEV(L, R);
1147 default:
1148 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
1149 }
1150 };
1151
1153 unsigned Opcode = I->getOpcode();
1154 switch (Opcode) {
1155 case Instruction::GetElementPtr: {
1156 auto *GEP = cast<GetElementPtrInst>(I);
1157 Type *SourceTy = GEP->getSourceElementType();
1158 // We only handle base + single offset GEPs here for now.
1159 // Not dealing with preexisting gathers yet, so no vectors.
1160 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
1161 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
1162 break;
1163 }
1166 findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
1167 findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
1168
1169 // See if we need to freeze our fork...
1170 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
1171 any_of(OffsetScevs, UndefPoisonCheck);
1172
1173 // Check that we only have a single fork, on either the base or the offset.
1174 // Copy the SCEV across for the one without a fork in order to generate
1175 // the full SCEV for both sides of the GEP.
1176 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
1177 BaseScevs.push_back(BaseScevs[0]);
1178 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
1179 OffsetScevs.push_back(OffsetScevs[0]);
1180 else {
1181 ScevList.emplace_back(Scev, NeedsFreeze);
1182 break;
1183 }
1184
1185 Type *IntPtrTy = SE->getEffectiveSCEVType(GEP->getPointerOperandType());
1186
1187 // Find the size of the type being pointed to. We only have a single
1188 // index term (guarded above) so we don't need to index into arrays or
1189 // structures, just get the size of the scalar value.
1190 const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
1191
1192 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
1193 const SCEV *Base = get<0>(B);
1194 const SCEV *Offset = get<0>(O);
1195
1196 // Scale up the offsets by the size of the type, then add to the bases.
1197 const SCEV *Scaled =
1198 SE->getMulExpr(Size, SE->getTruncateOrSignExtend(Offset, IntPtrTy));
1199 ScevList.emplace_back(SE->getAddExpr(Base, Scaled), NeedsFreeze);
1200 }
1201 break;
1202 }
1203 case Instruction::Select: {
1205 // A select means we've found a forked pointer, but we currently only
1206 // support a single select per pointer so if there's another behind this
1207 // then we just bail out and return the generic SCEV.
1208 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1209 findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
1210 if (ChildScevs.size() == 2)
1211 append_range(ScevList, ChildScevs);
1212 else
1213 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1214 break;
1215 }
1216 case Instruction::PHI: {
1218 // A phi means we've found a forked pointer, but we currently only
1219 // support a single phi per pointer so if there's another behind this
1220 // then we just bail out and return the generic SCEV.
1221 if (I->getNumOperands() == 2) {
1222 findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
1223 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1224 }
1225 if (ChildScevs.size() == 2)
1226 append_range(ScevList, ChildScevs);
1227 else
1228 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1229 break;
1230 }
1231 case Instruction::Add:
1232 case Instruction::Sub: {
1235 findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
1236 findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
1237
1238 // See if we need to freeze our fork...
1239 bool NeedsFreeze =
1240 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
1241
1242 // Check that we only have a single fork, on either the left or right side.
1243 // Copy the SCEV across for the one without a fork in order to generate
1244 // the full SCEV for both sides of the BinOp.
1245 if (LScevs.size() == 2 && RScevs.size() == 1)
1246 RScevs.push_back(RScevs[0]);
1247 else if (RScevs.size() == 2 && LScevs.size() == 1)
1248 LScevs.push_back(LScevs[0]);
1249 else {
1250 ScevList.emplace_back(Scev, NeedsFreeze);
1251 break;
1252 }
1253
1254 for (auto [L, R] : zip(LScevs, RScevs))
1255 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
1256 NeedsFreeze);
1257 break;
1258 }
1259 default:
1260 // Just return the current SCEV if we haven't handled the instruction yet.
1261 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1262 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1263 break;
1264 }
1265}
1266
1267bool AccessAnalysis::createCheckForAccess(
1268 RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
1269 const DenseMap<Value *, const SCEV *> &StridesMap,
1270 DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
1271 unsigned &RunningDepId, unsigned ASId, bool Assume) {
1272 Value *Ptr = Access.getPointer();
1273 ScalarEvolution *SE = PSE.getSE();
1274 assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1275
1277 findForkedSCEVs(SE, TheLoop, Ptr, RTCheckPtrs, MaxForkedSCEVDepth);
1278 assert(!RTCheckPtrs.empty() &&
1279 "Must have some runtime-check pointer candidates");
1280
1281 // RTCheckPtrs must have size 2 if there are forked pointers. Otherwise, there
1282 // are no forked pointers; replaceSymbolicStridesSCEV in this case.
1283 auto IsLoopInvariantOrAR =
1284 [&SE, &TheLoop](const PointerIntPair<const SCEV *, 1, bool> &P) {
1285 return SE->isLoopInvariant(P.getPointer(), TheLoop) ||
1286 isa<SCEVAddRecExpr>(P.getPointer());
1287 };
1288 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {
1289 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";
1290 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()
1291 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
1292 } else {
1293 RTCheckPtrs = {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1294 }
1295
1296 /// Check whether all pointers can participate in a runtime bounds check. They
1297 /// must either be invariant or non-wrapping affine AddRecs.
1298 for (auto &P : RTCheckPtrs) {
1299 // The bounds for loop-invariant pointer is trivial.
1300 if (SE->isLoopInvariant(P.getPointer(), TheLoop))
1301 continue;
1302
1303 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
1304 if (!AR && Assume)
1305 AR = PSE.getAsAddRec(Ptr);
1306 if (!AR || !AR->isAffine())
1307 return false;
1308
1309 // If there's only one option for Ptr, look it up after bounds and wrap
1310 // checking, because assumptions might have been added to PSE.
1311 if (RTCheckPtrs.size() == 1) {
1312 AR =
1313 cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
1314 P.setPointer(AR);
1315 }
1316
1317 if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
1318 TheLoop, Assume, DT))
1319 return false;
1320 }
1321
1322 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {
1323 // The id of the dependence set.
1324 unsigned DepId;
1325
1326 if (DepCands.contains(Access)) {
1327 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1328 unsigned &LeaderId = DepSetId[Leader];
1329 if (!LeaderId)
1330 LeaderId = RunningDepId++;
1331 DepId = LeaderId;
1332 } else
1333 // Each access has its own dependence set.
1334 DepId = RunningDepId++;
1335
1336 bool IsWrite = Access.getInt();
1337 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1338 NeedsFreeze);
1339 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1340 }
1341
1342 return true;
1343}
1344
1345bool AccessAnalysis::canCheckPtrAtRT(
1346 RuntimePointerChecking &RtCheck, Loop *TheLoop,
1347 const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1348 bool AllowPartial, const MemoryDepChecker &DepChecker) {
1349 // Find pointers with computable bounds. We are going to use this information
1350 // to place a runtime bound check.
1351 bool CanDoRT = true;
1352
1353 bool MayNeedRTCheck = false;
1354 if (!IsRTCheckAnalysisNeeded) return true;
1355
1356 if (auto *Deps = DepChecker.getDependences()) {
1357 // If there are unknown dependences, this means runtime checks are needed to
1358 // ensure there's no overlap between accesses to the same underlying object.
1359 // Remove the equivalence classes containing both source and destination
1360 // accesses from DepCands. This ensures runtime checks will be generated
1361 // between those accesses and prevents them from being grouped together.
1362 for (const auto &Dep : *Deps) {
1363 if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
1366 "Should only skip safe dependences");
1367 continue;
1368 }
1369 Instruction *Src = Dep.getSource(DepChecker);
1370 Instruction *Dst = Dep.getDestination(DepChecker);
1371 DepCands.eraseClass({getPointerOperand(Src), Src->mayWriteToMemory()});
1372 DepCands.eraseClass({getPointerOperand(Dst), Dst->mayWriteToMemory()});
1373 }
1374 } else {
1375 CheckDeps.clear();
1376 DepCands = {};
1377 }
1378
1379 // We assign a consecutive id to access from different alias sets.
1380 // Accesses between different groups doesn't need to be checked.
1381 unsigned ASId = 0;
1382 for (const auto &AS : AST) {
1383 int NumReadPtrChecks = 0;
1384 int NumWritePtrChecks = 0;
1385 bool CanDoAliasSetRT = true;
1386 ++ASId;
1387 auto ASPointers = AS.getPointers();
1388
1389 // We assign consecutive id to access from different dependence sets.
1390 // Accesses within the same set don't need a runtime check.
1391 unsigned RunningDepId = 1;
1393
1395
1396 // First, count how many write and read accesses are in the alias set. Also
1397 // collect MemAccessInfos for later.
1399 for (const Value *ConstPtr : ASPointers) {
1400 Value *Ptr = const_cast<Value *>(ConstPtr);
1401 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
1402 if (IsWrite)
1403 ++NumWritePtrChecks;
1404 else
1405 ++NumReadPtrChecks;
1406 AccessInfos.emplace_back(Ptr, IsWrite);
1407 }
1408
1409 // We do not need runtime checks for this alias set, if there are no writes
1410 // or a single write and no reads.
1411 if (NumWritePtrChecks == 0 ||
1412 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1413 assert((ASPointers.size() <= 1 ||
1414 all_of(ASPointers,
1415 [this](const Value *Ptr) {
1416 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1417 true);
1418 return !DepCands.contains(AccessWrite);
1419 })) &&
1420 "Can only skip updating CanDoRT below, if all entries in AS "
1421 "are reads or there is at most 1 entry");
1422 continue;
1423 }
1424
1425 for (auto &Access : AccessInfos) {
1426 for (const auto &AccessTy : Accesses[Access]) {
1427 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1428 DepSetId, TheLoop, RunningDepId, ASId,
1429 false)) {
1430 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1431 << *Access.getPointer() << '\n');
1432 Retries.emplace_back(Access, AccessTy);
1433 CanDoAliasSetRT = false;
1434 }
1435 }
1436 }
1437
1438 // Note that this function computes CanDoRT and MayNeedRTCheck
1439 // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1440 // we have a pointer for which we couldn't find the bounds but we don't
1441 // actually need to emit any checks so it does not matter.
1442 //
1443 // We need runtime checks for this alias set, if there are at least 2
1444 // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1445 // any bound checks (because in that case the number of dependence sets is
1446 // incomplete).
1447 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1448
1449 // We need to perform run-time alias checks, but some pointers had bounds
1450 // that couldn't be checked.
1451 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1452 // Reset the CanDoSetRt flag and retry all accesses that have failed.
1453 // We know that we need these checks, so we can now be more aggressive
1454 // and add further checks if required (overflow checks).
1455 CanDoAliasSetRT = true;
1456 for (const auto &[Access, AccessTy] : Retries) {
1457 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1458 DepSetId, TheLoop, RunningDepId, ASId,
1459 /*Assume=*/true)) {
1460 CanDoAliasSetRT = false;
1461 UncomputablePtr = Access.getPointer();
1462 if (!AllowPartial)
1463 break;
1464 }
1465 }
1466 }
1467
1468 CanDoRT &= CanDoAliasSetRT;
1469 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1470 ++ASId;
1471 }
1472
1473 // If the pointers that we would use for the bounds comparison have different
1474 // address spaces, assume the values aren't directly comparable, so we can't
1475 // use them for the runtime check. We also have to assume they could
1476 // overlap. In the future there should be metadata for whether address spaces
1477 // are disjoint.
1478 unsigned NumPointers = RtCheck.Pointers.size();
1479 for (unsigned i = 0; i < NumPointers; ++i) {
1480 for (unsigned j = i + 1; j < NumPointers; ++j) {
1481 // Only need to check pointers between two different dependency sets.
1482 if (RtCheck.Pointers[i].DependencySetId ==
1483 RtCheck.Pointers[j].DependencySetId)
1484 continue;
1485 // Only need to check pointers in the same alias set.
1486 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1487 continue;
1488
1489 Value *PtrI = RtCheck.Pointers[i].PointerValue;
1490 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1491
1492 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1493 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1494 if (ASi != ASj) {
1495 LLVM_DEBUG(
1496 dbgs() << "LAA: Runtime check would require comparison between"
1497 " different address spaces\n");
1498 return false;
1499 }
1500 }
1501 }
1502
1503 if (MayNeedRTCheck && (CanDoRT || AllowPartial))
1504 RtCheck.generateChecks(DepCands);
1505
1506 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1507 << " pointer comparisons.\n");
1508
1509 // If we can do run-time checks, but there are no checks, no runtime checks
1510 // are needed. This can happen when all pointers point to the same underlying
1511 // object for example.
1512 RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1513
1514 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1515 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
1516 "CanDoRTIfNeeded depends on RtCheck.Need");
1517 if (!CanDoRTIfNeeded && !AllowPartial)
1518 RtCheck.reset();
1519 return CanDoRTIfNeeded;
1520}
1521
1522void AccessAnalysis::buildDependenceSets() {
1523 // We process the set twice: first we process read-write pointers, last we
1524 // process read-only pointers. This allows us to skip dependence tests for
1525 // read-only pointers.
1526
1527 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1528 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1529 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1530 LLVM_DEBUG({
1531 for (const auto &[A, _] : Accesses)
1532 dbgs() << "\t" << *A.getPointer() << " ("
1533 << (A.getInt()
1534 ? "write"
1535 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
1536 : "read"))
1537 << ")\n";
1538 });
1539
1540 // The AliasSetTracker has nicely partitioned our pointers by metadata
1541 // compatibility and potential for underlying-object overlap. As a result, we
1542 // only need to check for potential pointer dependencies within each alias
1543 // set.
1544 for (const auto &AS : AST) {
1545 bool AliasSetHasWrite = false;
1546
1547 // Map of (pointer to underlying objects, accessed address space) to last
1548 // access encountered.
1549 using UnderlyingObjToAccessMap =
1551 UnderlyingObjToAccessMap ObjToLastAccess;
1552
1553 // Set of access to check after all writes have been processed.
1554 PtrAccessMap DeferredAccesses;
1555
1556 // Iterate over each alias set twice, once to process read/write pointers,
1557 // and then to process read-only pointers.
1558
1559 auto ProcessAccesses = [&](bool UseDeferred) {
1560 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1561
1562 // Note that both the alias-set tracker and the alias sets themselves used
1563 // ordered collections internally and so the iteration order here is
1564 // deterministic.
1565 for (const Value *ConstPtr : AS.getPointers()) {
1566 Value *Ptr = const_cast<Value *>(ConstPtr);
1567
1568 // For a single memory access in AliasSetTracker, Accesses may contain
1569 // both read and write, and they both need to be handled for CheckDeps.
1570 for (auto [AccessPtr, IsWrite] : S.keys()) {
1571 if (AccessPtr != Ptr)
1572 continue;
1573
1574 // If we're using the deferred access set, then it contains only
1575 // reads.
1576 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
1577 if (UseDeferred && !IsReadOnlyPtr)
1578 continue;
1579 // Otherwise, the pointer must be in the PtrAccessSet, either as a
1580 // read or a write.
1581 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1582 S.contains(MemAccessInfo(Ptr, false))) &&
1583 "Alias-set pointer not in the access set?");
1584
1585 MemAccessInfo Access(Ptr, IsWrite);
1586 DepCands.insert(Access);
1587
1588 // Memorize read-only pointers for later processing and skip them in
1589 // the first round (they need to be checked after we have seen all
1590 // write pointers). Note: we also mark pointer that are not
1591 // consecutive as "read-only" pointers (so that we check
1592 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1593 if (!UseDeferred && IsReadOnlyPtr) {
1594 // We only use the pointer keys, the types vector values don't
1595 // matter.
1596 DeferredAccesses.insert({Access, {}});
1597 continue;
1598 }
1599
1600 // If this is a write - check other reads and writes for conflicts. If
1601 // this is a read only check other writes for conflicts (but only if
1602 // there is no other write to the ptr - this is an optimization to
1603 // catch "a[i] = a[i] + " without having to do a dependence check).
1604 if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
1605 CheckDeps.push_back(Access);
1606 IsRTCheckAnalysisNeeded = true;
1607 }
1608
1609 if (IsWrite)
1610 AliasSetHasWrite = true;
1611
1612 // Create sets of pointers connected by a shared alias set and
1613 // underlying object.
1614 SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1615 UOs = {};
1616 ::getUnderlyingObjects(Ptr, UOs, LI);
1618 << "Underlying objects for pointer " << *Ptr << "\n");
1619 for (const Value *UnderlyingObj : UOs) {
1620 // nullptr never alias, don't join sets for pointer that have "null"
1621 // in their UnderlyingObjects list.
1622 if (isa<ConstantPointerNull>(UnderlyingObj) &&
1624 TheLoop->getHeader()->getParent(),
1625 UnderlyingObj->getType()->getPointerAddressSpace()))
1626 continue;
1627
1628 auto [It, Inserted] = ObjToLastAccess.try_emplace(
1629 {UnderlyingObj,
1630 cast<PointerType>(Ptr->getType())->getAddressSpace()},
1631 Access);
1632 if (!Inserted) {
1633 DepCands.unionSets(Access, It->second);
1634 It->second = Access;
1635 }
1636
1637 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1638 }
1639 }
1640 }
1641 };
1642
1643 ProcessAccesses(false);
1644 ProcessAccesses(true);
1645 }
1646}
1647
1648/// Check whether the access through \p Ptr has a constant stride.
1649std::optional<int64_t>
1651 const Loop *Lp, const DominatorTree &DT,
1652 const DenseMap<Value *, const SCEV *> &StridesMap,
1653 bool Assume, bool ShouldCheckWrap) {
1654 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1655 if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1656 return 0;
1657
1658 assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
1659
1660 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1661 if (Assume && !AR)
1662 AR = PSE.getAsAddRec(Ptr);
1663
1664 if (!AR) {
1665 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1666 << " SCEV: " << *PtrScev << "\n");
1667 return std::nullopt;
1668 }
1669
1670 std::optional<int64_t> Stride =
1671 getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
1672 if (!ShouldCheckWrap || !Stride)
1673 return Stride;
1674
1675 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
1676 return Stride;
1677
1678 LLVM_DEBUG(
1679 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1680 << *Ptr << " SCEV: " << *AR << "\n");
1681 return std::nullopt;
1682}
1683
1684std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1685 Type *ElemTyB, Value *PtrB,
1686 const DataLayout &DL,
1687 ScalarEvolution &SE,
1688 bool StrictCheck, bool CheckType) {
1689 assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1690
1691 // Make sure that A and B are different pointers.
1692 if (PtrA == PtrB)
1693 return 0;
1694
1695 // Make sure that the element types are the same if required.
1696 if (CheckType && ElemTyA != ElemTyB)
1697 return std::nullopt;
1698
1699 unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1700 unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1701
1702 // Check that the address spaces match.
1703 if (ASA != ASB)
1704 return std::nullopt;
1705 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1706
1707 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1708 const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1709 DL, OffsetA, /*AllowNonInbounds=*/true);
1710 const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1711 DL, OffsetB, /*AllowNonInbounds=*/true);
1712
1713 std::optional<int64_t> Val;
1714 if (PtrA1 == PtrB1) {
1715 // Retrieve the address space again as pointer stripping now tracks through
1716 // `addrspacecast`.
1717 ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1718 ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1719 // Check that the address spaces match and that the pointers are valid.
1720 if (ASA != ASB)
1721 return std::nullopt;
1722
1723 IdxWidth = DL.getIndexSizeInBits(ASA);
1724 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1725 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1726
1727 OffsetB -= OffsetA;
1728 Val = OffsetB.trySExtValue();
1729 } else {
1730 // Otherwise compute the distance with SCEV between the base pointers.
1731 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1732 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1733 std::optional<APInt> Diff =
1734 SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1735 if (!Diff)
1736 return std::nullopt;
1737 Val = Diff->trySExtValue();
1738 }
1739
1740 if (!Val)
1741 return std::nullopt;
1742
1743 int64_t Size = DL.getTypeStoreSize(ElemTyA);
1744 int64_t Dist = *Val / Size;
1745
1746 // Ensure that the calculated distance matches the type-based one after all
1747 // the bitcasts removal in the provided pointers.
1748 if (!StrictCheck || Dist * Size == Val)
1749 return Dist;
1750 return std::nullopt;
1751}
1752
1754 const DataLayout &DL, ScalarEvolution &SE,
1755 SmallVectorImpl<unsigned> &SortedIndices) {
1757 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1758 "Expected list of pointer operands.");
1759 // Walk over the pointers, and map each of them to an offset relative to
1760 // first pointer in the array.
1761 Value *Ptr0 = VL[0];
1762
1763 using DistOrdPair = std::pair<int64_t, unsigned>;
1764 auto Compare = llvm::less_first();
1765 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1766 Offsets.emplace(0, 0);
1767 bool IsConsecutive = true;
1768 for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1769 std::optional<int64_t> Diff =
1770 getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1771 /*StrictCheck=*/true);
1772 if (!Diff)
1773 return false;
1774
1775 // Check if the pointer with the same offset is found.
1776 int64_t Offset = *Diff;
1777 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1778 if (!IsInserted)
1779 return false;
1780 // Consecutive order if the inserted element is the last one.
1781 IsConsecutive &= std::next(It) == Offsets.end();
1782 }
1783 SortedIndices.clear();
1784 if (!IsConsecutive) {
1785 // Fill SortedIndices array only if it is non-consecutive.
1786 SortedIndices.resize(VL.size());
1787 for (auto [Idx, Off] : enumerate(Offsets))
1788 SortedIndices[Idx] = Off.second;
1789 }
1790 return true;
1791}
1792
1793/// Returns true if the memory operations \p A and \p B are consecutive.
1795 ScalarEvolution &SE, bool CheckType) {
1798 if (!PtrA || !PtrB)
1799 return false;
1800 Type *ElemTyA = getLoadStoreType(A);
1801 Type *ElemTyB = getLoadStoreType(B);
1802 std::optional<int64_t> Diff =
1803 getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1804 /*StrictCheck=*/true, CheckType);
1805 return Diff == 1;
1806}
1807
1809 visitPointers(SI->getPointerOperand(), *InnermostLoop,
1810 [this, SI](Value *Ptr) {
1811 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1812 InstMap.push_back(SI);
1813 ++AccessIdx;
1814 });
1815}
1816
1818 visitPointers(LI->getPointerOperand(), *InnermostLoop,
1819 [this, LI](Value *Ptr) {
1820 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1821 InstMap.push_back(LI);
1822 ++AccessIdx;
1823 });
1824}
1825
1845
1847 switch (Type) {
1848 case NoDep:
1849 case Forward:
1851 case Unknown:
1852 case IndirectUnsafe:
1853 case InvariantUnsafe:
1854 return false;
1855
1857 case Backward:
1859 return true;
1860 }
1861 llvm_unreachable("unexpected DepType!");
1862}
1863
1868
1870 switch (Type) {
1871 case Forward:
1873 return true;
1874
1875 case NoDep:
1876 case Unknown:
1878 case Backward:
1880 case IndirectUnsafe:
1881 case InvariantUnsafe:
1882 return false;
1883 }
1884 llvm_unreachable("unexpected DepType!");
1885}
1886
1887bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1888 uint64_t TypeByteSize,
1889 unsigned CommonStride) {
1890 // If loads occur at a distance that is not a multiple of a feasible vector
1891 // factor store-load forwarding does not take place.
1892 // Positive dependences might cause troubles because vectorizing them might
1893 // prevent store-load forwarding making vectorized code run a lot slower.
1894 // a[i] = a[i-3] ^ a[i-8];
1895 // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1896 // hence on your typical architecture store-load forwarding does not take
1897 // place. Vectorizing in such cases does not make sense.
1898 // Store-load forwarding distance.
1899
1900 // After this many iterations store-to-load forwarding conflicts should not
1901 // cause any slowdowns.
1902 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1903 // Maximum vector factor.
1904 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1905 std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
1906 MaxStoreLoadForwardSafeDistanceInBits);
1907
1908 // Compute the smallest VF at which the store and load would be misaligned.
1909 for (uint64_t VF = 2 * TypeByteSize;
1910 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
1911 // If the number of vector iteration between the store and the load are
1912 // small we could incur conflicts.
1913 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1914 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
1915 break;
1916 }
1917 }
1918
1919 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
1920 LLVM_DEBUG(
1921 dbgs() << "LAA: Distance " << Distance
1922 << " that could cause a store-load forwarding conflict\n");
1923 return true;
1924 }
1925
1926 if (CommonStride &&
1927 MaxVFWithoutSLForwardIssuesPowerOf2 <
1928 MaxStoreLoadForwardSafeDistanceInBits &&
1929 MaxVFWithoutSLForwardIssuesPowerOf2 !=
1930 VectorizerParams::MaxVectorWidth * TypeByteSize) {
1931 uint64_t MaxVF =
1932 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
1933 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1934 MaxStoreLoadForwardSafeDistanceInBits =
1935 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1936 }
1937 return false;
1938}
1939
1940void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1941 if (Status < S)
1942 Status = S;
1943}
1944
1945/// Given a dependence-distance \p Dist between two memory accesses, that have
1946/// strides in the same direction whose absolute value of the maximum stride is
1947/// given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1948/// MaxBTC, check if it is possible to prove statically that the dependence
1949/// distance is larger than the range that the accesses will travel through the
1950/// execution of the loop. If so, return true; false otherwise. This is useful
1951/// for example in loops such as the following (PR31098):
1952///
1953/// for (i = 0; i < D; ++i) {
1954/// = out[i];
1955/// out[i+D] =
1956/// }
1958 const SCEV &MaxBTC, const SCEV &Dist,
1959 uint64_t MaxStride) {
1960
1961 // If we can prove that
1962 // (**) |Dist| > MaxBTC * Step
1963 // where Step is the absolute stride of the memory accesses in bytes,
1964 // then there is no dependence.
1965 //
1966 // Rationale:
1967 // We basically want to check if the absolute distance (|Dist/Step|)
1968 // is >= the loop iteration count (or > MaxBTC).
1969 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1970 // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1971 // that the dependence distance is >= VF; This is checked elsewhere.
1972 // But in some cases we can prune dependence distances early, and
1973 // even before selecting the VF, and without a runtime test, by comparing
1974 // the distance against the loop iteration count. Since the vectorized code
1975 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1976 // also guarantees that distance >= VF.
1977 //
1978 const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
1979 const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
1980
1981 const SCEV *CastedDist = &Dist;
1982 const SCEV *CastedProduct = Product;
1983 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1984 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1985
1986 // The dependence distance can be positive/negative, so we sign extend Dist;
1987 // The multiplication of the absolute stride in bytes and the
1988 // backedgeTakenCount is non-negative, so we zero extend Product.
1989 if (DistTypeSizeBits > ProductTypeSizeBits)
1990 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1991 else
1992 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1993
1994 // Is Dist - (MaxBTC * Step) > 0 ?
1995 // (If so, then we have proven (**) because |Dist| >= Dist)
1996 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1997 if (SE.isKnownPositive(Minus))
1998 return true;
1999
2000 // Second try: Is -Dist - (MaxBTC * Step) > 0 ?
2001 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
2002 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
2003 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
2004 return SE.isKnownPositive(Minus);
2005}
2006
2007/// Check the dependence for two accesses with the same stride \p Stride.
2008/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
2009/// size in bytes.
2010///
2011/// \returns true if they are independent.
2013 uint64_t TypeByteSize) {
2014 assert(Stride > 1 && "The stride must be greater than 1");
2015 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
2016 assert(Distance > 0 && "The distance must be non-zero");
2017
2018 // Skip if the distance is not multiple of type byte size.
2019 if (Distance % TypeByteSize)
2020 return false;
2021
2022 // No dependence if the distance is not multiple of the stride.
2023 // E.g.
2024 // for (i = 0; i < 1024 ; i += 4)
2025 // A[i+2] = A[i] + 1;
2026 //
2027 // Two accesses in memory (distance is 2, stride is 4):
2028 // | A[0] | | | | A[4] | | | |
2029 // | | | A[2] | | | | A[6] | |
2030 //
2031 // E.g.
2032 // for (i = 0; i < 1024 ; i += 3)
2033 // A[i+4] = A[i] + 1;
2034 //
2035 // Two accesses in memory (distance is 4, stride is 3):
2036 // | A[0] | | | A[3] | | | A[6] | | |
2037 // | | | | | A[4] | | | A[7] | |
2038 return Distance % Stride;
2039}
2040
2041bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
2042 Type *SrcTy,
2043 const SCEV *Sink,
2044 Type *SinkTy) {
2045 const SCEV *BTC = PSE.getBackedgeTakenCount();
2046 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
2047 ScalarEvolution &SE = *PSE.getSE();
2048 const auto &[SrcStart_, SrcEnd_] =
2049 getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
2050 &SE, &PointerBounds, DT, AC, LoopGuards);
2051 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
2052 return false;
2053
2054 const auto &[SinkStart_, SinkEnd_] =
2055 getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
2056 &SE, &PointerBounds, DT, AC, LoopGuards);
2057 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
2058 isa<SCEVCouldNotCompute>(SinkEnd_))
2059 return false;
2060
2061 if (!LoopGuards)
2062 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2063
2064 auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
2065 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
2066 if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
2067 return true;
2068
2069 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
2070 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
2071 return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2072}
2073
2075 MemoryDepChecker::DepDistanceStrideAndSizeInfo>
2076MemoryDepChecker::getDependenceDistanceStrideAndSize(
2077 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
2078 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
2079 const auto &DL = InnermostLoop->getHeader()->getDataLayout();
2080 auto &SE = *PSE.getSE();
2081 const auto &[APtr, AIsWrite] = A;
2082 const auto &[BPtr, BIsWrite] = B;
2083
2084 // Two reads are independent.
2085 if (!AIsWrite && !BIsWrite)
2087
2088 Type *ATy = getLoadStoreType(AInst);
2089 Type *BTy = getLoadStoreType(BInst);
2090
2091 // We cannot check pointers in different address spaces.
2092 if (APtr->getType()->getPointerAddressSpace() !=
2093 BPtr->getType()->getPointerAddressSpace())
2095
2096 std::optional<int64_t> StrideAPtr = getPtrStride(
2097 PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
2098 std::optional<int64_t> StrideBPtr = getPtrStride(
2099 PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);
2100
2101 const SCEV *Src = PSE.getSCEV(APtr);
2102 const SCEV *Sink = PSE.getSCEV(BPtr);
2103
2104 // If the induction step is negative we have to invert source and sink of the
2105 // dependence when measuring the distance between them. We should not swap
2106 // AIsWrite with BIsWrite, as their uses expect them in program order.
2107 if (StrideAPtr && *StrideAPtr < 0) {
2108 std::swap(Src, Sink);
2109 std::swap(AInst, BInst);
2110 std::swap(ATy, BTy);
2111 std::swap(StrideAPtr, StrideBPtr);
2112 }
2113
2114 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
2115
2116 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
2117 << "\n");
2118 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
2119 << ": " << *Dist << "\n");
2120
2121 // Need accesses with constant strides and the same direction for further
2122 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
2123 // similar code or pointer arithmetic that could wrap in the address space.
2124
2125 // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
2126 // not loop-invariant (stride will be 0 in that case), we cannot analyze the
2127 // dependence further and also cannot generate runtime checks.
2128 if (!StrideAPtr || !StrideBPtr) {
2129 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2131 }
2132
2133 int64_t StrideAPtrInt = *StrideAPtr;
2134 int64_t StrideBPtrInt = *StrideBPtr;
2135 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
2136 << " Sink induction step: " << StrideBPtrInt << "\n");
2137 // At least Src or Sink are loop invariant and the other is strided or
2138 // invariant.
2139 if (!StrideAPtrInt || !StrideBPtrInt) {
2140 // If both are loop-invariant and access the same location, we cannot
2141 // vectorize.
2142 if (!StrideAPtrInt && !StrideBPtrInt && Dist->isZero())
2144 // Otherwise, we can generate a runtime check to disambiguate the accesses.
2146 }
2147
2148 // Both Src and Sink have a constant stride, check if they are in the same
2149 // direction.
2150 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
2151 LLVM_DEBUG(
2152 dbgs() << "Pointer access with strides in different directions\n");
2154 }
2155
2156 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
2157 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
2158
2159 // If store sizes are not the same, set TypeByteSize to zero, so we can check
2160 // it in the caller isDependent.
2161 uint64_t ASz = DL.getTypeAllocSize(ATy);
2162 uint64_t BSz = DL.getTypeAllocSize(BTy);
2163 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
2164
2165 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
2166 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
2167
2168 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
2169
2170 std::optional<uint64_t> CommonStride;
2171 if (StrideAScaled == StrideBScaled)
2172 CommonStride = StrideAScaled;
2173
2174 // TODO: Historically, we didn't retry with runtime checks when (unscaled)
2175 // strides were different but there is no inherent reason to.
2176 if (!isa<SCEVConstant>(Dist))
2177 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
2178
2179 // If distance is a SCEVCouldNotCompute, return Unknown immediately.
2180 if (isa<SCEVCouldNotCompute>(Dist)) {
2181 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
2182 return Dependence::Unknown;
2183 }
2184
2185 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2186 TypeByteSize, AIsWrite, BIsWrite);
2187}
2188
2190MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2191 const MemAccessInfo &B, unsigned BIdx) {
2192 assert(AIdx < BIdx && "Must pass arguments in program order");
2193
2194 // Check if we can prove that Sink only accesses memory after Src's end or
2195 // vice versa. The helper is used to perform the checks only on the exit paths
2196 // where it helps to improve the analysis result.
2197 auto CheckCompletelyBeforeOrAfter = [&]() {
2198 auto *APtr = A.getPointer();
2199 auto *BPtr = B.getPointer();
2200 Type *ATy = getLoadStoreType(InstMap[AIdx]);
2201 Type *BTy = getLoadStoreType(InstMap[BIdx]);
2202 const SCEV *Src = PSE.getSCEV(APtr);
2203 const SCEV *Sink = PSE.getSCEV(BPtr);
2204 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2205 };
2206
2207 // Get the dependence distance, stride, type size and what access writes for
2208 // the dependence between A and B.
2209 auto Res =
2210 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2211 if (std::holds_alternative<Dependence::DepType>(Res)) {
2212 if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2213 CheckCompletelyBeforeOrAfter())
2214 return Dependence::NoDep;
2215 return std::get<Dependence::DepType>(Res);
2216 }
2217
2218 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
2219 std::get<DepDistanceStrideAndSizeInfo>(Res);
2220 bool HasSameSize = TypeByteSize > 0;
2221
2222 ScalarEvolution &SE = *PSE.getSE();
2223 auto &DL = InnermostLoop->getHeader()->getDataLayout();
2224
2225 // If the distance between the acecsses is larger than their maximum absolute
2226 // stride multiplied by the symbolic maximum backedge taken count (which is an
2227 // upper bound of the number of iterations), the accesses are independet, i.e.
2228 // they are far enough appart that accesses won't access the same location
2229 // across all loop ierations.
2230 if (HasSameSize &&
2232 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
2233 return Dependence::NoDep;
2234
2235 // The rest of this function relies on ConstDist being at most 64-bits, which
2236 // is checked earlier. Will assert if the calling code changes.
2237 const APInt *APDist = nullptr;
2238 uint64_t ConstDist =
2239 match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
2240
2241 // Attempt to prove strided accesses independent.
2242 if (APDist) {
2243 // If the distance between accesses and their strides are known constants,
2244 // check whether the accesses interlace each other.
2245 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2246 areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
2247 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2248 return Dependence::NoDep;
2249 }
2250 } else {
2251 if (!LoopGuards)
2252 LoopGuards.emplace(
2253 ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2254 Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2255 }
2256
2257 // Negative distances are not plausible dependencies.
2258 if (SE.isKnownNonPositive(Dist)) {
2259 if (SE.isKnownNonNegative(Dist)) {
2260 if (HasSameSize) {
2261 // Write to the same location with the same size.
2262 return Dependence::Forward;
2263 }
2264 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2265 "different type sizes\n");
2266 return Dependence::Unknown;
2267 }
2268
2269 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2270 // Check if the first access writes to a location that is read in a later
2271 // iteration, where the distance between them is not a multiple of a vector
2272 // factor and relatively small.
2273 //
2274 // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2275 // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2276 // forward dependency will allow vectorization using any width.
2277
2278 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2279 if (!ConstDist) {
2280 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2282 }
2283 if (!HasSameSize ||
2284 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
2285 LLVM_DEBUG(
2286 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2288 }
2289 }
2290
2291 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2292 return Dependence::Forward;
2293 }
2294
2295 int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2296 // Below we only handle strictly positive distances.
2297 if (MinDistance <= 0) {
2298 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2300 }
2301
2302 if (!HasSameSize) {
2303 if (CheckCompletelyBeforeOrAfter())
2304 return Dependence::NoDep;
2305 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2306 "different type sizes\n");
2307 return Dependence::Unknown;
2308 }
2309 // Bail out early if passed-in parameters make vectorization not feasible.
2310 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2312 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2314 // The minimum number of iterations for a vectorized/unrolled version.
2315 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
2316
2317 // It's not vectorizable if the distance is smaller than the minimum distance
2318 // needed for a vectroized/unrolled version. Vectorizing one iteration in
2319 // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
2320 // (No need to plus the last gap distance).
2321 //
2322 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2323 // foo(int *A) {
2324 // int *B = (int *)((char *)A + 14);
2325 // for (i = 0 ; i < 1024 ; i += 2)
2326 // B[i] = A[i] + 1;
2327 // }
2328 //
2329 // Two accesses in memory (stride is 4 * 2):
2330 // | A[0] | | A[2] | | A[4] | | A[6] | |
2331 // | B[0] | | B[2] | | B[4] |
2332 //
2333 // MinDistance needs for vectorizing iterations except the last iteration:
2334 // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
2335 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2336 //
2337 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2338 // 12, which is less than distance.
2339 //
2340 // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2341 // the minimum distance needed is 28, which is greater than distance. It is
2342 // not safe to do vectorization.
2343 //
2344 // We use MaxStride (maximum of src and sink strides) to get a conservative
2345 // lower bound on the MinDistanceNeeded in case of different strides.
2346
2347 // We know that Dist is positive, but it may not be constant. Use the signed
2348 // minimum for computations below, as this ensures we compute the closest
2349 // possible dependence distance.
2350 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
2351 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2352 if (!ConstDist) {
2353 // For non-constant distances, we checked the lower bound of the
2354 // dependence distance and the distance may be larger at runtime (and safe
2355 // for vectorization). Classify it as Unknown, so we re-try with runtime
2356 // checks, unless we can prove both accesses cannot overlap.
2357 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2359 }
2360 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2361 << MinDistance << '\n');
2362 return Dependence::Backward;
2363 }
2364
2365 // Unsafe if the minimum distance needed is greater than smallest dependence
2366 // distance distance.
2367 if (MinDistanceNeeded > MinDepDistBytes) {
2368 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2369 << MinDistanceNeeded << " size in bytes\n");
2370 return Dependence::Backward;
2371 }
2372
2373 MinDepDistBytes =
2374 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2375
2376 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2377 if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2378 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
2380
2381 uint64_t MaxVF = MinDepDistBytes / MaxStride;
2382 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2383 << " with max VF = " << MaxVF << '\n');
2384
2385 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2386 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2387 // For non-constant distances, we checked the lower bound of the dependence
2388 // distance and the distance may be larger at runtime (and safe for
2389 // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2390 // unless we can prove both accesses cannot overlap.
2391 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2393 }
2394
2395 if (CheckCompletelyBeforeOrAfter())
2396 return Dependence::NoDep;
2397
2398 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2400}
2401
2403 ArrayRef<MemAccessInfo> CheckDeps) {
2404
2405 MinDepDistBytes = -1;
2407 for (MemAccessInfo CurAccess : CheckDeps) {
2408 if (Visited.contains(CurAccess))
2409 continue;
2410
2411 // Check accesses within this set.
2413 DepCands.findLeader(CurAccess);
2415 DepCands.member_end();
2416
2417 // Check every access pair.
2418 while (AI != AE) {
2419 Visited.insert(*AI);
2420 bool AIIsWrite = AI->getInt();
2421 // Reads from the same pointer don't create extra hazards, but multiple
2422 // stores do (WAW), so start from AI for writes and next(AI) for reads.
2424 (AIIsWrite ? AI : std::next(AI));
2425 while (OI != AE) {
2426 // Check every accessing instruction pair in program order.
2427 auto &Acc = Accesses[*AI];
2428 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();
2429 I1 != I1E; ++I1)
2430 // When checking for WAW (OI == AI) caused by multiple writes to the
2431 // same pointer, start I2 at the next access past I1 to avoid
2432 // self-comparison.
2433 for (std::vector<unsigned>::iterator
2434 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2435 I2E = (OI == AI ? I1E : Accesses[*OI].end());
2436 I2 != I2E; ++I2) {
2437 auto A = std::make_pair(&*AI, *I1);
2438 auto B = std::make_pair(&*OI, *I2);
2439
2440 assert(*I1 != *I2);
2441 if (*I1 > *I2)
2442 std::swap(A, B);
2443
2445 isDependent(*A.first, A.second, *B.first, B.second);
2447
2448 // Gather dependences unless we accumulated MaxDependences
2449 // dependences. In that case return as soon as we find the first
2450 // unsafe dependence. This puts a limit on this quadratic
2451 // algorithm.
2452 if (RecordDependences) {
2453 if (Type != Dependence::NoDep)
2454 Dependences.emplace_back(A.second, B.second, Type);
2455
2456 if (Dependences.size() >= MaxDependences) {
2457 RecordDependences = false;
2458 Dependences.clear();
2460 << "Too many dependences, stopped recording\n");
2461 }
2462 }
2463 if (!RecordDependences && !isSafeForVectorization())
2464 return false;
2465 }
2466 ++OI;
2467 }
2468 ++AI;
2469 }
2470 }
2471
2472 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2473 return isSafeForVectorization();
2474}
2475
2478 MemAccessInfo Access(Ptr, IsWrite);
2479 auto I = Accesses.find(Access);
2481 if (I != Accesses.end()) {
2482 transform(I->second, std::back_inserter(Insts),
2483 [&](unsigned Idx) { return this->InstMap[Idx]; });
2484 }
2485
2486 return Insts;
2487}
2488
2490 "NoDep",
2491 "Unknown",
2492 "IndirectUnsafe",
2493 "InvariantUnsafe",
2494 "Forward",
2495 "ForwardButPreventsForwarding",
2496 "Backward",
2497 "BackwardVectorizable",
2498 "BackwardVectorizableButPreventsForwarding"};
2499
2501 raw_ostream &OS, unsigned Depth,
2502 const SmallVectorImpl<Instruction *> &Instrs) const {
2503 OS.indent(Depth) << DepName[Type] << ":\n";
2504 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2505 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2506}
2507
2508bool LoopAccessInfo::canAnalyzeLoop() {
2509 // We need to have a loop header.
2510 LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2511 << TheLoop->getHeader()->getParent()->getName() << "' from "
2512 << TheLoop->getLocStr() << "\n");
2513
2514 // We can only analyze innermost loops.
2515 if (!TheLoop->isInnermost()) {
2516 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2517 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2518 return false;
2519 }
2520
2521 // We must have a single backedge.
2522 if (TheLoop->getNumBackEdges() != 1) {
2523 LLVM_DEBUG(
2524 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2525 recordAnalysis("CFGNotUnderstood")
2526 << "loop control flow is not understood by analyzer";
2527 return false;
2528 }
2529
2530 // ScalarEvolution needs to be able to find the symbolic max backedge taken
2531 // count, which is an upper bound on the number of loop iterations. The loop
2532 // may execute fewer iterations, if it exits via an uncountable exit.
2533 const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2534 if (isa<SCEVCouldNotCompute>(ExitCount)) {
2535 recordAnalysis("CantComputeNumberOfIterations")
2536 << "could not determine number of loop iterations";
2537 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2538 return false;
2539 }
2540
2541 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2542 << TheLoop->getHeader()->getName() << "\n");
2543 return true;
2544}
2545
2546bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2547 const TargetLibraryInfo *TLI,
2548 DominatorTree *DT) {
2549 // Holds the Load and Store instructions.
2552 SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2553
2554 // Holds all the different accesses in the loop.
2555 unsigned NumReads = 0;
2556 unsigned NumReadWrites = 0;
2557
2558 bool HasComplexMemInst = false;
2559
2560 // A runtime check is only legal to insert if there are no convergent calls.
2561 HasConvergentOp = false;
2562
2563 PtrRtChecking->Pointers.clear();
2564 PtrRtChecking->Need = false;
2565
2566 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2567
2568 const bool EnableMemAccessVersioningOfLoop =
2570 !TheLoop->getHeader()->getParent()->hasOptSize();
2571
2572 // Traverse blocks in fixed RPOT order, regardless of their storage in the
2573 // loop info, as it may be arbitrary.
2574 LoopBlocksRPO RPOT(TheLoop);
2575 RPOT.perform(LI);
2576
2577 // Don't return early as soon as we found a memory access that cannot be
2578 // vectorize - HasConvergentOp must still be computed as it is part of LAI's
2579 // public API (used by LoopDistribute).
2580 for (BasicBlock *BB : RPOT) {
2581 // Scan the BB and collect legal loads and stores. Also detect any
2582 // convergent instructions.
2583 for (Instruction &I : *BB) {
2584 if (auto *Call = dyn_cast<CallBase>(&I)) {
2585 if (Call->isConvergent())
2586 HasConvergentOp = true;
2587 }
2588
2589 // Unsafe to vectorize and we already found a convergent operation, can
2590 // early return now.
2591 if (HasComplexMemInst && HasConvergentOp)
2592 return false;
2593
2594 // Already unsafe to vectorize; keep scanning for convergent ops.
2595 if (HasComplexMemInst)
2596 continue;
2597
2598 // Record alias scopes defined inside the loop.
2599 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2600 for (Metadata *Op : Decl->getScopeList()->operands())
2601 LoopAliasScopes.insert(cast<MDNode>(Op));
2602
2603 // Many math library functions read the rounding mode. We will only
2604 // vectorize a loop if it contains known function calls that don't set
2605 // the flag. Therefore, it is safe to ignore this read from memory.
2606 auto *Call = dyn_cast<CallInst>(&I);
2608 continue;
2609
2610 // If this is a load, save it. If this instruction can read from memory
2611 // but is not a load, we only allow it if it's a call to a function with a
2612 // vector mapping and no pointer arguments.
2613 if (I.mayReadFromMemory()) {
2614 auto hasPointerArgs = [](CallBase *CB) {
2615 return any_of(CB->args(), [](Value const *Arg) {
2616 return Arg->getType()->isPointerTy();
2617 });
2618 };
2619
2620 // If the function has an explicit vectorized counterpart, and does not
2621 // take output/input pointers, we can safely assume that it can be
2622 // vectorized.
2623 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2624 !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2625 continue;
2626
2627 auto *Ld = dyn_cast<LoadInst>(&I);
2628 if (!Ld) {
2629 recordAnalysis("CantVectorizeInstruction", &I)
2630 << "instruction cannot be vectorized";
2631 HasComplexMemInst = true;
2632 continue;
2633 }
2634 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2635 recordAnalysis("NonSimpleLoad", Ld)
2636 << "read with atomic ordering or volatile read";
2637 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2638 HasComplexMemInst = true;
2639 continue;
2640 }
2641 NumLoads++;
2642 Loads.push_back(Ld);
2643 DepChecker->addAccess(Ld);
2644 if (EnableMemAccessVersioningOfLoop)
2645 collectStridedAccess(Ld);
2646 continue;
2647 }
2648
2649 // Save 'store' instructions. Abort if other instructions write to memory.
2650 if (I.mayWriteToMemory()) {
2651 auto *St = dyn_cast<StoreInst>(&I);
2652 if (!St) {
2653 recordAnalysis("CantVectorizeInstruction", &I)
2654 << "instruction cannot be vectorized";
2655 HasComplexMemInst = true;
2656 continue;
2657 }
2658 if (!St->isSimple() && !IsAnnotatedParallel) {
2659 recordAnalysis("NonSimpleStore", St)
2660 << "write with atomic ordering or volatile write";
2661 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2662 HasComplexMemInst = true;
2663 continue;
2664 }
2665 NumStores++;
2666 Stores.push_back(St);
2667 DepChecker->addAccess(St);
2668 if (EnableMemAccessVersioningOfLoop)
2669 collectStridedAccess(St);
2670 }
2671 } // Next instr.
2672 } // Next block.
2673
2674 if (HasComplexMemInst)
2675 return false;
2676
2677 // Now we have two lists that hold the loads and the stores.
2678 // Next, we find the pointers that they use.
2679
2680 // Check if we see any stores. If there are no stores, then we don't
2681 // care if the pointers are *restrict*.
2682 if (!Stores.size()) {
2683 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2684 return true;
2685 }
2686
2688 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
2689 LoopAliasScopes);
2690
2691 // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2692 // multiple times on the same object. If the ptr is accessed twice, once
2693 // for read and once for write, it will only appear once (on the write
2694 // list). This is okay, since we are going to check for conflicts between
2695 // writes and between reads and writes, but not between reads and reads.
2696 SmallSet<std::pair<Value *, Type *>, 16> Seen;
2697
2698 // Record uniform store addresses to identify if we have multiple stores
2699 // to the same address.
2700 SmallPtrSet<Value *, 16> UniformStores;
2701
2702 for (StoreInst *ST : Stores) {
2703 Value *Ptr = ST->getPointerOperand();
2704
2705 if (isInvariant(Ptr)) {
2706 // Record store instructions to loop invariant addresses
2707 StoresToInvariantAddresses.push_back(ST);
2708 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2709 !UniformStores.insert(Ptr).second;
2710 }
2711
2712 // If we did *not* see this pointer before, insert it to the read-write
2713 // list. At this phase it is only a 'write' list.
2714 Type *AccessTy = getLoadStoreType(ST);
2715 if (Seen.insert({Ptr, AccessTy}).second) {
2716 ++NumReadWrites;
2717
2718 MemoryLocation Loc = MemoryLocation::get(ST);
2719 // The TBAA metadata could have a control dependency on the predication
2720 // condition, so we cannot rely on it when determining whether or not we
2721 // need runtime pointer checks.
2722 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2723 Loc.AATags.TBAA = nullptr;
2724
2725 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2726 // all alternatives.
2727 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2728 [&Accesses, AccessTy, Loc](Value *Ptr) {
2729 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2730 Accesses.addStore(NewLoc, AccessTy);
2731 });
2732 }
2733 }
2734
2735 if (IsAnnotatedParallel) {
2736 LLVM_DEBUG(
2737 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2738 << "checks.\n");
2739 return true;
2740 }
2741
2742 for (LoadInst *LD : Loads) {
2743 Value *Ptr = LD->getPointerOperand();
2744 // If we did *not* see this pointer before, insert it to the read list. If
2745 // we *did* see it before, then it is already in the read-write list. This
2746 // allows us to vectorize expressions such as A[i] += x; Because the address
2747 // of A[i] is a read-write pointer. This only works if the index of A[i] is
2748 // strictly monotonic, which we approximate (conservatively) via
2749 // getPtrStride. If the address is unknown (e.g. A[B[i]]) then we may read,
2750 // modify, and write overlapping words. Note that "zero stride" is unsafe
2751 // and is being handled below.
2752 bool IsReadOnlyPtr = false;
2753 Type *AccessTy = getLoadStoreType(LD);
2754 if (Seen.insert({Ptr, AccessTy}).second ||
2755 !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
2756 true)) {
2757 ++NumReads;
2758 IsReadOnlyPtr = true;
2759 }
2760
2761 // See if there is an unsafe dependency between a load to a uniform address and
2762 // store to the same uniform address.
2763 if (UniformStores.contains(Ptr)) {
2764 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2765 "load and uniform store to the same address!\n");
2766 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2767 }
2768
2769 MemoryLocation Loc = MemoryLocation::get(LD);
2770 // The TBAA metadata could have a control dependency on the predication
2771 // condition, so we cannot rely on it when determining whether or not we
2772 // need runtime pointer checks.
2773 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2774 Loc.AATags.TBAA = nullptr;
2775
2776 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2777 // all alternatives.
2778 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2779 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2780 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2781 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2782 });
2783 }
2784
2785 // If we write (or read-write) to a single destination and there are no other
2786 // reads in this loop then is it safe to vectorize: the vectorized stores
2787 // preserve ordering via replication or order-preserving @llvm.masked.scatter.
2788 if (NumReadWrites == 1 && NumReads == 0) {
2789 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2790 return true;
2791 }
2792
2793 // Build dependence sets and check whether we need a runtime pointer bounds
2794 // check.
2795 Accesses.buildDependenceSets();
2796
2797 // Find pointers with computable bounds. We are going to use this information
2798 // to place a runtime bound check.
2799 Value *UncomputablePtr = nullptr;
2800 HasCompletePtrRtChecking =
2801 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2802 UncomputablePtr, AllowPartial, getDepChecker());
2803 if (!HasCompletePtrRtChecking) {
2804 const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2805 recordAnalysis("CantIdentifyArrayBounds", I)
2806 << "cannot identify array bounds";
2807 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2808 << "the array bounds.\n");
2809 return false;
2810 }
2811
2812 LLVM_DEBUG(
2813 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2814
2815 bool DepsAreSafe = true;
2816 if (Accesses.isDependencyCheckNeeded()) {
2817 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2818 DepsAreSafe =
2819 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
2820
2821 if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
2822 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2823
2824 PtrRtChecking->reset();
2825 PtrRtChecking->Need = true;
2826
2827 UncomputablePtr = nullptr;
2828 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2829 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr,
2830 AllowPartial, getDepChecker());
2831
2832 // Check that we found the bounds for the pointer.
2833 if (!HasCompletePtrRtChecking) {
2834 auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2835 recordAnalysis("CantCheckMemDepsAtRunTime", I)
2836 << "cannot check memory dependencies at runtime";
2837 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2838 return false;
2839 }
2840
2841 // Clear the dependency checks. They are no longer needed.
2842 Accesses.resetDepChecks(*DepChecker);
2843
2844 DepsAreSafe = true;
2845 }
2846 }
2847
2848 // Update the invariant address dependence flags based on dependences found
2849 // by the dep checker. Even if dependences were not recorded (too many to
2850 // track), any InvariantUnsafe dep would still have set the status to Unsafe
2851 if (const auto *Deps = DepChecker->getDependences()) {
2852 for (const auto &Dep : *Deps) {
2854 continue;
2855 Instruction *Src = Dep.getSource(*DepChecker);
2856 Instruction *Dst = Dep.getDestination(*DepChecker);
2857 if (isa<LoadInst>(Src) != isa<LoadInst>(Dst)) {
2858 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2859 } else {
2860 assert(isa<StoreInst>(Src) && isa<StoreInst>(Dst) &&
2861 "Expected both to be stores");
2862 HasStoreStoreDependenceInvolvingLoopInvariantAddress = true;
2863 }
2864 }
2865 }
2866
2867 if (HasConvergentOp) {
2868 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2869 << "cannot add control dependency to convergent operation";
2870 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2871 "would be needed with a convergent operation\n");
2872 return false;
2873 }
2874
2875 if (DepsAreSafe) {
2876 LLVM_DEBUG(
2877 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2878 << (PtrRtChecking->Need ? "" : " don't")
2879 << " need runtime memory checks.\n");
2880 return true;
2881 }
2882
2883 emitUnsafeDependenceRemark();
2884 return false;
2885}
2886
2887void LoopAccessInfo::emitUnsafeDependenceRemark() {
2888 const auto *Deps = getDepChecker().getDependences();
2889 if (!Deps)
2890 return;
2891 const auto *Found =
2892 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2895 });
2896 if (Found == Deps->end())
2897 return;
2898 MemoryDepChecker::Dependence Dep = *Found;
2899
2900 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2901
2902 // Emit remark for first unsafe dependence
2903 bool HasForcedDistribution = false;
2904 std::optional<const MDOperand *> Value =
2905 findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2906 if (Value) {
2907 const MDOperand *Op = *Value;
2908 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2909 HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2910 }
2911
2912 const std::string Info =
2913 HasForcedDistribution
2914 ? "unsafe dependent memory operations in loop."
2915 : "unsafe dependent memory operations in loop. Use "
2916 "#pragma clang loop distribute(enable) to allow loop distribution "
2917 "to attempt to isolate the offending operations into a separate "
2918 "loop";
2919 OptimizationRemarkAnalysis &R =
2920 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2921
2922 switch (Dep.Type) {
2926 llvm_unreachable("Unexpected dependence");
2928 R << "\nBackward loop carried data dependence.";
2929 break;
2931 R << "\nForward loop carried data dependence that prevents "
2932 "store-to-load forwarding.";
2933 break;
2935 R << "\nBackward loop carried data dependence that prevents "
2936 "store-to-load forwarding.";
2937 break;
2939 R << "\nUnsafe indirect dependence.";
2940 break;
2942 R << "\nUnsafe dependence on loop-invariant address.";
2943 break;
2945 R << "\nUnknown data dependence.";
2946 break;
2947 }
2948
2949 if (Instruction *I = Dep.getSource(getDepChecker())) {
2950 DebugLoc SourceLoc = I->getDebugLoc();
2952 SourceLoc = DD->getDebugLoc();
2953 if (SourceLoc)
2954 R << " Memory location is the same as accessed at "
2955 << ore::NV("Location", SourceLoc);
2956 }
2957}
2958
2960 const Loop *TheLoop,
2961 const DominatorTree *DT) {
2962 assert(TheLoop->contains(BB) && "Unknown block used");
2963
2964 // Blocks that do not dominate the latch need predication.
2965 const BasicBlock *Latch = TheLoop->getLoopLatch();
2966 return !DT->dominates(BB, Latch);
2967}
2968
2970LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
2971 assert(!Report && "Multiple reports generated");
2972
2973 const BasicBlock *CodeRegion = TheLoop->getHeader();
2974 DebugLoc DL = TheLoop->getStartLoc();
2975
2976 if (I) {
2977 CodeRegion = I->getParent();
2978 // If there is no debug location attached to the instruction, revert back to
2979 // using the loop's.
2980 if (I->getDebugLoc())
2981 DL = I->getDebugLoc();
2982 }
2983
2984 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
2985 DL, CodeRegion);
2986 return *Report;
2987}
2988
2990 auto *SE = PSE->getSE();
2991 if (TheLoop->isLoopInvariant(V))
2992 return true;
2993 if (!SE->isSCEVable(V->getType()))
2994 return false;
2995 const SCEV *S = SE->getSCEV(V);
2996 return SE->isLoopInvariant(S, TheLoop);
2997}
2998
2999/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
3000/// Otherwise, return \p Ptr.
3002 Loop *Lp) {
3003 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3004 if (!GEP)
3005 return Ptr;
3006
3007 Value *V = Ptr;
3008 for (const Use &U : GEP->operands()) {
3009 if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
3010 if (V == Ptr)
3011 V = U;
3012 else
3013 // There must be exactly one loop-variant operand.
3014 return Ptr;
3015 }
3016 }
3017 return V;
3018}
3019
3020/// Get the stride of a pointer access in a loop. Looks for symbolic
3021/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
3022static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
3023 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
3024 if (!PtrTy)
3025 return nullptr;
3026
3027 // Try to remove a gep instruction to make the pointer (actually index at this
3028 // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
3029 // pointer, otherwise, we are analyzing the index.
3030 Value *OrigPtr = Ptr;
3031
3032 Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
3033 const SCEV *V = SE->getSCEV(Ptr);
3034
3035 if (Ptr != OrigPtr)
3036 // Strip off casts.
3037 while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3038 V = C->getOperand();
3039
3041 return nullptr;
3042
3043 // Note that the restriction after this loop invariant check are only
3044 // profitability restrictions.
3045 if (!SE->isLoopInvariant(V, Lp))
3046 return nullptr;
3047
3048 // Look for the loop invariant symbolic value.
3049 if (isa<SCEVUnknown>(V))
3050 return V;
3051
3052 // Look through multiplies that scale a stride by a constant.
3054 if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3055 if (isa<SCEVUnknown>(C->getOperand()))
3056 return V;
3057
3058 return nullptr;
3059}
3060
3061void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
3062 Value *Ptr = getLoadStorePointerOperand(MemAccess);
3063 if (!Ptr)
3064 return;
3065
3066 // Note: getStrideFromPointer is a *profitability* heuristic. We
3067 // could broaden the scope of values returned here - to anything
3068 // which happens to be loop invariant and contributes to the
3069 // computation of an interesting IV - but we chose not to as we
3070 // don't have a cost model here, and broadening the scope exposes
3071 // far too many unprofitable cases.
3072 const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
3073 if (!StrideExpr)
3074 return;
3075
3076 if (match(StrideExpr, m_scev_UndefOrPoison()))
3077 return;
3078
3079 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
3080 "versioning:");
3081 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
3082
3083 if (!SpeculateUnitStride) {
3084 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
3085 return;
3086 }
3087
3088 // Avoid adding the "Stride == 1" predicate when we know that
3089 // Stride >= Trip-Count. Such a predicate will effectively optimize a single
3090 // or zero iteration loop, as Trip-Count <= Stride == 1.
3091 //
3092 // TODO: We are currently not making a very informed decision on when it is
3093 // beneficial to apply stride versioning. It might make more sense that the
3094 // users of this analysis (such as the vectorizer) will trigger it, based on
3095 // their specific cost considerations; For example, in cases where stride
3096 // versioning does not help resolving memory accesses/dependences, the
3097 // vectorizer should evaluate the cost of the runtime test, and the benefit
3098 // of various possible stride specializations, considering the alternatives
3099 // of using gather/scatters (if available).
3100
3101 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
3102
3103 // Match the types so we can compare the stride and the MaxBTC.
3104 // The Stride can be positive/negative, so we sign extend Stride;
3105 // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
3106 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
3107 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
3108 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
3109 const SCEV *CastedStride = StrideExpr;
3110 const SCEV *CastedBECount = MaxBTC;
3111 ScalarEvolution *SE = PSE->getSE();
3112 if (BETypeSizeBits >= StrideTypeSizeBits)
3113 CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
3114 else
3115 CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
3116 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
3117 // Since TripCount == BackEdgeTakenCount + 1, checking:
3118 // "Stride >= TripCount" is equivalent to checking:
3119 // Stride - MaxBTC> 0
3120 if (SE->isKnownPositive(StrideMinusBETaken)) {
3121 LLVM_DEBUG(
3122 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
3123 "Stride==1 predicate will imply that the loop executes "
3124 "at most once.\n");
3125 return;
3126 }
3127 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
3128
3129 // Strip back off the integer cast, and check that our result is a
3130 // SCEVUnknown as we expect.
3131 const SCEV *StrideBase = StrideExpr;
3132 if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
3133 StrideBase = C->getOperand();
3134 SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
3135}
3136
3138 const TargetTransformInfo *TTI,
3139 const TargetLibraryInfo *TLI, AAResults *AA,
3140 DominatorTree *DT, LoopInfo *LI,
3141 AssumptionCache *AC, bool AllowPartial)
3142 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3143 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
3144 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3145 if (TTI && !TTI->enableScalableVectorization())
3146 // Scale the vector width by 2 as rough estimate to also consider
3147 // interleaving.
3148 MaxTargetVectorWidthInBits =
3149 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
3150
3151 DepChecker = std::make_unique<MemoryDepChecker>(
3152 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3153 PtrRtChecking =
3154 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3155 if (canAnalyzeLoop())
3156 CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3157}
3158
3159void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3160 if (CanVecMem) {
3161 OS.indent(Depth) << "Memory dependences are safe";
3162 const MemoryDepChecker &DC = getDepChecker();
3163 if (!DC.isSafeForAnyVectorWidth())
3164 OS << " with a maximum safe vector width of "
3165 << DC.getMaxSafeVectorWidthInBits() << " bits";
3168 OS << ", with a maximum safe store-load forward width of " << SLDist
3169 << " bits";
3170 }
3171 if (PtrRtChecking->Need)
3172 OS << " with run-time checks";
3173 OS << "\n";
3174 }
3175
3176 if (HasConvergentOp)
3177 OS.indent(Depth) << "Has convergent operation in loop\n";
3178
3179 if (Report)
3180 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3181
3182 if (auto *Dependences = DepChecker->getDependences()) {
3183 OS.indent(Depth) << "Dependences:\n";
3184 for (const auto &Dep : *Dependences) {
3185 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3186 OS << "\n";
3187 }
3188 } else
3189 OS.indent(Depth) << "Too many dependences, not recorded\n";
3190
3191 // List the pair of accesses need run-time checks to prove independence.
3192 PtrRtChecking->print(OS, Depth);
3193 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
3194 OS.indent(Depth) << "Generated run-time checks are incomplete\n";
3195 OS << "\n";
3196
3197 OS.indent(Depth)
3198 << "Non vectorizable stores to invariant address were "
3199 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3200 HasLoadStoreDependenceInvolvingLoopInvariantAddress
3201 ? ""
3202 : "not ")
3203 << "found in loop.\n";
3204
3205 OS.indent(Depth) << "SCEV assumptions:\n";
3206 PSE->getPredicate().print(OS, Depth);
3207
3208 OS << "\n";
3209
3210 OS.indent(Depth) << "Expressions re-written:\n";
3211 PSE->print(OS, Depth);
3212}
3213
3215 bool AllowPartial) {
3216 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
3217
3218 // We need to create the LoopAccessInfo if either we don't already have one,
3219 // or if it was created with a different value of AllowPartial.
3220 if (Inserted || It->second->hasAllowPartial() != AllowPartial)
3221 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3222 &LI, AC, AllowPartial);
3223
3224 return *It->second;
3225}
3227 // Collect LoopAccessInfo entries that may keep references to IR outside the
3228 // analyzed loop or SCEVs that may have been modified or invalidated. At the
3229 // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3230 // SCEVs, e.g. for pointer expressions.
3231 LoopAccessInfoMap.remove_if([](const auto &Entry) {
3232 const auto &LAI = Entry.second;
3233 return !(LAI->getRuntimePointerChecking()->getChecks().empty() &&
3234 LAI->getPSE().getPredicate().isAlwaysTrue());
3235 });
3236}
3237
3239 Function &F, const PreservedAnalyses &PA,
3240 FunctionAnalysisManager::Invalidator &Inv) {
3241 // Check whether our analysis is preserved.
3242 auto PAC = PA.getChecker<LoopAccessAnalysis>();
3243 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3244 // If not, give up now.
3245 return true;
3246
3247 // Check whether the analyses we depend on became invalid for any reason.
3248 // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3249 // invalid.
3250 return Inv.invalidate<AAManager>(F, PA) ||
3251 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3252 Inv.invalidate<LoopAnalysis>(F, PA) ||
3253 Inv.invalidate<DominatorTreeAnalysis>(F, PA);
3254}
3255
3258 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
3259 auto &AA = FAM.getResult<AAManager>(F);
3260 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3261 auto &LI = FAM.getResult<LoopAnalysis>(F);
3262 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3263 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3264 auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3265 return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
3266}
3267
3268AnalysisKey LoopAccessAnalysis::Key;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
DXIL Resource Access
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
Hexagon Common GEP
#define _
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
static const SCEV * mulSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A * B, if it is guaranteed not to unsigned wrap.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...
static std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
Try to compute a constant stride for AR.
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt)
Check whether AR is a non-wrapping AddRec.
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If Ptr is a GEP, which has a loop-variant operand, return that operand.
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A + B, if it is guaranteed not to unsigned wrap.
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
#define P(N)
FunctionAnalysisManager FAM
This file defines the PointerIntPair class.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const X86InstrFMA3Group Groups[]
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isConvergent() const
Determine if the invoke is convergent.
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
bool isNegative() const
Definition Constants.h:214
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< member_iterator > members(const ECValue &ECV) const
bool contains(const ElemTy &V) const
Returns true if V is contained an equivalence class.
const ECValue & insert(const ElemTy &Data)
Insert a new value into the union/find set, ignoring the request if the value already exists.
member_iterator member_end() const
const ElemTy & getLeaderValue(const ElemTy &V) const
Return the leader for the specified value that is in the set.
member_iterator findLeader(const ElemTy &V) const
Given a value in the set, return a member iterator for the equivalence class it is in.
void eraseClass(const ElemTy &V)
Erase the class containing V, i.e.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
Merge the two equivalence sets for the specified values, inserting them if they do not already exist ...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool empty() const
Definition Function.h:859
PointerType * getType() const
Global values are always pointers.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
LLVM_ABI bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
std::string getLocStr() const
Return a string containing the debug location of the loop (file name + line number if present,...
Definition LoopInfo.cpp:694
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition LoopInfo.cpp:592
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1431
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there are no store-load forwarding dependencies.
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, ArrayRef< MemAccessInfo > CheckDeps)
Check whether the dependencies between the accesses are safe, and records the dependence information ...
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
PointerIntPair< Value *, 1, bool > MemAccessInfo
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
bool shouldRetryWithRuntimeChecks() const
In same cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
LLVM_ABI void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
uint64_t getStoreLoadForwardSafeDistanceInBits() const
Return safe power-of-2 number of elements, which do not prevent store-load forwarding,...
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
LLVM_ABI void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V, SmallVectorImpl< const SCEVPredicate * > *WrapPredsAdded=nullptr)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands)
Generate the checks and store it.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
static constexpr auto NoWrapMask
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?
LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getPtrToAddrExpr(const SCEV *Op)
LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:229
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:898
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
is_undef_or_poison m_scev_UndefOrPoison()
Match an SCEVUnknown wrapping undef or poison.
specificloop_ty m_SpecificLoop(const Loop *L)
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)
Check whether Metadata has a Value.
Definition Metadata.h:651
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, const SCEV * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Calculate Start and End points of memory access using exact backedge taken count BTC if computable or...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})
Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
unsigned getPointerAddressSpace(const Type *T)
Definition SPIRVUtils.h:392
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
IR Values for the lower and upper bounds of a pointer evolution.
MDNode * Scope
The tag for alias scope specification (used with noalias).
Definition Metadata.h:786
MDNode * TBAA
The tag for type-based alias analysis.
Definition Metadata.h:780
MDNode * NoAlias
The tag specifying the noalias scope.
Definition Metadata.h:789
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
LLVM_ABI bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
LLVM_ABI bool isForward() const
Lexically forward dependence.
LLVM_ABI bool isBackward() const
Lexically backward dependence.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static LLVM_ABI const char * DepName[]
String version of the types.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Represent one information held inside an operand bundle of an llvm.assume.
unsigned AddressSpace
Address space of the involved pointers.
LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI unsigned VectorizationFactor
VF as overridden by the user.
static LLVM_ABI unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI bool HoistRuntimeChecks
Function object to check whether the first component of a container supported by std::get (like std::...
Definition STLExtras.h:1438