//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form. In cases where this kicks in, it can be a significant
// performance win.
//
// If compiling for code size we avoid idiom recognition if the resulting
// code could be larger than the code for the original loop. One way this could
// happen is if the loop is not removable after idiom recognition due to the
// presence of non-idiom instructions. The initial implementation of the
// heuristics applies to idioms in multi-block loops.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, strlen, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

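// As a motivating sketch (not part of the pass itself; `p` and `n` are
// hypothetical), the simplest idiom this pass recognizes is a countable loop
// of unit-stride stores of a bytewise-splattable value, which becomes a
// single memset call:
//
//   // before (hypothetical input)
//   for (unsigned i = 0; i != n; ++i)
//     p[i] = 0;
//
//   // after idiom recognition (conceptually)
//   memset(p, 0, n * sizeof(*p));
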
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-idiom"

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
    NumShiftUntilBitTest,
    "Number of uncountable loops recognized as 'shift until bittest' idiom");
STATISTIC(NumShiftUntilZero,
          "Number of uncountable loops recognized as 'shift until zero' idiom");

bool DisableLIRP::All;
static cl::opt<bool, true>
    DisableLIRPAll("disable-" DEBUG_TYPE "-all",
                   cl::desc("Options to disable Loop Idiom Recognize Pass."),
                   cl::location(DisableLIRP::All), cl::init(false),
                   cl::ReallyHidden);

bool DisableLIRP::Memset;
static cl::opt<bool, true>
    DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memset."),
                      cl::location(DisableLIRP::Memset), cl::init(false),
                      cl::ReallyHidden);

bool DisableLIRP::Memcpy;
static cl::opt<bool, true>
    DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memcpy."),
                      cl::location(DisableLIRP::Memcpy), cl::init(false),
                      cl::ReallyHidden);

static cl::opt<bool> UseLIRCodeSizeHeurs(
    "use-lir-code-size-heurs",
    cl::desc("Use loop idiom recognition code size heuristics when compiling "
             "with -Os/-Oz"),
    cl::init(true), cl::Hidden);

namespace {

class LoopIdiomRecognize {
  Loop *CurLoop = nullptr;
  AliasAnalysis *AA;
  DominatorTree *DT;
  LoopInfo *LI;
  ScalarEvolution *SE;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  const DataLayout *DL;
  OptimizationRemarkEmitter &ORE;
  bool ApplyCodeSizeHeuristics;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

public:
  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
                              LoopInfo *LI, ScalarEvolution *SE,
                              TargetLibraryInfo *TLI,
                              const TargetTransformInfo *TTI, MemorySSA *MSSA,
                              const DataLayout *DL,
                              OptimizationRemarkEmitter &ORE)
      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool runOnLoop(Loop *L);

private:
  using StoreList = SmallVector<StoreInst *, 8>;
  using StoreListMap = MapVector<Value *, StoreList>;

  StoreListMap StoreRefsForMemset;
  StoreListMap StoreRefsForMemsetPattern;
  StoreList StoreRefsForMemcpy;
  bool HasMemset;
  bool HasMemsetPattern;
  bool HasMemcpy;

  /// Return code for isLegalStore()
  enum LegalStoreKind {
    None = 0,
    Memset,
    MemsetPattern,
    Memcpy,
    UnorderedAtomicMemcpy,
    DontUse // Dummy retval never to be used. Allows catching errors in retval
            // handling.
  };

  /// \name Countable Loop Idiom Handling
  /// @{

  bool runOnCountableLoop();
  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                      SmallVectorImpl<BasicBlock *> &ExitBlocks);

  void collectStores(BasicBlock *BB);
  LegalStoreKind isLegalStore(StoreInst *SI);
  enum class ForMemset { No, Yes };
  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                         ForMemset For);

  template <typename MemInst>
  bool processLoopMemIntrinsic(
      BasicBlock *BB,
      bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
      const SCEV *BECount);
  bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

  bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
                               MaybeAlign StoreAlignment, Value *StoredVal,
                               Instruction *TheStore,
                               SmallPtrSetImpl<Instruction *> &Stores,
                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
                               bool IsNegStride, bool IsLoopMemset = false);
  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
  bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
                                  const SCEV *StoreSize, MaybeAlign StoreAlign,
                                  MaybeAlign LoadAlign, Instruction *TheStore,
                                  Instruction *TheLoad,
                                  const SCEVAddRecExpr *StoreEv,
                                  const SCEVAddRecExpr *LoadEv,
                                  const SCEV *BECount);
  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                 bool IsLoopMemset = false);

  /// @}
  /// \name Noncountable Loop Idiom Handling
  /// @{

  bool runOnNoncountableLoop();

  bool recognizePopcount();
  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                               PHINode *CntPhi, Value *Var);
  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
                               bool ZeroCheck, size_t CanonicalSize);
  bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
                             Instruction *DefX, PHINode *CntPhi,
                             Instruction *CntInst);
  bool recognizeAndInsertFFS(); ///< Find First Set: ctlz or cttz
  bool recognizeShiftUntilLessThan();
  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                Instruction *CntInst, PHINode *CntPhi,
                                Value *Var, Instruction *DefX,
                                const DebugLoc &DL, bool ZeroCheck,
                                bool IsCntPhiUsedOutsideLoop,
                                bool InsertSub = false);

  bool recognizeShiftUntilBitTest();
  bool recognizeShiftUntilZero();

  /// @}
};
} // end anonymous namespace

PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (DisableLIRP::All)
    return PreservedAnalyses::all();

  const auto *DL = &L.getHeader()->getDataLayout();

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
                         AR.MSSA, DL, ORE);
  if (!LIR.runOnLoop(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

static void deleteDeadInstruction(Instruction *I) {
  I->replaceAllUsesWith(PoisonValue::get(I->getType()));
  I->eraseFromParent();
}

//===----------------------------------------------------------------------===//
//
//          Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//

bool LoopIdiomRecognize::runOnLoop(Loop *L) {
  CurLoop = L;
  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it, just give up.
  if (!L->getLoopPreheader())
    return false;

  // Disable loop idiom recognition if the function's name is a common idiom.
  StringRef Name = L->getHeader()->getParent()->getName();
  if (Name == "memset" || Name == "memcpy")
    return false;

  // Determine if code size heuristics need to be applied.
  ApplyCodeSizeHeuristics =
      L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

  HasMemset = TLI->has(LibFunc_memset);
  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
  HasMemcpy = TLI->has(LibFunc_memcpy);

  if (HasMemset || HasMemsetPattern || HasMemcpy)
    if (SE->hasLoopInvariantBackedgeTakenCount(L))
      return runOnCountableLoop();

  return runOnNoncountableLoop();
}

bool LoopIdiomRecognize::runOnCountableLoop() {
  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
  assert(!isa<SCEVCouldNotCompute>(BECount) &&
         "runOnCountableLoop() called on a loop without a predictable "
         "backedge-taken count");

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getAPInt() == 0)
      return false;

  SmallVector<BasicBlock *, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                    << CurLoop->getHeader()->getParent()->getName()
                    << "] Countable Loop %" << CurLoop->getHeader()->getName()
                    << "\n");

  // The following transforms hoist stores/memsets into the loop pre-header.
  // Give up if the loop has instructions that may throw.
  SimpleLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(CurLoop);
  if (SafetyInfo.anyBlockMayThrow())
    return false;

  bool MadeChange = false;

  // Scan all the blocks in the loop that are not in subloops.
  for (auto *BB : CurLoop->getBlocks()) {
    // Ignore blocks in subloops.
    if (LI->getLoopFor(BB) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
  }
  return MadeChange;
}

static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
  return ConstStride->getAPInt();
}

/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in. Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
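///
/// For example (informally): a strided store of i32 0x01020304 is not
/// bytewise splattable, so it cannot become a plain memset, but it can be
/// widened to a 16-byte pattern of four copies of the constant and emitted
/// as a memset_pattern16 call on targets that provide it.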
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
  // FIXME: This could check for UndefValue because it can be merged into any
  // other valid pattern.

  // If the value isn't a constant, we can't promote it to being in a constant
  // array. We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (!C || isa<ConstantExpr>(C))
    return nullptr;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = DL->getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
    return nullptr;

  // Don't care enough about darwin/ppc to implement this.
  if (DL->isBigEndian())
    return nullptr;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16)
    return nullptr;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16)
    return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16 / Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}

LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
  // Don't touch volatile stores.
  if (SI->isVolatile())
    return LegalStoreKind::None;
  // We only want simple or unordered-atomic stores.
  if (!SI->isUnordered())
    return LegalStoreKind::None;

  // Avoid merging nontemporal stores.
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return LegalStoreKind::None;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Don't convert stores of non-integral pointer types to memsets (which
  // store integers).
  if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return LegalStoreKind::None;

  // Reject stores that are so large that they overflow an unsigned.
  // When storing out scalable vectors we bail out for now, since the code
  // below currently only works for constant strides.
  TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
  if (SizeInBits.isScalable() || (SizeInBits.getFixedValue() & 7) ||
      (SizeInBits.getFixedValue() >> 32) != 0)
    return LegalStoreKind::None;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store. If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return LegalStoreKind::None;

  // Check to see if we have a constant stride.
  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
    return LegalStoreKind::None;

  // See if the store can be turned into a memset.

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored. A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);

  // Note: memset and memset_pattern on unordered-atomic stores are not yet
  // supported.
  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();

  // If we're allowed to form a memset, and the stored value would be
  // acceptable for memset, use it.
  if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
      // Verify that the stored value is loop invariant. If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // It looks like we can use SplatValue.
    return LegalStoreKind::Memset;
  }
  if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
      // Don't create memset_pattern16s with address spaces.
      StorePtr->getType()->getPointerAddressSpace() == 0 &&
      getMemSetPatternValue(StoredVal, DL)) {
    // It looks like we can use PatternValue!
    return LegalStoreKind::MemsetPattern;
  }

  // Otherwise, see if the store can be turned into a memcpy.
  if (HasMemcpy && !DisableLIRP::Memcpy) {
    // Check to see if the stride matches the size of the store. If so, then
    // we know that every byte is touched in the loop.
    APInt Stride = getStoreStride(StoreEv);
    unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
    if (StoreSize != Stride && StoreSize != -Stride)
      return LegalStoreKind::None;

    // The store must be feeding a non-volatile load.
    LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());

    // Only allow non-volatile loads.
    if (!LI || LI->isVolatile())
      return LegalStoreKind::None;
    // Only allow simple or unordered-atomic loads.
    if (!LI->isUnordered())
      return LegalStoreKind::None;

    // See if the pointer expression is an AddRec like {base,+,1} on the
    // current loop, which indicates a strided load. If we have something
    // else, it's a random load we can't handle.
    const SCEVAddRecExpr *LoadEv =
        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
      return LegalStoreKind::None;

    // The store and load must share the same stride.
    if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
      return LegalStoreKind::None;

    // Success. This store can be converted into a memcpy.
    UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
    return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
                           : LegalStoreKind::Memcpy;
  }
  // This store can't be transformed into a memset/memcpy.
  return LegalStoreKind::None;
}

void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
  StoreRefsForMemset.clear();
  StoreRefsForMemsetPattern.clear();
  StoreRefsForMemcpy.clear();
  for (Instruction &I : *BB) {
    StoreInst *SI = dyn_cast<StoreInst>(&I);
    if (!SI)
      continue;

    // Make sure this is a strided store with a constant stride.
    switch (isLegalStore(SI)) {
    case LegalStoreKind::None:
      // Nothing to do.
      break;
    case LegalStoreKind::Memset: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemset[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::MemsetPattern: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemsetPattern[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::Memcpy:
    case LegalStoreKind::UnorderedAtomicMemcpy:
      StoreRefsForMemcpy.push_back(SI);
      break;
    default:
      assert(false && "unhandled return value");
      break;
    }
  }
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
    BasicBlock *BB, const SCEV *BECount,
    SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop. For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop. Verify this now.
  for (BasicBlock *ExitBlock : ExitBlocks)
    if (!DT->dominates(BB, ExitBlock))
      return false;

  bool MadeChange = false;
  // Look for store instructions, which may be optimized to memset/memcpy.
  collectStores(BB);

  // Look for a single store or sets of stores with a common base, which can
  // be optimized into a memset (memset_pattern). The latter most commonly
  // happens with structs and hand-unrolled loops.
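  //
  // As an informal sketch of "hand-unrolled" (p and i are hypothetical):
  // four adjacent i8 stores from one unrolled iteration,
  //   p[i] = 0; p[i+1] = 0; p[i+2] = 0; p[i+3] = 0;
  // share a base pointer and are consecutive, so processLoopStores can chain
  // them together and promote the group to a single wide memset.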
  for (auto &SL : StoreRefsForMemset)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);

  for (auto &SL : StoreRefsForMemsetPattern)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);

  // Optimize the store into a memcpy, if it feeds a similarly strided load.
  for (auto &SI : StoreRefsForMemcpy)
    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);

  MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
      BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
  MadeChange |= processLoopMemIntrinsic<MemSetInst>(
      BB, &LoopIdiomRecognize::processLoopMemSet, BECount);

  return MadeChange;
}

/// See if these stores can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                           const SCEV *BECount, ForMemset For) {
  // Try to find consecutive stores that can be transformed into memsets.
  SetVector<StoreInst *> Heads, Tails;
  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;

  // Do a quadratic search on all of the given stores and find
  // all of the pairs of stores that follow each other.
  SmallVector<unsigned, 16> IndexQueue;
  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");

    Value *FirstStoredVal = SL[i]->getValueOperand();
    Value *FirstStorePtr = SL[i]->getPointerOperand();
    const SCEVAddRecExpr *FirstStoreEv =
        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
    APInt FirstStride = getStoreStride(FirstStoreEv);
    unsigned FirstStoreSize =
        DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());

    // See if we can optimize just this store in isolation.
    if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
      Heads.insert(SL[i]);
      continue;
    }

    Value *FirstSplatValue = nullptr;
    Constant *FirstPatternValue = nullptr;

    if (For == ForMemset::Yes)
      FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
    else
      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);

    assert((FirstSplatValue || FirstPatternValue) &&
           "Expected either splat value or pattern value.");

    IndexQueue.clear();
    // If a store has multiple consecutive store candidates, search the Stores
    // array according to the sequence: from i+1 to e, then from i-1 to 0.
    // This is because pairing with the immediately succeeding or preceding
    // candidate usually creates the best chance to find a memset opportunity.
    unsigned j = 0;
    for (j = i + 1; j < e; ++j)
      IndexQueue.push_back(j);
    for (j = i; j > 0; --j)
      IndexQueue.push_back(j - 1);

    for (auto &k : IndexQueue) {
      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
      Value *SecondStorePtr = SL[k]->getPointerOperand();
      const SCEVAddRecExpr *SecondStoreEv =
          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
      APInt SecondStride = getStoreStride(SecondStoreEv);

      if (FirstStride != SecondStride)
        continue;

      Value *SecondStoredVal = SL[k]->getValueOperand();
      Value *SecondSplatValue = nullptr;
      Constant *SecondPatternValue = nullptr;

      if (For == ForMemset::Yes)
        SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
      else
        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);

      assert((SecondSplatValue || SecondPatternValue) &&
             "Expected either splat value or pattern value.");

      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
        if (For == ForMemset::Yes) {
          if (isa<UndefValue>(FirstSplatValue))
            FirstSplatValue = SecondSplatValue;
          if (FirstSplatValue != SecondSplatValue)
            continue;
        } else {
          if (isa<UndefValue>(FirstPatternValue))
            FirstPatternValue = SecondPatternValue;
          if (FirstPatternValue != SecondPatternValue)
            continue;
        }
        Tails.insert(SL[k]);
        Heads.insert(SL[i]);
        ConsecutiveChain[SL[i]] = SL[k];
        break;
      }
    }
  }

  // We may run into multiple chains that merge into a single chain. We mark
  // the stores that we transformed so that we don't visit the same store
  // twice.
  SmallPtrSet<Value *, 16> TransformedStores;
  bool Changed = false;

  // For stores that start but don't end a link in the chain:
  for (StoreInst *I : Heads) {
    if (Tails.count(I))
      continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to transform it.
    SmallPtrSet<Instruction *, 8> AdjacentStores;
    StoreInst *HeadStore = I;
    unsigned StoreSize = 0;

    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (TransformedStores.count(I))
        break;
      AdjacentStores.insert(I);

      StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    Value *StoredVal = HeadStore->getValueOperand();
    Value *StorePtr = HeadStore->getPointerOperand();
    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
    APInt Stride = getStoreStride(StoreEv);

    // Check to see if the stride matches the size of the stores. If so, then
    // we know that every byte is touched in the loop.
    if (StoreSize != Stride && StoreSize != -Stride)
      continue;

    bool IsNegStride = StoreSize == -Stride;

    Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
    const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
    if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
                                MaybeAlign(HeadStore->getAlign()), StoredVal,
                                HeadStore, AdjacentStores, StoreEv, BECount,
                                IsNegStride)) {
      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
      Changed = true;
    }
  }

  return Changed;
}

/// processLoopMemIntrinsic - Template function for calling different processor
/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
    BasicBlock *BB,
    bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
    const SCEV *BECount) {
  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;
    // Look for memory instructions, which may be optimized to a larger one.
    if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
      WeakTrackingVH InstPtr(&*I);
      if (!(this->*Processor)(MI, BECount))
        continue;
      MadeChange = true;

      // If processing the instruction invalidated our iterator, start over
      // from the top of the block.
      if (!InstPtr)
        I = BB->begin();
    }
  }
  return MadeChange;
}

/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy.
bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
                                           const SCEV *BECount) {
  // We can only handle non-volatile memcpys with a constant size.
  if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
    return false;

  // If we're not allowed to hack on memcpy, we fail.
  if ((!HasMemcpy && !isa<MemCpyInlineInst>(MCI)) || DisableLIRP::Memcpy)
    return false;

  Value *Dest = MCI->getDest();
  Value *Source = MCI->getSource();
  if (!Dest || !Source)
    return false;

  // See if the load and store pointer expressions are AddRec like {base,+,1}
  // on the current loop, which indicates a strided load and store. If we have
  // something else, it's a random load or store we can't handle.
  const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;
  const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
    return false;

  // Reject memcpys that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check if the stride matches the size of the memcpy. If so, then we know
  // that every byte is touched in the loop.
  const SCEVConstant *ConstStoreStride =
      dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
  const SCEVConstant *ConstLoadStride =
      dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
  if (!ConstStoreStride || !ConstLoadStride)
    return false;

  APInt StoreStrideValue = ConstStoreStride->getAPInt();
  APInt LoadStrideValue = ConstLoadStride->getAPInt();
  // Huge stride value - give up.
  if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
    return false;

  if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
             << ore::NV("Inst", "memcpy") << " in "
             << ore::NV("Function", MCI->getFunction())
             << " function will not be hoisted: "
             << ore::NV("Reason", "memcpy size is not equal to stride");
    });
    return false;
  }

  int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
  int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
  // Check if the load stride matches the store stride.
  if (StoreStrideInt != LoadStrideInt)
    return false;

  return processLoopStoreOfLoopLoad(
      Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
      MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
      BECount);
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
                                           const SCEV *BECount) {
  // We can only handle non-volatile memsets.
  if (MSI->isVolatile())
    return false;

  // If we're not allowed to hack on memset, we fail.
  if (!HasMemset || DisableLIRP::Memset)
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store. If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (!Ev || Ev->getLoop() != CurLoop)
    return false;
  if (!Ev->isAffine()) {
    LLVM_DEBUG(dbgs() << "  Pointer is not affine, abort\n");
    return false;
  }

  const SCEV *PointerStrideSCEV = Ev->getOperand(1);
  const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
  if (!PointerStrideSCEV || !MemsetSizeSCEV)
    return false;

  bool IsNegStride = false;
  const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());

  if (IsConstantSize) {
    // Memset size is constant.
    // Check if the pointer stride matches the memset size. If so, then
    // we know that every byte is touched in the loop.
    LLVM_DEBUG(dbgs() << "  memset size is constant\n");
    uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
    if (!ConstStride)
      return false;

    APInt Stride = ConstStride->getAPInt();
    if (SizeInBytes != Stride && SizeInBytes != -Stride)
      return false;

    IsNegStride = SizeInBytes == -Stride;
  } else {
    // Memset size is non-constant.
    // Check if the pointer stride matches the memset size.
    // To be conservative, the pass would not promote pointers that aren't in
    // address space zero. Also, the pass only handles memset length and stride
    // that are invariant for the top level loop.
    LLVM_DEBUG(dbgs() << "  memset size is non-constant\n");
    if (Pointer->getType()->getPointerAddressSpace() != 0) {
      LLVM_DEBUG(dbgs() << "  pointer is not in address space zero, "
                        << "abort\n");
      return false;
    }
    if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
      LLVM_DEBUG(dbgs() << "  memset size is not loop-invariant, "
                        << "abort\n");
      return false;
    }

    // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV.
    IsNegStride = PointerStrideSCEV->isNonConstantNegative();
    const SCEV *PositiveStrideSCEV =
        IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
                    : PointerStrideSCEV;
    LLVM_DEBUG(dbgs() << "  MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
                      << "  PositiveStrideSCEV: " << *PositiveStrideSCEV
                      << "\n");

    if (PositiveStrideSCEV != MemsetSizeSCEV) {
      // If an expression is covered by the loop guard, compare again and
      // proceed with optimization if equal.
      const SCEV *FoldedPositiveStride =
          SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
      const SCEV *FoldedMemsetSize =
          SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);

      LLVM_DEBUG(dbgs() << "  Try to fold SCEV based on loop guard\n"
                        << "    FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
                        << "    FoldedPositiveStride: " << *FoldedPositiveStride
                        << "\n");

      if (FoldedPositiveStride != FoldedMemsetSize) {
        LLVM_DEBUG(dbgs() << "  SCEVs don't match, abort\n");
        return false;
      }
    }
  }

  // Verify that the memset value is loop invariant. If not, we can't promote
  // the memset.
  Value *SplatValue = MSI->getValue();
  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
    return false;

  SmallPtrSet<Instruction *, 1> MSIs;
  MSIs.insert(MSI);
  return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
                                 MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,
                                 BECount, IsNegStride, /*IsLoopMemset=*/true);
}

/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
                      const SCEV *BECount, const SCEV *StoreSizeSCEV,
                      AliasAnalysis &AA,
                      SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
  // Get the location that may be stored across the loop. Since the access
  // is strided positively through memory, we say that the modified location
  // starts at the pointer and has infinite size.
  LocationSize AccessSize = LocationSize::afterPointer();

  // If the loop iterates a fixed number of times, we can refine the access
  // size to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
  const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
  if (BECst && ConstSize) {
    std::optional<uint64_t> BEInt = BECst->getAPInt().tryZExtValue();
    std::optional<uint64_t> SizeInt = ConstSize->getAPInt().tryZExtValue();
    // FIXME: Should this check for overflow?
    if (BEInt && SizeInt)
      AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt);
  }

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store. A store to &A[i] of 100 elements will always return
  // MayAlias with a store to &A[100]; we need StoreLoc to be "A" with size
  // 100, which will then no-alias a store to &A[100].
  MemoryLocation StoreLoc(Ptr, AccessSize);

  for (BasicBlock *B : L->blocks())
    for (Instruction &I : *B)
      if (!IgnoredInsts.contains(&I) &&
          isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
        return true;
  return false;
}

// If we have a negative stride, Start refers to the end of the memory
// location we're trying to memset. Therefore, we need to recompute the base
// pointer, which is just Start - BECount*Size.
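//
// For example (a sketch of the arithmetic, not code from this file): for a
// loop "for (i = n-1; i >= 0; --i) p[i] = 0", the addrec start is &p[n-1],
// BECount is n-1, and the store size is 1 byte, so the base recomputes to
// &p[n-1] - (n-1) == &p[0], the lowest address the loop touches.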
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
                                        Type *IntPtr, const SCEV *StoreSizeSCEV,
                                        ScalarEvolution *SE) {
  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
  if (!StoreSizeSCEV->isOne()) {
    // index = back edge count * store size
    Index = SE->getMulExpr(Index,
                           SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
                           SCEV::FlagNUW);
  }
  // base pointer = start - index * store size
  return SE->getMinusSCEV(Start, Index);
}

/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
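///
/// For example (informally): with BECount == n-1 and a 4-byte store size,
/// the trip count is n and the result is the SCEV for n*4, the total number
/// of bytes written by the loop.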
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
                               const SCEV *StoreSizeSCEV, Loop *CurLoop,
                               const DataLayout *DL, ScalarEvolution *SE) {
  const SCEV *TripCountSCEV =
      SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop);
  return SE->getMulExpr(TripCountSCEV,
                        SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
                        SCEV::FlagNUW);
}

/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do
/// so.
bool LoopIdiomRecognize::processLoopStridedStore(
    Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
    Value *StoredVal, Instruction *TheStore,
    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
    const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
  Module *M = TheStore->getModule();
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
  Constant *PatternValue = nullptr;

  if (!SplatValue)
    PatternValue = getMemSetPatternValue(StoredVal, DL);

  assert((SplatValue || PatternValue) &&
         "Expected either splat value or pattern value.");

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header. This allows us to insert code for it in the preheader.
  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");
  SCEVExpanderCleaner ExpCleaner(Expander);

  Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
  Type *IntIdxTy = DL->getIndexType(DestPtr->getType());

  bool Changed = false;
  const SCEV *Start = Ev->getStart();
  // Handle negative strided loops.
  if (IsNegStride)
    Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // TODO: ideally we should still be able to generate memset if SCEV expander
  // is taught to generate the dependencies at the latest point.
  if (!Expander.isSafeToExpand(Start))
    return Changed;

  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
  // this into a memset in the loop preheader now if we want. However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location. Check for any overlap by generating the
  // base pointer and checking the region.
  Value *BasePtr =
      Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());

  // From here on out, conservatively report to the pass manager that we've
  // changed the IR, even if we later clean up these added instructions. There
  // may be structural differences e.g. in the order of use lists not accounted
  // for in just a textual dump of the IR. This is written as a variable, even
  // though statically all the places this dominates could be replaced with
  // 'true', with the hope that anyone trying to be clever / "more precise"
  // with the return value will read this comment, and leave them alone.
  Changed = true;

  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, Stores))
    return Changed;

  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
    return Changed;

  // Okay, everything looks good, insert the memset.

  const SCEV *NumBytesS =
      getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

  // TODO: ideally we should still be able to generate memset if SCEV expander
  // is taught to generate the dependencies at the latest point.
  if (!Expander.isSafeToExpand(NumBytesS))
    return Changed;

  Value *NumBytes =
      Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

  if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
    return Changed;

  AAMDNodes AATags = TheStore->getAAMetadata();
  for (Instruction *Store : Stores)
    AATags = AATags.merge(Store->getAAMetadata());
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
    AATags = AATags.extendTo(CI->getZExtValue());
  else
    AATags = AATags.extendTo(-1);

  CallInst *NewCall;
  if (SplatValue) {
    NewCall = Builder.CreateMemSet(
        BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
        /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
  } else {
    assert(isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
    // Everything is emitted in default address space.
    Type *Int8PtrTy = DestInt8PtrTy;

    StringRef FuncName = "memset_pattern16";
    FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
                                            Builder.getVoidTy(), Int8PtrTy,
                                            Int8PtrTy, IntIdxTy);
    inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);

    // Otherwise we should form a memset_pattern16. PatternValue is known to
    // be a constant array of 16 bytes. Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::PrivateLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
    GV->setAlignment(Align(16));
    Value *PatternPtr = GV;
    NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});

    // Set the TBAA info if present.
    if (AATags.TBAA)
      NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);

    if (AATags.Scope)
      NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);

    if (AATags.NoAlias)
      NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
  }

  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
  }

  LLVM_DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
                    << "    from store to: " << *Ev << " at: " << *TheStore
                    << "\n");

  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
                         NewCall->getDebugLoc(), Preheader);
    R << "Transformed loop-strided store in "
      << ore::NV("Function", TheStore->getFunction())
      << " function into a call to "
      << ore::NV("NewFunction", NewCall->getCalledFunction())
      << "() intrinsic";
    if (!Stores.empty())
      R << ore::setExtraArgs();
    for (auto *I : Stores) {
      R << ore::NV("FromBlock", I->getParent()->getName())
        << ore::NV("ToBlock", Preheader->getName());
    }
    return R;
  });

  // Okay, the memset has been formed. Zap the original store and anything
  // that feeds into it.
  for (auto *I : Stores) {
    if (MSSAU)
      MSSAU->removeMemoryAccess(I, true);
    deleteDeadInstruction(I);
  }
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  ++NumMemSet;
  ExpCleaner.markResultUsed();
  return true;
}

/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
///   for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                    const SCEV *BECount) {
  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");

  Value *StorePtr = SI->getPointerOperand();
  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

  // The store must be feeding a non-volatile load.
  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided load. If we have something else, it's a
  // random load we can't handle.
  Value *LoadPtr = LI->getPointerOperand();
  const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));

  const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
  return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
                                    SI->getAlign(), LI->getAlign(), SI, LI,
                                    StoreEv, LoadEv, BECount);
}

namespace {
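// An informal note on MemmoveVerifier (summarizing the checks below): it
// decides whether an overlapping load/store pair may still be lowered as a
// memmove. For example, "for (i) p[i] = p[i + 1];" reads one element ahead
// of where it writes, so a memmove over the whole region is safe, whereas
// "p[i + 1] = p[i]" with the same positive stride would be rejected.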
class MemmoveVerifier {
public:
  explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
                           const DataLayout &DL)
      : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
                    LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
        BP2(llvm::GetPointerBaseWithConstantOffset(
                StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
        IsSameObject(BP1 == BP2) {}

  bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
                                  const Instruction &TheLoad,
                                  bool IsMemCpy) const {
    if (IsMemCpy) {
      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
      // for negative stride.
      if ((!IsNegStride && LoadOff <= StoreOff) ||
          (IsNegStride && LoadOff >= StoreOff))
        return false;
    } else {
      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
      // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
      int64_t LoadSize =
          DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;
      if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
        return false;
      if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
          (IsNegStride && LoadOff + LoadSize > StoreOff))
        return false;
    }
    return true;
  }

private:
  const DataLayout &DL;
  int64_t LoadOff = 0;
  int64_t StoreOff = 0;
  const Value *BP1;
  const Value *BP2;

public:
  const bool IsSameObject;
};
} // namespace

bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
    Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
    MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
    Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
    const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {

  // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
  // conservatively bail here, since otherwise we may have to transform
  // llvm.memcpy.inline into llvm.memcpy which is illegal.
  if (isa<MemCpyInlineInst>(TheStore))
    return false;

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header. This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");

  SCEVExpanderCleaner ExpCleaner(Expander);

  bool Changed = false;
  const SCEV *StrStart = StoreEv->getStart();
  unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
  Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));

  APInt Stride = getStoreStride(StoreEv);
  const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);

  // TODO: Deal with non-constant size; currently we expect a constant store
  // size.
  assert(ConstStoreSize && "store size is expected to be a constant");

  int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
  bool IsNegStride = StoreSize == -Stride;

  // Handle negative strided loops.
  if (IsNegStride)
    StrStart =
        getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
  // this into a memcpy in the loop preheader now if we want. However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write the memory region we're storing to. This includes the load that
  // feeds the stores. Check for an alias by generating the base address and
  // checking everything.
  Value *StoreBasePtr = Expander.expandCodeFor(
      StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());

  // From here on out, conservatively report to the pass manager that we've
  // changed the IR, even if we later clean up these added instructions. There
  // may be structural differences e.g. in the order of use lists not accounted
  // for in just a textual dump of the IR. This is written as a variable, even
  // though statically all the places this dominates could be replaced with
  // 'true', with the hope that anyone trying to be clever / "more precise"
  // with the return value will read this comment, and leave them alone.
  Changed = true;

  SmallPtrSet<Instruction *, 2> IgnoredInsts;
  IgnoredInsts.insert(TheStore);

  bool IsMemCpy = isa<MemCpyInst>(TheStore);
  const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";

  bool LoopAccessStore =
      mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts);
  if (LoopAccessStore) {
    // For the memmove case it's not enough to guarantee that the loop doesn't
    // access TheStore and TheLoad. Additionally we need to make sure that
    // TheStore is the only user of TheLoad.
    if (!TheLoad->hasOneUse())
      return Changed;
    IgnoredInsts.insert(TheLoad);
    if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
                              BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
      ORE.emit([&]() {
        return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
                                        TheStore)
               << ore::NV("Inst", InstRemark) << " in "
               << ore::NV("Function", TheStore->getFunction())
               << " function will not be hoisted: "
               << ore::NV("Reason", "The loop may access store location");
      });
      return Changed;
    }
    IgnoredInsts.erase(TheLoad);
  }

  const SCEV *LdStart = LoadEv->getStart();
  unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();

  // Handle negative strided loops.
  if (IsNegStride)
    LdStart =
        getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
                                              Preheader->getTerminator());

  // If the store is a memcpy instruction, we must check if it will write to
  // the load memory locations. So remove it from the ignored stores.
  MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
  if (IsMemCpy && !Verifier.IsSameObject)
    IgnoredInsts.erase(TheStore);
  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts)) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
             << ore::NV("Inst", InstRemark) << " in "
             << ore::NV("Function", TheStore->getFunction())
             << " function will not be hoisted: "
             << ore::NV("Reason", "The loop may access load location");
    });
    return Changed;
  }

  bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
  if (UseMemMove)
    if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
                                             IsMemCpy))
      return Changed;

  if (avoidLIRForMultiBlockLoop())
    return Changed;

  // Okay, everything is safe, we can transform this!

  const SCEV *NumBytesS =
      getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

  Value *NumBytes =
      Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

  AAMDNodes AATags = TheLoad->getAAMetadata();
  AAMDNodes StoreAATags = TheStore->getAAMetadata();
  AATags = AATags.merge(StoreAATags);
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
    AATags = AATags.extendTo(CI->getZExtValue());
  else
    AATags = AATags.extendTo(-1);

  CallInst *NewCall = nullptr;
  // Check whether to generate an unordered atomic memcpy:
  //  If the load or store are atomic, then they must necessarily be unordered
  //  by previous checks.
  if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
    if (UseMemMove)
      NewCall = Builder.CreateMemMove(
          StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
          /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
    else
      NewCall =
          Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
                               NumBytes, /*isVolatile=*/false, AATags.TBAA,
                               AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
  } else {
    // For now don't support unordered atomic memmove.
    if (UseMemMove)
      return Changed;
    // We cannot allow unaligned ops for unordered load/store, so reject
    // anything where the alignment isn't at least the element size.
    assert((StoreAlign && LoadAlign) &&
           "Expect unordered load/store to have align.");
    if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
      return Changed;

    // If the element.atomic memcpy is not lowered into explicit
    // loads/stores later, then it will be lowered into an element-size
    // specific lib call. If the lib call doesn't exist for our store size,
    // then we shouldn't generate the memcpy.
    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
      return Changed;

    // Create the call.
    // Note that unordered atomic loads/stores are *required* by the spec to
    // have an alignment but non-atomic loads/stores may not.
    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
        StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
        AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
  }
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
  }

  LLVM_DEBUG(dbgs() << "  Formed new call: " << *NewCall << "\n"
                    << "    from load ptr=" << *LoadEv << " at: " << *TheLoad
                    << "\n"
                    << "    from store ptr=" << *StoreEv << " at: " << *TheStore
                    << "\n");

  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
                              NewCall->getDebugLoc(), Preheader)
           << "Formed a call to "
           << ore::NV("NewFunction", NewCall->getCalledFunction())
           << "() intrinsic from " << ore::NV("Inst", InstRemark)
           << " instruction in " << ore::NV("Function", TheStore->getFunction())
           << " function"
           << ore::setExtraArgs()
           << ore::NV("FromBlock", TheStore->getParent()->getName())
           << ore::NV("ToBlock", Preheader->getName());
  });

  // Okay, a new call to memcpy/memmove has been formed. Zap the original store
  // and anything that feeds into it.
  if (MSSAU)
    MSSAU->removeMemoryAccess(TheStore, true);
  deleteDeadInstruction(TheStore);
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  if (UseMemMove)
    ++NumMemMove;
  else
    ++NumMemCpy;
  ExpCleaner.markResultUsed();
  return true;
}

// When compiling for code size we avoid idiom recognition for a multi-block
// loop unless it is a loop_memset idiom or a memset/memcpy idiom in a nested
// loop.
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
                                                   bool IsLoopMemset) {
  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
    if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
      LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
                        << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
                        << " avoided: multi-block top-level loop\n");
      return true;
    }
  }

  return false;
}

bool LoopIdiomRecognize::runOnNoncountableLoop() {
  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                    << CurLoop->getHeader()->getParent()->getName()
                    << "] Noncountable Loop %"
                    << CurLoop->getHeader()->getName() << "\n");

  return recognizePopcount() || recognizeAndInsertFFS() ||
         recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
         recognizeShiftUntilLessThan();
}

/// Check if the given conditional branch is based on a comparison between
/// a variable and zero, and if the variable is non-zero (or zero, when
/// JmpOnZero is true), the control yields to the loop entry. If the branch
/// matches the behavior, the variable involved in the comparison is returned.
/// This function will be called to see if the precondition and postcondition
/// of the loop are in desirable form.
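///
/// For example (a sketch): given the branch
///   br (icmp ne %x, 0), label %loop-entry, label %loop-exit
/// this returns %x, since control yields to the loop entry exactly when %x
/// is non-zero (with JmpOnZero, the successors are considered swapped).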
1503 bool JmpOnZero = false) {
1504 if (!BI || !BI->isConditional())
1505 return nullptr;
1506
1507 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1508 if (!Cond)
1509 return nullptr;
1510
1511 ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1512 if (!CmpZero || !CmpZero->isZero())
1513 return nullptr;
1514
1515 BasicBlock *TrueSucc = BI->getSuccessor(0);
1516 BasicBlock *FalseSucc = BI->getSuccessor(1);
1517 if (JmpOnZero)
1518 std::swap(TrueSucc, FalseSucc);
1519
1520 ICmpInst::Predicate Pred = Cond->getPredicate();
1521 if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1522 (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1523 return Cond->getOperand(0);
1524
1525 return nullptr;
1526}
1527
1528 /// Check if the given conditional branch is based on an unsigned less-than
1529 /// comparison between a variable and a constant, and if the comparison is
1530 /// false the control yields to the loop entry. If the branch matches this
1531 /// behaviour, the variable involved in the comparison is returned.
1532 static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
1533 APInt &Threshold) {
1534 if (!BI || !BI->isConditional())
1535 return nullptr;
1536
1537 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1538 if (!Cond)
1539 return nullptr;
1540
1541 ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
1542 if (!CmpConst)
1543 return nullptr;
1544
1545 BasicBlock *FalseSucc = BI->getSuccessor(1);
1546 ICmpInst::Predicate Pred = Cond->getPredicate();
1547
1548 if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549 Threshold = CmpConst->getValue();
1550 return Cond->getOperand(0);
1551 }
1552
1553 return nullptr;
1554}
1555
1556// Check if the recurrence variable `VarX` is in the right form to create
1557// the idiom. Returns the value coerced to a PHINode if so.
1558 static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
1559 BasicBlock *LoopEntry) {
1560 auto *PhiX = dyn_cast<PHINode>(VarX);
1561 if (PhiX && PhiX->getParent() == LoopEntry &&
1562 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1563 return PhiX;
1564 return nullptr;
1565}
1566
1567/// Return true if the idiom is detected in the loop.
1568///
1569/// Additionally:
1570/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1571/// or nullptr if there is no such.
1572/// 2) \p CntPhi is set to the corresponding phi node
1573/// or nullptr if there is no such.
1574/// 3) \p InitX is set to the value whose CTLZ could be used.
1575/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1576/// 5) \p Threshold is set to the constant involved in the unsigned less-than
1577/// comparison.
1578///
1579/// The core idiom we are trying to detect is:
1580/// \code
1581/// if (x0 < 2)
1582/// goto loop-exit // the precondition of the loop
1583/// cnt0 = init-val
1584/// do {
1585/// x = phi (x0, x.next); //PhiX
1586/// cnt = phi (cnt0, cnt.next)
1587///
1588/// cnt.next = cnt + 1;
1589/// ...
1590/// x.next = x >> 1; // DefX
1591/// } while (x >= 4)
1592/// loop-exit:
1593/// \endcode
1594 static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
1595 Intrinsic::ID &IntrinID,
1596 Value *&InitX, Instruction *&CntInst,
1597 PHINode *&CntPhi, Instruction *&DefX,
1598 APInt &Threshold) {
1599 BasicBlock *LoopEntry;
1600
1601 DefX = nullptr;
1602 CntInst = nullptr;
1603 CntPhi = nullptr;
1604 LoopEntry = *(CurLoop->block_begin());
1605
1606 // step 1: Check if the loop-back branch is in desirable form.
1607 if (Value *T = matchShiftULTCondition(
1608 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
1609 Threshold))
1610 DefX = dyn_cast<Instruction>(T);
1611 else
1612 return false;
1613
1614 // step 2: Check the recurrence of variable X
1615 if (!DefX || !isa<PHINode>(DefX))
1616 return false;
1617
1618 PHINode *VarPhi = cast<PHINode>(DefX);
1619 int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
1620 if (Idx == -1)
1621 return false;
1622
1623 DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
1624 if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
1625 return false;
1626
1627 // step 3: detect instructions corresponding to "x.next = x >> 1"
1628 if (DefX->getOpcode() != Instruction::LShr)
1629 return false;
1630
1631 IntrinID = Intrinsic::ctlz;
1632 ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1633 if (!Shft || !Shft->isOne())
1634 return false;
1635
1636 InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1637
1638 // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
1639 // or cnt.next = cnt + -1.
1640 // TODO: We could skip this step. If the loop trip count is known (CTLZ),
1641 // then all uses of "cnt.next" could be optimized to the trip count
1642 // plus "cnt0". Currently it is not optimized.
1643 // This step could be used to detect the POPCNT instruction:
1644 // cnt.next = cnt + (x.next & 1)
1645 for (Instruction &Inst : llvm::make_range(
1646 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1647 if (Inst.getOpcode() != Instruction::Add)
1648 continue;
1649
1650 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1651 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1652 continue;
1653
1654 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1655 if (!Phi)
1656 continue;
1657
1658 CntInst = &Inst;
1659 CntPhi = Phi;
1660 break;
1661 }
1662 if (!CntInst)
1663 return false;
1664
1665 return true;
1666}
1667
1668/// Return true iff the idiom is detected in the loop.
1669///
1670/// Additionally:
1671/// 1) \p CntInst is set to the instruction counting the population bit.
1672/// 2) \p CntPhi is set to the corresponding phi node.
1673/// 3) \p Var is set to the value whose population bits are being counted.
1674///
1675/// The core idiom we are trying to detect is:
1676/// \code
1677 /// if (x0 == 0)
1678/// goto loop-exit // the precondition of the loop
1679/// cnt0 = init-val;
1680/// do {
1681/// x1 = phi (x0, x2);
1682/// cnt1 = phi(cnt0, cnt2);
1683///
1684/// cnt2 = cnt1 + 1;
1685/// ...
1686/// x2 = x1 & (x1 - 1);
1687/// ...
1688 /// } while(x2 != 0);
1689///
1690/// loop-exit:
1691/// \endcode
1692static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1693 Instruction *&CntInst, PHINode *&CntPhi,
1694 Value *&Var) {
1695 // step 1: Check to see if the loop-back branch matches this pattern:
1696 // "if (a!=0) goto loop-entry".
1697 BasicBlock *LoopEntry;
1698 Instruction *DefX2, *CountInst;
1699 Value *VarX1, *VarX0;
1700 PHINode *PhiX, *CountPhi;
1701
1702 DefX2 = CountInst = nullptr;
1703 VarX1 = VarX0 = nullptr;
1704 PhiX = CountPhi = nullptr;
1705 LoopEntry = *(CurLoop->block_begin());
1706
1707 // step 1: Check if the loop-back branch is in desirable form.
1708 {
1709 if (Value *T = matchCondition(
1710 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1711 DefX2 = dyn_cast<Instruction>(T);
1712 else
1713 return false;
1714 }
1715
1716 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1717 {
1718 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1719 return false;
1720
1721 BinaryOperator *SubOneOp;
1722
1723 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1724 VarX1 = DefX2->getOperand(1);
1725 else {
1726 VarX1 = DefX2->getOperand(0);
1727 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1728 }
1729 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1730 return false;
1731
1732 ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1733 if (!Dec ||
1734 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1735 (SubOneOp->getOpcode() == Instruction::Add &&
1736 Dec->isMinusOne()))) {
1737 return false;
1738 }
1739 }
1740
1741 // step 3: Check the recurrence of variable X
1742 PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1743 if (!PhiX)
1744 return false;
1745
1746 // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
1747 {
1748 CountInst = nullptr;
1749 for (Instruction &Inst : llvm::make_range(
1750 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1751 if (Inst.getOpcode() != Instruction::Add)
1752 continue;
1753
1754 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1755 if (!Inc || !Inc->isOne())
1756 continue;
1757
1758 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1759 if (!Phi)
1760 continue;
1761
1762 // Check if the result of the instruction is live out of the loop.
1763 bool LiveOutLoop = false;
1764 for (User *U : Inst.users()) {
1765 if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1766 LiveOutLoop = true;
1767 break;
1768 }
1769 }
1770
1771 if (LiveOutLoop) {
1772 CountInst = &Inst;
1773 CountPhi = Phi;
1774 break;
1775 }
1776 }
1777
1778 if (!CountInst)
1779 return false;
1780 }
1781
1782 // step 5: check if the precondition is in this form:
1783 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1784 {
1785 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1786 Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1787 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1788 return false;
1789
1790 CntInst = CountInst;
1791 CntPhi = CountPhi;
1792 Var = T;
1793 }
1794
1795 return true;
1796}
1797
1798/// Return true if the idiom is detected in the loop.
1799///
1800/// Additionally:
1801/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1802/// or nullptr if there is no such.
1803/// 2) \p CntPhi is set to the corresponding phi node
1804/// or nullptr if there is no such.
1805/// 3) \p Var is set to the value whose CTLZ could be used.
1806/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1807///
1808/// The core idiom we are trying to detect is:
1809/// \code
1810/// if (x0 == 0)
1811/// goto loop-exit // the precondition of the loop
1812/// cnt0 = init-val;
1813/// do {
1814/// x = phi (x0, x.next); //PhiX
1815/// cnt = phi(cnt0, cnt.next);
1816///
1817/// cnt.next = cnt + 1;
1818/// ...
1819/// x.next = x >> 1; // DefX
1820/// ...
1821/// } while(x.next != 0);
1822///
1823/// loop-exit:
1824/// \endcode
1825static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1826 Intrinsic::ID &IntrinID, Value *&InitX,
1827 Instruction *&CntInst, PHINode *&CntPhi,
1828 Instruction *&DefX) {
1829 BasicBlock *LoopEntry;
1830 Value *VarX = nullptr;
1831
1832 DefX = nullptr;
1833 CntInst = nullptr;
1834 CntPhi = nullptr;
1835 LoopEntry = *(CurLoop->block_begin());
1836
1837 // step 1: Check if the loop-back branch is in desirable form.
1838 if (Value *T = matchCondition(
1839 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1840 DefX = dyn_cast<Instruction>(T);
1841 else
1842 return false;
1843
1844 // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1845 if (!DefX || !DefX->isShift())
1846 return false;
1847 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1848 Intrinsic::ctlz;
1849 ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1850 if (!Shft || !Shft->isOne())
1851 return false;
1852 VarX = DefX->getOperand(0);
1853
1854 // step 3: Check the recurrence of variable X
1855 PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1856 if (!PhiX)
1857 return false;
1858
1859 InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1860
1861 // Make sure the initial value can't be negative otherwise the ashr in the
1862 // loop might never reach zero which would make the loop infinite.
1863 if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1864 return false;
1865
1866 // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
1867 // or cnt.next = cnt + -1.
1868 // TODO: We could skip this step. If the loop trip count is known (CTLZ),
1869 // then all uses of "cnt.next" could be optimized to the trip count
1870 // plus "cnt0". Currently it is not optimized.
1871 // This step could be used to detect the POPCNT instruction:
1872 // cnt.next = cnt + (x.next & 1)
1873 for (Instruction &Inst : llvm::make_range(
1874 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1875 if (Inst.getOpcode() != Instruction::Add)
1876 continue;
1877
1878 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1879 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1880 continue;
1881
1882 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1883 if (!Phi)
1884 continue;
1885
1886 CntInst = &Inst;
1887 CntPhi = Phi;
1888 break;
1889 }
1890 if (!CntInst)
1891 return false;
1892
1893 return true;
1894}
1895
1896// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1897// profitable if we delete the loop.
1898bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
1899 Value *InitX, bool ZeroCheck,
1900 size_t CanonicalSize) {
1901 const Value *Args[] = {InitX,
1902 ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
1903
1904 // @llvm.dbg doesn't count as they have no semantic effect.
1905 auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1906 uint32_t HeaderSize =
1907 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1908
1909 IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1910 InstructionCost Cost = TTI->getIntrinsicInstrCost(
1911 Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912 if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1913 return false;
1914
1915 return true;
1916}
1917
1918 /// Convert a CTLZ / CTTZ idiom loop into a countable loop.
1919 /// If a CTLZ / CTTZ intrinsic is inserted as the new trip count, returns
1920 /// true; otherwise, returns false.
1921bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
1922 Value *InitX, Instruction *DefX,
1923 PHINode *CntPhi,
1924 Instruction *CntInst) {
1925 bool IsCntPhiUsedOutsideLoop = false;
1926 for (User *U : CntPhi->users())
1927 if (!CurLoop->contains(cast<Instruction>(U))) {
1928 IsCntPhiUsedOutsideLoop = true;
1929 break;
1930 }
1931 bool IsCntInstUsedOutsideLoop = false;
1932 for (User *U : CntInst->users())
1933 if (!CurLoop->contains(cast<Instruction>(U))) {
1934 IsCntInstUsedOutsideLoop = true;
1935 break;
1936 }
1937 // If both CntInst and CntPhi are used outside the loop the profitability
1938 // is questionable.
1939 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1940 return false;
1941
1942 // For some CPUs the result of the CTLZ(X) intrinsic is undefined
1943 // when X is 0. If we cannot guarantee X != 0, we need to check this
1944 // when expanding.
1945 bool ZeroCheck = false;
1946 // It is safe to assume the preheader exists, as it was checked in the
1947 // parent function runOnLoop.
1948 BasicBlock *PH = CurLoop->getLoopPreheader();
1949
1950 // If we are using the count instruction outside the loop, make sure we
1951 // have a zero check as a precondition. Without the check the loop would run
1952 // one iteration before any check of the input value. This means 0 and 1
1953 // would have identical behavior in the original loop, so the count alone cannot be reproduced unless a zero check rules out the ambiguous case.
1954 if (!IsCntPhiUsedOutsideLoop) {
1955 auto *PreCondBB = PH->getSinglePredecessor();
1956 if (!PreCondBB)
1957 return false;
1958 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1959 if (!PreCondBI)
1960 return false;
1961 if (matchCondition(PreCondBI, PH) != InitX)
1962 return false;
1963 ZeroCheck = true;
1964 }
1965
1966 // FFS idiom loop has only 6 instructions:
1967 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1968 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1969 // %shr = ashr %n.addr.0, 1
1970 // %tobool = icmp eq %shr, 0
1971 // %inc = add nsw %i.0, 1
1972 // br i1 %tobool
1973 size_t IdiomCanonicalSize = 6;
1974 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975 return false;
1976
1977 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978 DefX->getDebugLoc(), ZeroCheck,
1979 IsCntPhiUsedOutsideLoop);
1980 return true;
1981}
1982
1983 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984 /// to a countable one (with a CTLZ / CTTZ trip count). If a CTLZ / CTTZ
1985 /// intrinsic is inserted as the new trip count, returns true; otherwise, returns false.
1986bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1987 // Give up if the loop has multiple blocks or multiple backedges.
1988 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1989 return false;
1990
1991 Intrinsic::ID IntrinID;
1992 Value *InitX;
1993 Instruction *DefX = nullptr;
1994 PHINode *CntPhi = nullptr;
1995 Instruction *CntInst = nullptr;
1996
1997 if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998 DefX))
1999 return false;
2000
2001 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2002}
2003
2004bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2005 // Give up if the loop has multiple blocks or multiple backedges.
2006 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2007 return false;
2008
2009 Intrinsic::ID IntrinID;
2010 Value *InitX;
2011 Instruction *DefX = nullptr;
2012 PHINode *CntPhi = nullptr;
2013 Instruction *CntInst = nullptr;
2014
2015 APInt LoopThreshold;
2016 if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
2017 CntPhi, DefX, LoopThreshold))
2018 return false;
2019
2020 if (LoopThreshold == 2) {
2021 // Treat as regular FFS.
2022 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2023 }
2024
2025 // Look for Floor Log2 Idiom.
2026 if (LoopThreshold != 4)
2027 return false;
2028
2029 // Abort if CntPhi is used outside of the loop.
2030 for (User *U : CntPhi->users())
2031 if (!CurLoop->contains(cast<Instruction>(U)))
2032 return false;
2033
2034 // It is safe to assume the preheader exists, as it was checked in the
2035 // parent function runOnLoop.
2036 BasicBlock *PH = CurLoop->getLoopPreheader();
2037 auto *PreCondBB = PH->getSinglePredecessor();
2038 if (!PreCondBB)
2039 return false;
2040 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2041 if (!PreCondBI)
2042 return false;
2043
2044 APInt PreLoopThreshold;
2045 if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
2046 PreLoopThreshold != 2)
2047 return false;
2048
2049 bool ZeroCheck = true;
2050
2051 // the loop has only 6 instructions:
2052 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054 // %shr = ashr %n.addr.0, 1
2055 // %tobool = icmp ult %n.addr.0, C
2056 // %inc = add nsw %i.0, 1
2057 // br i1 %tobool
2058 size_t IdiomCanonicalSize = 6;
2059 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2060 return false;
2061
2062 // log2(x) = w − 1 − clz(x)
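// Worked example (illustrative): for w = 32 and x = 13 (binary 1101),
// clz(x) = 28, so log2(x) = 32 - 1 - 28 = 3 = floor(log2(13)).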
2063 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2064 DefX->getDebugLoc(), ZeroCheck,
2065 /*IsCntPhiUsedOutsideLoop=*/false,
2066 /*InsertSub=*/true);
2067 return true;
2068}
2069
2070/// Recognizes a population count idiom in a non-countable loop.
2071///
2072/// If detected, transforms the relevant code to issue the popcount intrinsic
2073/// function call, and returns true; otherwise, returns false.
2074bool LoopIdiomRecognize::recognizePopcount() {
2075 if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
2076 return false;
2077
2078 // Counting population is usually done with a few arithmetic instructions.
2079 // Such instructions can be easily "absorbed" by vacant slots in a
2080 // non-compact loop. Therefore, recognizing the popcount idiom only makes
2081 // sense in a compact loop.
2082
2083 // Give up if the loop has multiple blocks or multiple backedges.
2084 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2085 return false;
2086
2087 BasicBlock *LoopBody = *(CurLoop->block_begin());
2088 if (LoopBody->size() >= 20) {
2089 // The loop is too big, bail out.
2090 return false;
2091 }
2092
2093 // It should have a preheader containing nothing but an unconditional branch.
2094 BasicBlock *PH = CurLoop->getLoopPreheader();
2095 if (!PH || &PH->front() != PH->getTerminator())
2096 return false;
2097 auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
2098 if (!EntryBI || EntryBI->isConditional())
2099 return false;
2100
2101 // It should have a precondition block where the generated popcount intrinsic
2102 // function can be inserted.
2103 auto *PreCondBB = PH->getSinglePredecessor();
2104 if (!PreCondBB)
2105 return false;
2106 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2107 if (!PreCondBI || PreCondBI->isUnconditional())
2108 return false;
2109
2110 Instruction *CntInst;
2111 PHINode *CntPhi;
2112 Value *Val;
2113 if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
2114 return false;
2115
2116 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2117 return true;
2118}
2119
2120 static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
2121 const DebugLoc &DL) {
2122 Value *Ops[] = {Val};
2123 Type *Tys[] = {Val->getType()};
2124
2125 Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
2126 Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
2127 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
2128 CI->setDebugLoc(DL);
2129
2130 return CI;
2131}
2132
2133 static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
2134 const DebugLoc &DL, bool ZeroCheck,
2135 Intrinsic::ID IID) {
2136 Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
2137 Type *Tys[] = {Val->getType()};
2138
2139 Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
2140 Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
2141 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
2142 CI->setDebugLoc(DL);
2143
2144 return CI;
2145}
2146
2147/// Transform the following loop (Using CTLZ, CTTZ is similar):
2148/// loop:
2149/// CntPhi = PHI [Cnt0, CntInst]
2150/// PhiX = PHI [InitX, DefX]
2151/// CntInst = CntPhi + 1
2152/// DefX = PhiX >> 1
2153/// LOOP_BODY
2154/// Br: loop if (DefX != 0)
2155/// Use(CntPhi) or Use(CntInst)
2156///
2157/// Into:
2158/// If CntPhi used outside the loop:
2159/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
2160/// Count = CountPrev + 1
2161/// else
2162/// Count = BitWidth(InitX) - CTLZ(InitX)
2163/// loop:
2164/// CntPhi = PHI [Cnt0, CntInst]
2165/// PhiX = PHI [InitX, DefX]
2166/// PhiCount = PHI [Count, Dec]
2167/// CntInst = CntPhi + 1
2168/// DefX = PhiX >> 1
2169/// Dec = PhiCount - 1
2170/// LOOP_BODY
2171/// Br: loop if (Dec != 0)
2172/// Use(CountPrev + Cnt0) // Use(CntPhi)
2173/// or
2174/// Use(Count + Cnt0) // Use(CntInst)
2175///
2176/// If LOOP_BODY is empty the loop will be deleted.
2177/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
2178void LoopIdiomRecognize::transformLoopToCountable(
2179 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
2180 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2181 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
2182 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
2183
2184 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
2185 IRBuilder<> Builder(PreheaderBr);
2186 Builder.SetCurrentDebugLocation(DL);
2187
2188 // If there are no uses of CntPhi create:
2189 // Count = BitWidth - CTLZ(InitX);
2190 // NewCount = Count;
2191 // If there are uses of CntPhi create:
2192 // NewCount = BitWidth - CTLZ(InitX >> 1);
2193 // Count = NewCount + 1;
2194 Value *InitXNext;
2195 if (IsCntPhiUsedOutsideLoop) {
2196 if (DefX->getOpcode() == Instruction::AShr)
2197 InitXNext = Builder.CreateAShr(InitX, 1);
2198 else if (DefX->getOpcode() == Instruction::LShr)
2199 InitXNext = Builder.CreateLShr(InitX, 1);
2200 else if (DefX->getOpcode() == Instruction::Shl) // cttz
2201 InitXNext = Builder.CreateShl(InitX, 1);
2202 else
2203 llvm_unreachable("Unexpected opcode!");
2204 } else
2205 InitXNext = InitX;
2206 Value *Count =
2207 createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
2208 Type *CountTy = Count->getType();
2209 Count = Builder.CreateSub(
2210 ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2211 if (InsertSub)
2212 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2213 Value *NewCount = Count;
2214 if (IsCntPhiUsedOutsideLoop)
2215 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2216
2217 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2218
2219 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
2220 if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
2221 // If the counter was being incremented in the loop, add NewCount to the
2222 // counter's initial value, but only if the initial value is not zero.
2223 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2224 if (!InitConst || !InitConst->isZero())
2225 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2226 } else {
2227 // If the count was being decremented in the loop, subtract NewCount from
2228 // the counter's initial value.
2229 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2230 }
2231
2232 // Step 2: Insert new IV and loop condition:
2233 // loop:
2234 // ...
2235 // PhiCount = PHI [Count, Dec]
2236 // ...
2237 // Dec = PhiCount - 1
2238 // ...
2239 // Br: loop if (Dec != 0)
2240 BasicBlock *Body = *(CurLoop->block_begin());
2241 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2242 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2243
2244 PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
2245 TcPhi->insertBefore(Body->begin());
2246
2247 Builder.SetInsertPoint(LbCond);
2248 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2249 TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2250
2251 TcPhi->addIncoming(Count, Preheader);
2252 TcPhi->addIncoming(TcDec, Body);
2253
2254 CmpInst::Predicate Pred =
2255 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
2256 LbCond->setPredicate(Pred);
2257 LbCond->setOperand(0, TcDec);
2258 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2259
2260 // Step 3: All the references to the original counter outside
2261 // the loop are replaced with the NewCount
2262 if (IsCntPhiUsedOutsideLoop)
2263 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
2264 else
2265 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2266
2267 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
2268 // loop. The loop would otherwise not be deleted even if it becomes empty.
2269 SE->forgetLoop(CurLoop);
2270}
2271
2272void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
2273 Instruction *CntInst,
2274 PHINode *CntPhi, Value *Var) {
2275 BasicBlock *PreHead = CurLoop->getLoopPreheader();
2276 auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
2277 const DebugLoc &DL = CntInst->getDebugLoc();
2278
2279 // Assuming before transformation, the loop is following:
2280 // if (x) // the precondition
2281 // do { cnt++; x &= x - 1; } while(x);
2282
2283 // Step 1: Insert the ctpop instruction at the end of the precondition block
2284 IRBuilder<> Builder(PreCondBr);
2285 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2286 {
2287 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
2288 NewCount = PopCntZext =
2289 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2290
2291 if (NewCount != PopCnt)
2292 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2293
2294 // TripCnt is exactly the number of iterations the loop has
2295 TripCnt = NewCount;
2296
2297 // If the population counter's initial value is not zero, insert Add Inst.
2298 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
2299 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2300 if (!InitConst || !InitConst->isZero()) {
2301 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2302 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2303 }
2304 }
2305
2306 // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
2307 // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
2308 // function would be partially dead code, and downstream passes would drag
2309 // it back from the precondition block to the preheader.
2310 {
2311 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2312
2313 Value *Opnd0 = PopCntZext;
2314 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
2315 if (PreCond->getOperand(0) != Var)
2316 std::swap(Opnd0, Opnd1);
2317
2318 ICmpInst *NewPreCond = cast<ICmpInst>(
2319 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
2320 PreCondBr->setCondition(NewPreCond);
2321
2322 RecursivelyDeleteTriviallyDeadInstructions(PreCond);
2323 }
2324
2325 // Step 3: Note that the population count is exactly the trip count of the
2326 // loop in question, which enables us to convert the loop from a noncountable
2327 // loop into a countable one. The benefit is twofold:
2328 //
2329 // - If the loop only counts population, the entire loop becomes dead after
2330 // the transformation. It is a lot easier to prove a countable loop dead
2331 // than to prove a noncountable one. (In some C dialects, an infinite loop
2332 // isn't dead even if it computes nothing useful. In general, DCE needs
2333 // to prove a noncountable loop finite before it can safely delete it.)
2334 //
2335 // - If the loop also performs something else, it remains alive.
2336 // Since it is transformed to countable form, it can be aggressively
2337 // optimized by some optimizations which are in general not applicable
2338 // to a noncountable loop.
2339 //
2340 // After this step, this loop (conceptually) would look like following:
2341 // newcnt = __builtin_ctpop(x);
2342 // t = newcnt;
2343 // if (x)
2344 // do { cnt++; x &= x-1; t--; } while (t > 0);
2345 BasicBlock *Body = *(CurLoop->block_begin());
2346 {
2347 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2348 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2349 Type *Ty = TripCnt->getType();
2350
2351 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi");
2352 TcPhi->insertBefore(Body->begin());
2353
2354 Builder.SetInsertPoint(LbCond);
2355 Instruction *TcDec = cast<Instruction>(
2356 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2357 "tcdec", false, true));
2358
2359 TcPhi->addIncoming(TripCnt, PreHead);
2360 TcPhi->addIncoming(TcDec, Body);
2361
2362 CmpInst::Predicate Pred =
2363 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
2364 LbCond->setPredicate(Pred);
2365 LbCond->setOperand(0, TcDec);
2366 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2367 }
2368
2369 // Step 4: All the references to the original population counter outside
2370 // the loop are replaced with the NewCount -- the value returned from
2371 // __builtin_ctpop().
2372 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2373
2374 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
2375 // loop. The loop would otherwise not be deleted even if it becomes empty.
2376 SE->forgetLoop(CurLoop);
2377}
2378
2379 /// Match a loop-invariant value.
2380template <typename SubPattern_t> struct match_LoopInvariant {
2381 SubPattern_t SubPattern;
2382 const Loop *L;
2383
2384 match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
2385 : SubPattern(SP), L(L) {}
2386
2387 template <typename ITy> bool match(ITy *V) {
2388 return L->isLoopInvariant(V) && SubPattern.match(V);
2389 }
2390};
2391
2392/// Matches if the value is loop-invariant.
2393template <typename Ty>
2394inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
2395 return match_LoopInvariant<Ty>(M, L);
2396}
2397
2398/// Return true if the idiom is detected in the loop.
2399///
2400/// The core idiom we are trying to detect is:
2401/// \code
2402/// entry:
2403/// <...>
2404/// %bitmask = shl i32 1, %bitpos
2405/// br label %loop
2406///
2407/// loop:
2408/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2409/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2410/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2411/// %x.next = shl i32 %x.curr, 1
2412/// <...>
2413/// br i1 %x.curr.isbitunset, label %loop, label %end
2414///
2415/// end:
2416/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2417/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2418/// <...>
2419/// \endcode
2420static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
2421 Value *&BitMask, Value *&BitPos,
2422 Value *&CurrX, Instruction *&NextX) {
2424 " Performing shift-until-bittest idiom detection.\n");
2425
2426 // Give up if the loop has multiple blocks or multiple backedges.
2427 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2428 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2429 return false;
2430 }
2431
2432 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2433 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2434 assert(LoopPreheaderBB && "There is always a loop preheader.");
2435
2436 using namespace PatternMatch;
2437
2438 // Step 1: Check if the loop backedge is in desirable form.
2439
2440 ICmpInst::Predicate Pred;
2441 Value *CmpLHS, *CmpRHS;
2442 BasicBlock *TrueBB, *FalseBB;
2443 if (!match(LoopHeaderBB->getTerminator(),
2444 m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
2445 m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
2446 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2447 return false;
2448 }
2449
2450 // Step 2: Check if the backedge's condition is in desirable form.
2451
2452 auto MatchVariableBitMask = [&]() {
2453 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2454 match(CmpLHS,
2455 m_c_And(m_Value(CurrX),
2456 m_CombineAnd(
2457 m_Value(BitMask),
2458 m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
2459 CurLoop))));
2460 };
2461 auto MatchConstantBitMask = [&]() {
2462 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2463 match(CmpLHS, m_And(m_Value(CurrX),
2464 m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
2465 (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
2466 };
2467 auto MatchDecomposableConstantBitMask = [&]() {
2468 APInt Mask;
2469 return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
2470 ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
2471 (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
2472 (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
2473 };
2474
2475 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2476 !MatchDecomposableConstantBitMask()) {
2477 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
2478 return false;
2479 }
2480
2481 // Step 3: Check if the recurrence is in desirable form.
2482 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2483 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2484 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2485 return false;
2486 }
2487
2488 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2489 NextX =
2490 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2491
2492 assert(CurLoop->isLoopInvariant(BaseX) &&
2493 "Expected BaseX to be avaliable in the preheader!");
2494
2495 if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2496 // FIXME: support right-shift?
2497 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2498 return false;
2499 }
2500
2501 // Step 4: Check if the backedge's destinations are in desirable form.
2502
2504 "Should only get equality predicates here.");
2505
2506 // cmp-br is commutative, so canonicalize to a single variant.
2507 if (Pred != ICmpInst::Predicate::ICMP_EQ) {
2508 Pred = ICmpInst::getInversePredicate(Pred);
2509 std::swap(TrueBB, FalseBB);
2510 }
2511
2512 // We expect to exit the loop when the comparison yields false,
2513 // so when it yields true we should branch back to the loop header.
2514 if (TrueBB != LoopHeaderBB) {
2515 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2516 return false;
2517 }
2518
2519 // Okay, idiom checks out.
2520 return true;
2521}
2522
2523/// Look for the following loop:
2524/// \code
2525/// entry:
2526/// <...>
2527/// %bitmask = shl i32 1, %bitpos
2528/// br label %loop
2529///
2530/// loop:
2531/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2532/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2533/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2534/// %x.next = shl i32 %x.curr, 1
2535/// <...>
2536/// br i1 %x.curr.isbitunset, label %loop, label %end
2537///
2538/// end:
2539/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2540/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2541/// <...>
2542/// \endcode
2543///
2544/// And transform it into:
2545/// \code
2546/// entry:
2547/// %bitmask = shl i32 1, %bitpos
2548/// %lowbitmask = add i32 %bitmask, -1
2549/// %mask = or i32 %lowbitmask, %bitmask
2550/// %x.masked = and i32 %x, %mask
2551/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
2552/// i1 true)
2553/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
2554/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
2555/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
2556/// %tripcount = add i32 %backedgetakencount, 1
2557/// %x.curr = shl i32 %x, %backedgetakencount
2558/// %x.next = shl i32 %x, %tripcount
2559/// br label %loop
2560///
2561/// loop:
2562/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
2563/// %loop.iv.next = add nuw i32 %loop.iv, 1
2564/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
2565/// <...>
2566/// br i1 %loop.ivcheck, label %end, label %loop
2567///
2568/// end:
2569/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2570/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2571/// <...>
2572/// \endcode
2573bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2574 bool MadeChange = false;
2575
2576 Value *X, *BitMask, *BitPos, *XCurr;
2577 Instruction *XNext;
2578 if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
2579 XNext)) {
2581 " shift-until-bittest idiom detection failed.\n");
2582 return MadeChange;
2583 }
2584 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
2585
2586 // Ok, it is the idiom we were looking for, we *could* transform this loop,
2587 // but is it profitable to transform?
2588
2589 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2590 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2591 assert(LoopPreheaderBB && "There is always a loop preheader.");
2592
2593 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2594 assert(SuccessorBB && "There is only a single successor.");
2595
2596 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2597 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
2598
2599 Intrinsic::ID IntrID = Intrinsic::ctlz;
2600 Type *Ty = X->getType();
2601 unsigned Bitwidth = Ty->getScalarSizeInBits();
2602
2603 TargetTransformInfo::TargetCostKind CostKind =
2604 TargetTransformInfo::TCK_SizeAndLatency;
2605
2606 // The rewrite is considered to be unprofitable if and only if the
2607 // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
2608 // making the loop countable, even if nothing else changes.
2609 IntrinsicCostAttributes Attrs(
2610 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()});
2611 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2612 if (Cost > TargetTransformInfo::TCC_Basic) {
2613 LLVM_DEBUG(dbgs() << DEBUG_TYPE
2614 " Intrinsic is too costly, not beneficial\n");
2615 return MadeChange;
2616 }
2617 if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
2619 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
2620 return MadeChange;
2621 }
2622
2623 // Ok, transform appears worthwhile.
2624 MadeChange = true;
2625
2626 if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
2627 // BitMask may be computed from BitPos; freeze BitPos so we can increase
2628 // its use count.
2629 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
2630 if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
2631 InsertPt = BitPosI->getInsertionPointAfterDef();
2632 else
2633 InsertPt = DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
2634 if (!InsertPt)
2635 return false;
2636 FreezeInst *BitPosFrozen =
2637 new FreezeInst(BitPos, BitPos->getName() + ".fr", *InsertPt);
2638 BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
2639 return U.getUser() != BitPosFrozen;
2640 });
2641 BitPos = BitPosFrozen;
2642 }
2643
2644 // Step 1: Compute the loop trip count.
2645
2646 Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
2647 BitPos->getName() + ".lowbitmask");
2648 Value *Mask =
2649 Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
2650 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
2651 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2652 IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
2653 /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
2654 Value *XMaskedNumActiveBits = Builder.CreateSub(
2655 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
2656 XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
2657 /*HasNSW=*/Bitwidth != 2);
2658 Value *XMaskedLeadingOnePos =
2659 Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
2660 XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
2661 /*HasNSW=*/Bitwidth > 2);
2662
2663 Value *LoopBackedgeTakenCount = Builder.CreateSub(
2664 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
2665 /*HasNUW=*/true, /*HasNSW=*/true);
2666 // We know the loop's backedge-taken count, but what's the loop's trip count?
2667 // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
2668 Value *LoopTripCount =
2669 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2670 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
2671 /*HasNSW=*/Bitwidth != 2);
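// Worked example (illustrative): for i8 %x = 1 and %bitpos = 5, the mask
// is 0b00111111 and %x.masked = 1, so numleadingzeros = 7,
// numactivebits = 1, leadingonepos = 0, backedgetakencount = 5 - 0 = 5,
// and tripcount = 6, matching the six values %x.curr takes: 1, 2, 4, 8,
// 16, 32.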
2672
2673 // Step 2: Compute the recurrence's final value without a loop.
2674
2675 // NewX is always safe to compute, because `LoopBackedgeTakenCount`
2676 // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
2677 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
2678 NewX->takeName(XCurr);
2679 if (auto *I = dyn_cast<Instruction>(NewX))
2680 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2681
2682 Value *NewXNext;
2683 // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
2684 // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
2685 // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
2686 // that isn't the case, we'll need to emit an alternative, safe IR.
2687 if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
2688 match(BitPos,
2689 m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
2690 APInt(Ty->getScalarSizeInBits(),
2691 Ty->getScalarSizeInBits() - 1))))
2692 NewXNext = Builder.CreateShl(X, LoopTripCount);
2693 else {
2694 // Otherwise, just additionally shift by one. It's the smallest solution;
2695 // alternatively, we could check that NewX is INT_MIN (or that BitPos is
2696 // bitwidth(x) - 1) and select 0 instead.
2697 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2698 }
2699
2700 NewXNext->takeName(XNext);
2701 if (auto *I = dyn_cast<Instruction>(NewXNext))
2702 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2703
2704 // Step 3: Adjust the successor basic block to receive the computed
2705 // recurrence's final value instead of the recurrence itself.
2706
2707 XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
2708 XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
2709
2710 // Step 4: Rewrite the loop into a countable form, with canonical IV.
2711
2712 // The new canonical induction variable.
2713 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
2714 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2715
2716 // The induction itself.
2717 // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
2718 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2719 auto *IVNext =
2720 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
2721 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
2722
2723 // The loop trip count check.
2724 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2725 CurLoop->getName() + ".ivcheck");
2726 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2727 LoopHeaderBB->getTerminator()->eraseFromParent();
2728
2729 // Populate the IV PHI.
2730 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2731 IV->addIncoming(IVNext, LoopHeaderBB);
2732
2733 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
2734 // loop. The loop would otherwise not be deleted even if it becomes empty.
2735
2736 SE->forgetLoop(CurLoop);
2737
2738 // Other passes will take care of actually deleting the loop if possible.
2739
2740 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
2741
2742 ++NumShiftUntilBitTest;
2743 return MadeChange;
2744}
2745
2746/// Return true if the idiom is detected in the loop.
2747///
2748/// The core idiom we are trying to detect is:
2749/// \code
2750/// entry:
2751/// <...>
2752/// %start = <...>
2753/// %extraoffset = <...>
2754/// <...>
2755/// br label %for.cond
2756///
2757/// loop:
2758/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2759/// %nbits = add nsw i8 %iv, %extraoffset
2760/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2761/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2762/// %iv.next = add i8 %iv, 1
2763/// <...>
2764/// br i1 %val.shifted.iszero, label %end, label %loop
2765///
2766/// end:
2767/// %iv.res = phi i8 [ %iv, %loop ] <...>
2768/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2769/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2770/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2771/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2772/// <...>
2773/// \endcode
2774 static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
2775 Instruction *&ValShiftedIsZero,
2776 Intrinsic::ID &IntrinID, Instruction *&IV,
2777 Value *&Start, Value *&Val,
2778 const SCEV *&ExtraOffsetExpr,
2779 bool &InvertedCond) {
2781 " Performing shift-until-zero idiom detection.\n");
2782
2783 // Give up if the loop has multiple blocks or multiple backedges.
2784 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2785 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2786 return false;
2787 }
2788
2789 Instruction *ValShifted, *NBits, *IVNext;
2790 Value *ExtraOffset;
2791
2792 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2793 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2794 assert(LoopPreheaderBB && "There is always a loop preheader.");
2795
2796 using namespace PatternMatch;
2797
2798 // Step 1: Check if the loop backedge and its condition are in desirable form.
2799
2800 ICmpInst::Predicate Pred;
2801 BasicBlock *TrueBB, *FalseBB;
2802 if (!match(LoopHeaderBB->getTerminator(),
2803 m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
2804 m_BasicBlock(FalseBB))) ||
2805 !match(ValShiftedIsZero,
2806 m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
2807 !ICmpInst::isEquality(Pred)) {
2808 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2809 return false;
2810 }
2811
2812 // Step 2: Check if the comparison's operand is in desirable form.
2813 // FIXME: Val could be a one-input PHI node, which we should look past.
2814 if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
2815 m_Instruction(NBits)))) {
2816 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
2817 return false;
2818 }
2819 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
2820 : Intrinsic::ctlz;
2821
2822 // Step 3: Check if the shift amount is in desirable form.
2823
2824 if (match(NBits, m_c_Add(m_Instruction(IV),
2825 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2826 (NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
2827 ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
2828 else if (match(NBits,
2829 m_Sub(m_Instruction(IV),
2830 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2831 NBits->hasNoSignedWrap())
2832 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
2833 else {
2834 IV = NBits;
2835 ExtraOffsetExpr = SE->getZero(NBits->getType());
2836 }
2837
2838 // Step 4: Check if the recurrence is in desirable form.
2839 auto *IVPN = dyn_cast<PHINode>(IV);
2840 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2841 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2842 return false;
2843 }
2844
2845 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2846 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2847
2848 if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
2849 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2850 return false;
2851 }
2852
2853 // Step 5: Check if the backedge's destinations are in desirable form.
2854
2856 "Should only get equality predicates here.");
2857
2858 // cmp-br is commutative, so canonicalize to a single variant.
2859 InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
2860 if (InvertedCond) {
2861 Pred = ICmpInst::getInversePredicate(Pred);
2862 std::swap(TrueBB, FalseBB);
2863 }
2864
2865 // We expect to exit the loop when the comparison yields true,
2866 // so when it yields false we should branch back to the loop header.
2867 if (FalseBB != LoopHeaderBB) {
2868 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2869 return false;
2870 }
2871
2872 // The new, countable, loop will certainly only run a known number of
2873 // iterations; it won't be infinite. But the old loop might be infinite
2874 // under certain conditions. For logical shifts, the value will become zero
2875 // after at most bitwidth(%Val) loop iterations. However, for arithmetic
2876 // right-shift, if the sign bit was set, the value will never become zero,
2877 // and the loop may never finish.
2878 if (ValShifted->getOpcode() == Instruction::AShr &&
2879 !isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
2880 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
2881 return false;
2882 }
2883
2884 // Okay, idiom checks out.
2885 return true;
2886}
2887
2888/// Look for the following loop:
2889/// \code
2890/// entry:
2891/// <...>
2892/// %start = <...>
2893/// %extraoffset = <...>
2894/// <...>
2895/// br label %for.cond
2896///
2897/// loop:
2898/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2899/// %nbits = add nsw i8 %iv, %extraoffset
2900/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2901/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2902/// %iv.next = add i8 %iv, 1
2903/// <...>
2904/// br i1 %val.shifted.iszero, label %end, label %loop
2905///
2906/// end:
2907/// %iv.res = phi i8 [ %iv, %loop ] <...>
2908/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2909/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2910/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2911/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2912/// <...>
2913/// \endcode
2914///
2915/// And transform it into:
2916/// \code
2917/// entry:
2918/// <...>
2919/// %start = <...>
2920/// %extraoffset = <...>
2921/// <...>
2922/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
2923/// %val.numactivebits = sub i8 8, %val.numleadingzeros
2924/// %extraoffset.neg = sub i8 0, %extraoffset
2925/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
2926/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
2927/// %loop.tripcount = sub i8 %iv.final, %start
2928/// br label %loop
2929///
2930/// loop:
2931/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
2932/// %loop.iv.next = add i8 %loop.iv, 1
2933/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
2934/// %iv = add i8 %loop.iv, %start
2935/// <...>
2936/// br i1 %loop.ivcheck, label %end, label %loop
2937///
2938/// end:
2939/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
2940/// <...>
2941/// \endcode
2942bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2943 bool MadeChange = false;
2944
2945 Instruction *ValShiftedIsZero;
2946 Intrinsic::ID IntrID;
2947 Instruction *IV;
2948 Value *Start, *Val;
2949 const SCEV *ExtraOffsetExpr;
2950 bool InvertedCond;
2951 if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
2952 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2954 " shift-until-zero idiom detection failed.\n");
2955 return MadeChange;
2956 }
2957 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
2958
2959 // Ok, it is the idiom we were looking for, we *could* transform this loop,
2960 // but is it profitable to transform?
2961
2962 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2963 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2964 assert(LoopPreheaderBB && "There is always a loop preheader.");
2965
2966 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2967 assert(SuccessorBB && "There is only a single successor.");
2968
2969 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2970 Builder.SetCurrentDebugLocation(IV->getDebugLoc());
2971
2972 Type *Ty = Val->getType();
2973 unsigned Bitwidth = Ty->getScalarSizeInBits();
2974
2975 TargetTransformInfo::TargetCostKind CostKind =
2976 TargetTransformInfo::TCK_SizeAndLatency;
2977
2978 // The rewrite is considered to be unprofitable if and only if the
2979 // intrinsic we'll use is not cheap. Note that we are okay with *just*
2980 // making the loop countable, even if nothing else changes.
2981 IntrinsicCostAttributes Attrs(
2982 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()});
2983 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2984 if (Cost > TargetTransformInfo::TCC_Basic) {
2985 LLVM_DEBUG(dbgs() << DEBUG_TYPE
2986 " Intrinsic is too costly, not beneficial\n");
2987 return MadeChange;
2988 }
2989
2990 // Ok, transform appears worthwhile.
2991 MadeChange = true;
2992
2993 bool OffsetIsZero = false;
2994 if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2995 OffsetIsZero = ExtraOffsetExprC->isZero();
2996
2997 // Step 1: Compute the loop's final IV value / trip count.
2998
2999 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
3000 IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
3001 /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
3002 Value *ValNumActiveBits = Builder.CreateSub(
3003 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
3004 Val->getName() + ".numactivebits", /*HasNUW=*/true,
3005 /*HasNSW=*/Bitwidth != 2);
3006
3007 SCEVExpander Expander(*SE, *DL, "loop-idiom");
3008 Expander.setInsertPoint(&*Builder.GetInsertPoint());
3009 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3010
3011 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3012 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
3013 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
3014 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3015 {ValNumActiveBitsOffset, Start},
3016 /*FMFSource=*/nullptr, "iv.final");
3017
3018 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
3019 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
3020 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
3021 // FIXME: or when the offset was `add nuw`
3022
3023 // We know the loop's backedge-taken count, but what's the loop's trip count?
3024 Value *LoopTripCount =
3025 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3026 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3027 /*HasNSW=*/Bitwidth != 2);
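// Worked example (illustrative): for i8 %val = 12 (binary 1100) with
// %start = 0 and %extraoffset = 0: numleadingzeros = 4, numactivebits = 4,
// iv.final = smax(4, 0) = 4, backedgetakencount = 4, tripcount = 5; this
// matches the original loop, which exits once 12 >> 4 == 0.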
3028
3029 // Step 2: Adjust the successor basic block to receive the original
3030 // induction variable's final value instead of the orig. IV itself.
3031
3032 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3033
3034 // Step 3: Rewrite the loop into a countable form, with canonical IV.
3035
3036 // The new canonical induction variable.
3037 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3038 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3039
3040 // The induction itself.
3041 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
3042 auto *CIVNext =
3043 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
3044 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3045
3046 // The loop trip count check.
3047 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3048 CurLoop->getName() + ".ivcheck");
3049 auto *NewIVCheck = CIVCheck;
3050 if (InvertedCond) {
3051 NewIVCheck = Builder.CreateNot(CIVCheck);
3052 NewIVCheck->takeName(ValShiftedIsZero);
3053 }
3054
3055 // The original IV, but rebased to be an offset to the CIV.
3056 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
3057 /*HasNSW=*/true); // FIXME: what about NUW?
3058 IVDePHId->takeName(IV);
3059
3060 // The loop terminator.
3061 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3062 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3063 LoopHeaderBB->getTerminator()->eraseFromParent();
3064
3065 // Populate the IV PHI.
3066 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3067 CIV->addIncoming(CIVNext, LoopHeaderBB);
3068
3069 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
3070 // loop. The loop would otherwise not be deleted even if it becomes empty.
3071
3072 SE->forgetLoop(CurLoop);
3073
3074 // Step 5: Try to cleanup the loop's body somewhat.
3075 IV->replaceAllUsesWith(IVDePHId);
3076 IV->eraseFromParent();
3077
3078 ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
3079 ValShiftedIsZero->eraseFromParent();
3080
3081 // Other passes will take care of actually deleting the loop if possible.
3082
3083 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
3084
3085 ++NumShiftUntilZero;
3086 return MadeChange;
3087}
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
#define DEBUG_TYPE
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
Definition: APInt.h:1510
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:635
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:430
size_t size() const
Definition: BasicBlock.h:469
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:850
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
Definition: Constants.cpp:2636
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:864
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
NodeT * getRoot() const
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:137
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:231
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:458
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1713
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isShift() const
Definition: Instruction.h:281
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Definition: Instructions.h:174
Value * getPointerOperand()
Definition: Instructions.h:253
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:203
bool isUnordered() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
StringRef getName() const
Definition: LoopInfo.h:388
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
bool isVolatile() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:924
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:697
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:109
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:51
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:47
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:384
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
iterator end() const
Definition: SmallPtrSet.h:460
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
iterator begin() const
Definition: SmallPtrSet.h:455
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:441
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
Align getAlign() const
Definition: Instructions.h:329
Value * getValueOperand()
Definition: Instructions.h:374
Value * getPointerOperand()
Definition: Instructions.h:377
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Basic
The cost of a typical 'add' instruction.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:542
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:592
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ HeaderSize
Definition: BTF.h:61
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:698
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:463
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1150
@ None
Definition: CodeGenData.h:101
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThroughTrunc=true)
Decompose an icmp into the form ((X & Mask) pred 0) if possible.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
Definition: Metadata.h:780
MDNode * Scope
The tag for alias scope specification (used with noalias).
Definition: Metadata.h:783
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:777
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
Definition: Metadata.h:786
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
Definition: Metadata.h:827
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)