//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form. In cases where this kicks in, it can be a significant
// performance win.
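//
// For example (illustrative only), a countable loop such as
//
//   for (i = 0; i < n; ++i)
//     p[i] = 0;
//
// is rewritten into a single call to memset(p, 0, n).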
//
// If compiling for code size we avoid idiom recognition if the resulting
// code could be larger than the code for the original loop. One way this could
// happen is if the loop is not removable after idiom recognition due to the
// presence of non-idiom instructions. The initial implementation of the
// heuristics applies to idioms in multi-block loops.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, strlen, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-idiom"

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
    NumShiftUntilBitTest,
    "Number of uncountable loops recognized as 'shift until bittest' idiom");
STATISTIC(NumShiftUntilZero,
          "Number of uncountable loops recognized as 'shift until zero' idiom");

bool DisableLIRP::All;
static cl::opt<bool, true>
    DisableLIRPAll("disable-" DEBUG_TYPE "-all",
                   cl::desc("Options to disable Loop Idiom Recognize Pass."),
                   cl::location(DisableLIRP::All), cl::init(false),
                   cl::ReallyHidden);

bool DisableLIRP::Memset;
static cl::opt<bool, true>
    DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memset."),
                      cl::location(DisableLIRP::Memset), cl::init(false),
                      cl::ReallyHidden);

bool DisableLIRP::Memcpy;
static cl::opt<bool, true>
    DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memcpy."),
                      cl::location(DisableLIRP::Memcpy), cl::init(false),
                      cl::ReallyHidden);

static cl::opt<bool> UseLIRCodeSizeHeurs(
    "use-lir-code-size-heurs",
    cl::desc("Use loop idiom recognition code size heuristics when compiling "
             "with -Os/-Oz"),
    cl::init(true), cl::Hidden);

namespace {

class LoopIdiomRecognize {
  Loop *CurLoop = nullptr;
  AliasAnalysis *AA;
  DominatorTree *DT;
  LoopInfo *LI;
  ScalarEvolution *SE;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  const DataLayout *DL;
  OptimizationRemarkEmitter &ORE;
  bool ApplyCodeSizeHeuristics;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

public:
  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
                              LoopInfo *LI, ScalarEvolution *SE,
                              TargetLibraryInfo *TLI,
                              const TargetTransformInfo *TTI, MemorySSA *MSSA,
                              const DataLayout *DL,
                              OptimizationRemarkEmitter &ORE)
      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool runOnLoop(Loop *L);

private:
  using StoreList = SmallVector<StoreInst *, 8>;
  using StoreListMap = MapVector<Value *, StoreList>;

  StoreListMap StoreRefsForMemset;
  StoreListMap StoreRefsForMemsetPattern;
  StoreList StoreRefsForMemcpy;
  bool HasMemset;
  bool HasMemsetPattern;
  bool HasMemcpy;

  /// Return code for isLegalStore()
  enum LegalStoreKind {
    None = 0,
    Memset,
    MemsetPattern,
    Memcpy,
    UnorderedAtomicMemcpy,
    DontUse // Dummy retval never to be used. Allows catching errors in retval
            // handling.
  };

  /// \name Countable Loop Idiom Handling
  /// @{

  bool runOnCountableLoop();
  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                      SmallVectorImpl<BasicBlock *> &ExitBlocks);

  void collectStores(BasicBlock *BB);
  LegalStoreKind isLegalStore(StoreInst *SI);
  enum class ForMemset { No, Yes };
  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                         ForMemset For);

  template <typename MemInst>
  bool processLoopMemIntrinsic(
      BasicBlock *BB,
      bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
      const SCEV *BECount);
  bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

  bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
                               MaybeAlign StoreAlignment, Value *StoredVal,
                               Instruction *TheStore,
                               SmallPtrSetImpl<Instruction *> &Stores,
                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
                               bool IsNegStride, bool IsLoopMemset = false);
  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
  bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
                                  const SCEV *StoreSize, MaybeAlign StoreAlign,
                                  MaybeAlign LoadAlign, Instruction *TheStore,
                                  Instruction *TheLoad,
                                  const SCEVAddRecExpr *StoreEv,
                                  const SCEVAddRecExpr *LoadEv,
                                  const SCEV *BECount);
  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                 bool IsLoopMemset = false);

  /// @}
  /// \name Noncountable Loop Idiom Handling
  /// @{

  bool runOnNoncountableLoop();

  bool recognizePopcount();
  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                               PHINode *CntPhi, Value *Var);
  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
                               bool ZeroCheck, size_t CanonicalSize);
  bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
                             Instruction *DefX, PHINode *CntPhi,
                             Instruction *CntInst);
  bool recognizeAndInsertFFS(); ///< Find First Set: ctlz or cttz
  bool recognizeShiftUntilLessThan();
  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                Instruction *CntInst, PHINode *CntPhi,
                                Value *Var, Instruction *DefX,
                                const DebugLoc &DL, bool ZeroCheck,
                                bool IsCntPhiUsedOutsideLoop,
                                bool InsertSub = false);

  bool recognizeShiftUntilBitTest();
  bool recognizeShiftUntilZero();

  /// @}
};

} // end anonymous namespace

PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (DisableLIRP::All)
    return PreservedAnalyses::all();

  const auto *DL = &L.getHeader()->getDataLayout();

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
                         AR.MSSA, DL, ORE);
  if (!LIR.runOnLoop(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

static void deleteDeadInstruction(Instruction *I) {
  I->replaceAllUsesWith(PoisonValue::get(I->getType()));
  I->eraseFromParent();
}

//===----------------------------------------------------------------------===//
//
// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//

bool LoopIdiomRecognize::runOnLoop(Loop *L) {
  CurLoop = L;
  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it, just give up.
  if (!L->getLoopPreheader())
    return false;

  // Disable loop idiom recognition if the function's name is a common idiom,
  // e.g. when compiling memset itself; otherwise the transform could emit a
  // recursive self-call.
  StringRef Name = L->getHeader()->getParent()->getName();
  if (Name == "memset" || Name == "memcpy")
    return false;

  // Determine if code size heuristics need to be applied.
  ApplyCodeSizeHeuristics =
      L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

  HasMemset = TLI->has(LibFunc_memset);
  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
  HasMemcpy = TLI->has(LibFunc_memcpy);

  if (HasMemset || HasMemsetPattern || HasMemcpy)
    if (SE->hasLoopInvariantBackedgeTakenCount(L))
      return runOnCountableLoop();

  return runOnNoncountableLoop();
}

bool LoopIdiomRecognize::runOnCountableLoop() {
  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
  assert(!isa<SCEVCouldNotCompute>(BECount) &&
         "runOnCountableLoop() called on a loop without a predictable "
         "backedge-taken count");

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getAPInt() == 0)
      return false;

  SmallVector<BasicBlock *, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                    << CurLoop->getHeader()->getParent()->getName()
                    << "] Countable Loop %" << CurLoop->getHeader()->getName()
                    << "\n");

  // The following transforms hoist stores/memsets into the loop pre-header.
  // Give up if the loop has instructions that may throw.
  SimpleLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(CurLoop);
  if (SafetyInfo.anyBlockMayThrow())
    return false;

  bool MadeChange = false;

  // Scan all the blocks in the loop that are not in subloops.
  for (auto *BB : CurLoop->getBlocks()) {
    // Ignore blocks in subloops.
    if (LI->getLoopFor(BB) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
  }
  return MadeChange;
}

static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
  return ConstStride->getAPInt();
}

/// getMemSetPatternValue - If a strided store of the specified value is safe
/// to turn into a memset_pattern16, return a ConstantArray of 16 bytes that
/// should be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
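///
/// For example (illustrative), a store of the i32 value 0x01020304 produces a
/// [4 x i32] array of four copies of 0x01020304, i.e. the value replicated to
/// fill exactly 16 bytes.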
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
  // FIXME: This could check for UndefValue because it can be merged into any
  // other valid pattern.

  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (!C || isa<ConstantExpr>(C))
    return nullptr;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = DL->getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
    return nullptr;

  // Don't care enough about darwin/ppc to implement this.
  if (DL->isBigEndian())
    return nullptr;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16)
    return nullptr;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16)
    return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16 / Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}

LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
  // Don't touch volatile stores.
  if (SI->isVolatile())
    return LegalStoreKind::None;
  // We only want simple or unordered-atomic stores.
  if (!SI->isUnordered())
    return LegalStoreKind::None;

  // Avoid merging nontemporal stores.
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return LegalStoreKind::None;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Don't convert stores of non-integral pointer types to memsets (which
  // store integers).
  if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return LegalStoreKind::None;

  // Reject stores that are so large that they overflow an unsigned.
  // When storing out scalable vectors we bail out for now, since the code
  // below currently only works for constant strides.
  TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
  if (SizeInBits.isScalable() || (SizeInBits.getFixedValue() & 7) ||
      (SizeInBits.getFixedValue() >> 32) != 0)
    return LegalStoreKind::None;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return LegalStoreKind::None;

  // Check to see if we have a constant stride.
  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
    return LegalStoreKind::None;

  // See if the store can be turned into a memset.

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);

  // Note: memset and memset_pattern on unordered-atomic stores are not yet
  // supported.
  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();

  // If we're allowed to form a memset, and the stored value would be
  // acceptable for memset, use it.
  if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // It looks like we can use SplatValue.
    return LegalStoreKind::Memset;
  }
  if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
      // Don't create memset_pattern16s with address spaces.
      StorePtr->getType()->getPointerAddressSpace() == 0 &&
      getMemSetPatternValue(StoredVal, DL)) {
    // It looks like we can use PatternValue!
    return LegalStoreKind::MemsetPattern;
  }

  // Otherwise, see if the store can be turned into a memcpy.
  if (HasMemcpy && !DisableLIRP::Memcpy) {
    // Check to see if the stride matches the size of the store.  If so, then
    // we know that every byte is touched in the loop.
    APInt Stride = getStoreStride(StoreEv);
    unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
    if (StoreSize != Stride && StoreSize != -Stride)
      return LegalStoreKind::None;

    // The store must be feeding a non-volatile load.
    LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());

    // Only allow non-volatile loads.
    if (!LI || LI->isVolatile())
      return LegalStoreKind::None;
    // Only allow simple or unordered-atomic loads.
    if (!LI->isUnordered())
      return LegalStoreKind::None;

    // See if the pointer expression is an AddRec like {base,+,1} on the
    // current loop, which indicates a strided load.  If we have something
    // else, it's a random load we can't handle.
    const SCEVAddRecExpr *LoadEv =
        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
      return LegalStoreKind::None;

    // The store and load must share the same stride.
    if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
      return LegalStoreKind::None;

    // Success.  This store can be converted into a memcpy.
    UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
    return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
                           : LegalStoreKind::Memcpy;
  }
  // This store can't be transformed into a memset/memcpy.
  return LegalStoreKind::None;
}

void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
  StoreRefsForMemset.clear();
  StoreRefsForMemsetPattern.clear();
  StoreRefsForMemcpy.clear();
  for (Instruction &I : *BB) {
    StoreInst *SI = dyn_cast<StoreInst>(&I);
    if (!SI)
      continue;

    // Make sure this is a strided store with a constant stride.
    switch (isLegalStore(SI)) {
    case LegalStoreKind::None:
      // Nothing to do
      break;
    case LegalStoreKind::Memset: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemset[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::MemsetPattern: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemsetPattern[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::Memcpy:
    case LegalStoreKind::UnorderedAtomicMemcpy:
      StoreRefsForMemcpy.push_back(SI);
      break;
    default:
      assert(false && "unhandled return value");
      break;
    }
  }
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
    BasicBlock *BB, const SCEV *BECount,
    SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
  for (BasicBlock *ExitBlock : ExitBlocks)
    if (!DT->dominates(BB, ExitBlock))
      return false;

  bool MadeChange = false;
  // Look for store instructions, which may be optimized to memset/memcpy.
  collectStores(BB);

  // Look for a single store or sets of stores with a common base, which can be
  // optimized into a memset (memset_pattern).  The latter most commonly happens
  // with structs and hand-unrolled loops.
  for (auto &SL : StoreRefsForMemset)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);

  for (auto &SL : StoreRefsForMemsetPattern)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);

  // Optimize the store into a memcpy, if it feeds a similarly strided load.
  for (auto &SI : StoreRefsForMemcpy)
    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);

  MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
      BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
  MadeChange |= processLoopMemIntrinsic<MemSetInst>(
      BB, &LoopIdiomRecognize::processLoopMemSet, BECount);

  return MadeChange;
}

/// See if these stores can be promoted to a memset.
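/// For example (illustrative), a hand-unrolled loop body such as
///   p[i].a = 0; p[i].b = 0;
/// produces adjacent stores in each iteration; together they cover every byte
/// of the struct, so the chain can be combined into one wide memset.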
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                           const SCEV *BECount, ForMemset For) {
  // Try to find consecutive stores that can be transformed into memsets.
  SetVector<StoreInst *> Heads, Tails;
  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;

  // Do a quadratic search on all of the given stores and find
  // all of the pairs of stores that follow each other.
  SmallVector<unsigned, 16> IndexQueue;
  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");

    Value *FirstStoredVal = SL[i]->getValueOperand();
    Value *FirstStorePtr = SL[i]->getPointerOperand();
    const SCEVAddRecExpr *FirstStoreEv =
        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
    APInt FirstStride = getStoreStride(FirstStoreEv);
    unsigned FirstStoreSize =
        DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());

    // See if we can optimize just this store in isolation.
    if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
      Heads.insert(SL[i]);
      continue;
    }

    Value *FirstSplatValue = nullptr;
    Constant *FirstPatternValue = nullptr;

    if (For == ForMemset::Yes)
      FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
    else
      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);

    assert((FirstSplatValue || FirstPatternValue) &&
           "Expected either splat value or pattern value.");

    IndexQueue.clear();
    // If a store has multiple consecutive store candidates, search the Stores
    // array according to the sequence: from i+1 to e, then from i-1 to 0.
    // This is because pairing with the immediately succeeding or preceding
    // candidate usually creates the best chance to find a memset opportunity.
    unsigned j = 0;
    for (j = i + 1; j < e; ++j)
      IndexQueue.push_back(j);
    for (j = i; j > 0; --j)
      IndexQueue.push_back(j - 1);

    for (auto &k : IndexQueue) {
      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
      Value *SecondStorePtr = SL[k]->getPointerOperand();
      const SCEVAddRecExpr *SecondStoreEv =
          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
      APInt SecondStride = getStoreStride(SecondStoreEv);

      if (FirstStride != SecondStride)
        continue;

      Value *SecondStoredVal = SL[k]->getValueOperand();
      Value *SecondSplatValue = nullptr;
      Constant *SecondPatternValue = nullptr;

      if (For == ForMemset::Yes)
        SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
      else
        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);

      assert((SecondSplatValue || SecondPatternValue) &&
             "Expected either splat value or pattern value.");

      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
        if (For == ForMemset::Yes) {
          if (isa<UndefValue>(FirstSplatValue))
            FirstSplatValue = SecondSplatValue;
          if (FirstSplatValue != SecondSplatValue)
            continue;
        } else {
          if (isa<UndefValue>(FirstPatternValue))
            FirstPatternValue = SecondPatternValue;
          if (FirstPatternValue != SecondPatternValue)
            continue;
        }
        Tails.insert(SL[k]);
        Heads.insert(SL[i]);
        ConsecutiveChain[SL[i]] = SL[k];
        break;
      }
    }
  }

  // We may run into multiple chains that merge into a single chain.  We mark
  // the stores that we transformed so that we don't visit the same store
  // twice.
  SmallPtrSet<Value *, 16> TransformedStores;
  bool Changed = false;

  // For stores that start but don't end a link in the chain:
  for (StoreInst *I : Heads) {
    if (Tails.count(I))
      continue;

    // We found a store instr that starts a chain.  Now follow the chain and
    // try to transform it.
    SmallPtrSet<Instruction *, 8> AdjacentStores;
    StoreInst *HeadStore = I;
    unsigned StoreSize = 0;

    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (TransformedStores.count(I))
        break;
      AdjacentStores.insert(I);

      StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    Value *StoredVal = HeadStore->getValueOperand();
    Value *StorePtr = HeadStore->getPointerOperand();
    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
    APInt Stride = getStoreStride(StoreEv);

    // Check to see if the stride matches the size of the stores.  If so, then
    // we know that every byte is touched in the loop.
    if (StoreSize != Stride && StoreSize != -Stride)
      continue;

    bool IsNegStride = StoreSize == -Stride;

    Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
    const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
    if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
                                MaybeAlign(HeadStore->getAlign()), StoredVal,
                                HeadStore, AdjacentStores, StoreEv, BECount,
                                IsNegStride)) {
      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
      Changed = true;
    }
  }

  return Changed;
}

/// processLoopMemIntrinsic - Template function for calling different processor
/// functions based on mem intrinsic type.
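/// For example, it is instantiated above in runOnLoopBlock as
/// processLoopMemIntrinsic<MemCpyInst> with
/// &LoopIdiomRecognize::processLoopMemCpy to widen per-iteration memcpys, and
/// likewise for MemSetInst.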
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
    BasicBlock *BB,
    bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
    const SCEV *BECount) {
  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;
    // Look for memory instructions, which may be optimized to a larger one.
    if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
      // Track the next instruction; if processing MI deletes it, the handle
      // goes null and we restart the scan from the top of the block.
      WeakTrackingVH InstPtr(&*I);
      if (!(this->*Processor)(MI, BECount))
        continue;
      MadeChange = true;

      // If processing the instruction invalidated our iterator, start over
      // from the top of the block.
      if (!InstPtr)
        I = BB->begin();
    }
  }
  return MadeChange;
}

/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy.
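/// For example (illustrative), a loop executing "memcpy(&A[i], &B[i], 4)"
/// with i advancing by 4 each iteration touches every byte exactly once, so
/// the per-iteration copies can be widened into one memcpy of the whole range.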
bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
                                           const SCEV *BECount) {
  // We can only handle non-volatile memcpys with a constant size.
  if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
    return false;

  // If we're not allowed to hack on memcpy, we fail.
  if ((!HasMemcpy && !isa<MemCpyInlineInst>(MCI)) || DisableLIRP::Memcpy)
    return false;

  Value *Dest = MCI->getDest();
  Value *Source = MCI->getSource();
  if (!Dest || !Source)
    return false;

  // See if the load and store pointer expressions are AddRec like {base,+,1}
  // on the current loop, which indicates a strided load and store.  If we
  // have something else, it's a random load or store we can't handle.
  const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;
  const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
    return false;

  // Reject memcpys that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check if the stride matches the size of the memcpy.  If so, then we know
  // that every byte is touched in the loop.
  const SCEVConstant *ConstStoreStride =
      dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
  const SCEVConstant *ConstLoadStride =
      dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
  if (!ConstStoreStride || !ConstLoadStride)
    return false;

  APInt StoreStrideValue = ConstStoreStride->getAPInt();
  APInt LoadStrideValue = ConstLoadStride->getAPInt();
  // Huge stride value - give up.
  if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
    return false;

  if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
             << ore::NV("Inst", "memcpy") << " in "
             << ore::NV("Function", MCI->getFunction())
             << " function will not be hoisted: "
             << ore::NV("Reason", "memcpy size is not equal to stride");
    });
    return false;
  }

  int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
  int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
  // Check if the load stride matches the store stride.
  if (StoreStrideInt != LoadStrideInt)
    return false;

  return processLoopStoreOfLoopLoad(
      Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
      MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
      BECount);
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
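/// For example (illustrative), "for (i = 0; i < n; ++i) memset(&A[i*N], 0, N)"
/// with loop-invariant N writes one contiguous region and can be widened into
/// a single memset of n*N bytes.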
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
                                           const SCEV *BECount) {
  // We can only handle non-volatile memsets.
  if (MSI->isVolatile())
    return false;

  // If we're not allowed to hack on memset, we fail.
  if (!HasMemset || DisableLIRP::Memset)
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (!Ev || Ev->getLoop() != CurLoop)
    return false;
  if (!Ev->isAffine()) {
    LLVM_DEBUG(dbgs() << "  Pointer is not affine, abort\n");
    return false;
  }

  const SCEV *PointerStrideSCEV = Ev->getOperand(1);
  const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
  if (!PointerStrideSCEV || !MemsetSizeSCEV)
    return false;

  bool IsNegStride = false;
  const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());

  if (IsConstantSize) {
    // Memset size is constant.
    // Check if the pointer stride matches the memset size.  If so, then we
    // know that every byte is touched in the loop.
    LLVM_DEBUG(dbgs() << "  memset size is constant\n");
    uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
    if (!ConstStride)
      return false;

    APInt Stride = ConstStride->getAPInt();
    if (SizeInBytes != Stride && SizeInBytes != -Stride)
      return false;

    IsNegStride = SizeInBytes == -Stride;
  } else {
    // Memset size is non-constant.
    // Check if the pointer stride matches the memset size.
    // To be conservative, the pass does not promote pointers that aren't in
    // address space zero.  Also, the pass only handles memset lengths and
    // strides that are invariant for the top level loop.
    LLVM_DEBUG(dbgs() << "  memset size is non-constant\n");
    if (Pointer->getType()->getPointerAddressSpace() != 0) {
      LLVM_DEBUG(dbgs() << "  pointer is not in address space zero, "
                        << "abort\n");
      return false;
    }
    if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
      LLVM_DEBUG(dbgs() << "  memset size is not loop-invariant, "
                        << "abort\n");
      return false;
    }

    // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV.
    IsNegStride = PointerStrideSCEV->isNonConstantNegative();
    const SCEV *PositiveStrideSCEV =
        IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
                    : PointerStrideSCEV;
    LLVM_DEBUG(dbgs() << "  MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
                      << "  PositiveStrideSCEV: " << *PositiveStrideSCEV
                      << "\n");

    if (PositiveStrideSCEV != MemsetSizeSCEV) {
      // If an expression is covered by the loop guard, compare again and
      // proceed with optimization if equal.
      const SCEV *FoldedPositiveStride =
          SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
      const SCEV *FoldedMemsetSize =
          SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);

      LLVM_DEBUG(dbgs() << "  Try to fold SCEV based on loop guard\n"
                        << "    FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
                        << "    FoldedPositiveStride: " << *FoldedPositiveStride
                        << "\n");

      if (FoldedPositiveStride != FoldedMemsetSize) {
        LLVM_DEBUG(dbgs() << "  SCEVs don't match, abort\n");
        return false;
      }
    }
  }

  // Verify that the memset value is loop invariant.  If not, we can't promote
  // the memset.
  Value *SplatValue = MSI->getValue();
  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
    return false;

  SmallPtrSet<Instruction *, 1> MSIs;
  MSIs.insert(MSI);
  return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
                                 MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,
                                 BECount, IsNegStride, /*IsLoopMemset=*/true);
}

/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
                      const SCEV *BECount, const SCEV *StoreSizeSCEV,
                      AliasAnalysis &AA,
                      SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
  // Get the location that may be stored across the loop.  Since the access
  // is strided positively through memory, we say that the modified location
  // starts at the pointer and has infinite size.
  LocationSize AccessSize = LocationSize::afterPointer();

  // If the loop iterates a fixed number of times, we can refine the access
  // size to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
  const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
  if (BECst && ConstSize) {
    std::optional<uint64_t> BEInt = BECst->getAPInt().tryZExtValue();
    std::optional<uint64_t> SizeInt = ConstSize->getAPInt().tryZExtValue();
    // FIXME: Should this check for overflow?
    if (BEInt && SizeInt)
      AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt);
  }

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store.  A store to &A[i] of 100 will always return may
  // alias with a store of &A[100]; we need StoreLoc to be "A" with size of
  // 100, which will then no-alias a store to &A[100].
  MemoryLocation StoreLoc(Ptr, AccessSize);

  for (BasicBlock *B : L->blocks())
    for (Instruction &I : *B)
      if (!IgnoredInsts.contains(&I) &&
          isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
        return true;
  return false;
}

// If we have a negative stride, Start refers to the end of the memory
// location we're trying to memset.  Therefore, we need to recompute the base
// pointer, which is just Start - BECount*Size.
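// For example (illustrative), for "for (i = n-1; i >= 0; --i) p[i] = 0" the
// addrec starts at &p[n-1] and BECount is n-1, so the recomputed base pointer
// is &p[n-1] - (n-1) == &p[0].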
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
                                        Type *IntPtr, const SCEV *StoreSizeSCEV,
                                        ScalarEvolution *SE) {
  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
  if (!StoreSizeSCEV->isOne()) {
    // index = back edge count * store size
    Index = SE->getMulExpr(Index,
                           SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
                           SCEV::FlagNUW);
  }
  // base pointer = start - index * store size
  return SE->getMinusSCEV(Start, Index);
}

/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
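///
/// For instance, with BECount == n-1 and a 4-byte store size, the returned
/// SCEV is n * 4, the total number of bytes the loop writes.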
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
                               const SCEV *StoreSizeSCEV, Loop *CurLoop,
                               const DataLayout *DL, ScalarEvolution *SE) {
  const SCEV *TripCountSCEV =
      SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop);
  return SE->getMulExpr(TripCountSCEV,
                        SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
                        SCEV::FlagNUW);
}

/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do
/// so.
bool LoopIdiomRecognize::processLoopStridedStore(
    Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
    Value *StoredVal, Instruction *TheStore,
    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
    const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
  Module *M = TheStore->getModule();
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
  Constant *PatternValue = nullptr;

  if (!SplatValue)
    PatternValue = getMemSetPatternValue(StoredVal, DL);

  assert((SplatValue || PatternValue) &&
         "Expected either splat value or pattern value.");

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");
  SCEVExpanderCleaner ExpCleaner(Expander);

  Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
  Type *IntIdxTy = DL->getIndexType(DestPtr->getType());

  bool Changed = false;
  const SCEV *Start = Ev->getStart();
  // Handle negative strided loops.
  if (IsNegStride)
    Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // TODO: ideally we should still be able to generate memset if SCEV expander
  // is taught to generate the dependencies at the latest point.
  if (!Expander.isSafeToExpand(Start))
    return Changed;

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for any overlap by generating the
  // base pointer and checking the region.
  Value *BasePtr =
      Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());

  // From here on out, conservatively report to the pass manager that we've
  // changed the IR, even if we later clean up these added instructions. There
  // may be structural differences e.g. in the order of use lists not accounted
  // for in just a textual dump of the IR. This is written as a variable, even
  // though statically all the places this dominates could be replaced with
  // 'true', with the hope that anyone trying to be clever / "more precise" with
  // the return value will read this comment, and leave them alone.
  Changed = true;

  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, Stores))
    return Changed;

  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
    return Changed;

  // Okay, everything looks good, insert the memset.

  const SCEV *NumBytesS =
      getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

  // TODO: ideally we should still be able to generate memset if SCEV expander
  // is taught to generate the dependencies at the latest point.
  if (!Expander.isSafeToExpand(NumBytesS))
    return Changed;

  Value *NumBytes =
      Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

  if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
    return Changed;

  AAMDNodes AATags = TheStore->getAAMetadata();
  for (Instruction *Store : Stores)
    AATags = AATags.merge(Store->getAAMetadata());
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
    AATags = AATags.extendTo(CI->getZExtValue());
  else
    AATags = AATags.extendTo(-1);

  CallInst *NewCall;
  if (SplatValue) {
    NewCall = Builder.CreateMemSet(
        BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
        /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
  } else {
    assert(isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
    // Everything is emitted in default address space.
    Type *Int8PtrTy = DestInt8PtrTy;

    StringRef FuncName = "memset_pattern16";
    FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
                                            Builder.getVoidTy(), Int8PtrTy,
                                            Int8PtrTy, IntIdxTy);
    inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to
    // be a constant array of 16 bytes.  Plop the value into a mergeable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::PrivateLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
    GV->setAlignment(Align(16));
    Value *PatternPtr = GV;
    NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});

    // Set the TBAA info if present.
    if (AATags.TBAA)
      NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);

    if (AATags.Scope)
      NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);

    if (AATags.NoAlias)
      NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
  }

  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
  }

  LLVM_DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
                    << "    from store to: " << *Ev << " at: " << *TheStore
                    << "\n");

  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
                         NewCall->getDebugLoc(), Preheader);
    R << "Transformed loop-strided store in "
      << ore::NV("Function", TheStore->getFunction())
      << " function into a call to "
      << ore::NV("NewFunction", NewCall->getCalledFunction())
      << "() intrinsic";
    if (!Stores.empty())
      R << ore::setExtraArgs();
    for (auto *I : Stores) {
      R << ore::NV("FromBlock", I->getParent()->getName())
        << ore::NV("ToBlock", Preheader->getName());
    }
    return R;
  });

  // Okay, the memset has been formed.  Zap the original store and anything
  // that feeds into it.
  for (auto *I : Stores) {
    if (MSSAU)
      MSSAU->removeMemoryAccess(I, true);
    deleteDeadInstruction(I);
  }
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  ++NumMemSet;
  ExpCleaner.markResultUsed();
  return true;
}

/// If the stored value is a strided load in the same loop with the same
/// stride this may be transformable into a memcpy.  This kicks in for stuff
/// like
///   for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                    const SCEV *BECount) {
  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");

  Value *StorePtr = SI->getPointerOperand();
  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

  // The store must be feeding a non-volatile load.
  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided load.  If we have something else, it's a
  // random load we can't handle.
  Value *LoadPtr = LI->getPointerOperand();
  const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));

  const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
  return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
                                    SI->getAlign(), LI->getAlign(), SI, LI,
                                    StoreEv, LoadEv, BECount);
}

namespace {
class MemmoveVerifier {
public:
  explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
                           const DataLayout &DL)
      : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
                    LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
        BP2(llvm::GetPointerBaseWithConstantOffset(
            StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
        IsSameObject(BP1 == BP2) {}

  bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
                                  const Instruction &TheLoad,
                                  bool IsMemCpy) const {
    if (IsMemCpy) {
      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
      // for negative stride.
      if ((!IsNegStride && LoadOff <= StoreOff) ||
          (IsNegStride && LoadOff >= StoreOff))
        return false;
    } else {
      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
      // for negative stride.  LoadBasePtr shouldn't overlap with StoreBasePtr.
      int64_t LoadSize =
          DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;
      if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
        return false;
      if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
          (IsNegStride && LoadOff + LoadSize > StoreOff))
        return false;
    }
    return true;
  }

private:
  const DataLayout &DL;
  int64_t LoadOff = 0;
  int64_t StoreOff = 0;
  const Value *BP1;
  const Value *BP2;

public:
  const bool IsSameObject;
};
} // namespace

bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
    Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
    MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
    Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
    const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {

  // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
  // conservatively bail here, since otherwise we may have to transform
  // llvm.memcpy.inline into llvm.memcpy which is illegal.
  if (isa<MemCpyInlineInst>(TheStore))
    return false;

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");

  SCEVExpanderCleaner ExpCleaner(Expander);

  bool Changed = false;
  const SCEV *StrStart = StoreEv->getStart();
  unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
  Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));

  APInt Stride = getStoreStride(StoreEv);
  const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);

  // TODO: Deal with non-constant size; currently expect constant store size.
  assert(ConstStoreSize && "store size is expected to be a constant");

  int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
  bool IsNegStride = StoreSize == -Stride;

  // Handle negative strided loops.
  if (IsNegStride)
    StrStart =
        getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write the memory region we're storing to.  This includes the load that
  // feeds the stores.  Check for an alias by generating the base address and
  // checking everything.
  Value *StoreBasePtr = Expander.expandCodeFor(
      StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());

  // From here on out, conservatively report to the pass manager that we've
  // changed the IR, even if we later clean up these added instructions. There
  // may be structural differences e.g. in the order of use lists not accounted
  // for in just a textual dump of the IR. This is written as a variable, even
  // though statically all the places this dominates could be replaced with
  // 'true', with the hope that anyone trying to be clever / "more precise" with
  // the return value will read this comment, and leave them alone.
  Changed = true;

  SmallPtrSet<Instruction *, 2> IgnoredInsts;
  IgnoredInsts.insert(TheStore);

  bool IsMemCpy = isa<MemCpyInst>(TheStore);
  const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";

  bool LoopAccessStore =
      mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts);
  if (LoopAccessStore) {
    // For the memmove case it's not enough to guarantee that the loop doesn't
    // access TheStore and TheLoad.  Additionally we need to make sure that
    // TheStore is the only user of TheLoad.
    if (!TheLoad->hasOneUse())
      return Changed;
    IgnoredInsts.insert(TheLoad);
    if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
                              BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
      ORE.emit([&]() {
        return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
                                        TheStore)
               << ore::NV("Inst", InstRemark) << " in "
               << ore::NV("Function", TheStore->getFunction())
               << " function will not be hoisted: "
               << ore::NV("Reason", "The loop may access store location");
      });
      return Changed;
    }
    IgnoredInsts.erase(TheLoad);
  }

  const SCEV *LdStart = LoadEv->getStart();
  unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();

  // Handle negative strided loops.
  if (IsNegStride)
    LdStart =
        getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
                                              Preheader->getTerminator());

  // If the store is a memcpy instruction, we must check if it will write to
  // the load memory locations.  So remove it from the ignored stores.
  MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
  if (IsMemCpy && !Verifier.IsSameObject)
    IgnoredInsts.erase(TheStore);
  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts)) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
             << ore::NV("Inst", InstRemark) << " in "
             << ore::NV("Function", TheStore->getFunction())
             << " function will not be hoisted: "
             << ore::NV("Reason", "The loop may access load location");
    });
    return Changed;
  }

  bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
  if (UseMemMove)
    if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
                                             IsMemCpy))
      return Changed;

  if (avoidLIRForMultiBlockLoop())
    return Changed;

  // Okay, everything is safe, we can transform this!

  const SCEV *NumBytesS =
      getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

  Value *NumBytes =
      Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

  AAMDNodes AATags = TheLoad->getAAMetadata();
  AAMDNodes StoreAATags = TheStore->getAAMetadata();
  AATags = AATags.merge(StoreAATags);
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
    AATags = AATags.extendTo(CI->getZExtValue());
  else
    AATags = AATags.extendTo(-1);

  CallInst *NewCall = nullptr;
  // Check whether to generate an unordered atomic memcpy:
  //  If the load or store are atomic, then they must necessarily be unordered
  //  by previous checks.
  if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
    if (UseMemMove)
      NewCall = Builder.CreateMemMove(
          StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
          /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
    else
      NewCall =
          Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
                               NumBytes, /*isVolatile=*/false, AATags.TBAA,
                               AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
  } else {
    // For now don't support unordered atomic memmove.
    if (UseMemMove)
      return Changed;
    // We cannot allow unaligned ops for unordered load/store, so reject
    // anything where the alignment isn't at least the element size.
    assert((StoreAlign && LoadAlign) &&
           "Expect unordered load/store to have align.");
    if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
      return Changed;

    // If the element.atomic memcpy is not lowered into explicit
    // loads/stores later, then it will be lowered into an element-size
    // specific lib call.  If the lib call doesn't exist for our store size,
    // then we shouldn't generate the memcpy.
    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
      return Changed;

    // Create the call.
    // Note that unordered atomic loads/stores are *required* by the spec to
    // have an alignment but non-atomic loads/stores may not.
    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
        StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
        AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
  }
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
  }

  LLVM_DEBUG(dbgs() << "  Formed new call: " << *NewCall << "\n"
                    << "    from load ptr=" << *LoadEv << " at: " << *TheLoad
                    << "\n"
                    << "    from store ptr=" << *StoreEv << " at: " << *TheStore
                    << "\n");

  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
                              NewCall->getDebugLoc(), Preheader)
           << "Formed a call to "
           << ore::NV("NewFunction", NewCall->getCalledFunction())
           << "() intrinsic from " << ore::NV("Inst", InstRemark)
           << " instruction in " << ore::NV("Function", TheStore->getFunction())
           << " function"
           << ore::setExtraArgs()
           << ore::NV("FromBlock", TheStore->getParent()->getName())
           << ore::NV("ToBlock", Preheader->getName());
  });

  // Okay, a new call to memcpy/memmove has been formed.  Zap the original
  // store and anything that feeds into it.
  if (MSSAU)
    MSSAU->removeMemoryAccess(TheStore, true);
  deleteDeadInstruction(TheStore);
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  if (UseMemMove)
    ++NumMemMove;
  else
    ++NumMemCpy;
  ExpCleaner.markResultUsed();
  return true;
}

// When compiling for code size we avoid idiom recognition for a multi-block
// loop unless it is a loop_memset idiom or a memset/memcpy idiom in a nested
// loop.
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
                                                   bool IsLoopMemset) {
  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
    if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
      LLVM_DEBUG(dbgs() << "  " << CurLoop->getHeader()->getParent()->getName()
                        << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
                        << " avoided: multi-block top-level loop\n");
      return true;
    }
  }

  return false;
}

bool LoopIdiomRecognize::runOnNoncountableLoop() {
  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                    << CurLoop->getHeader()->getParent()->getName()
                    << "] Noncountable Loop %"
                    << CurLoop->getHeader()->getName() << "\n");

  return recognizePopcount() || recognizeAndInsertFFS() ||
         recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
         recognizeShiftUntilLessThan();
}

/// Check if the given conditional branch is based on a comparison between
/// a variable and zero: if the variable is non-zero (or zero, when JmpOnZero
/// is true), control yields to the loop entry.  If the branch matches this
/// behavior, the variable involved in the comparison is returned.  This
/// function will be called to see if the precondition and postcondition of
/// the loop are in desirable form.
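///
/// A matching branch looks like (illustrative IR):
/// \code
///   br i1 (icmp ne %x, 0), label %loop-entry, label %loop-exit
/// \endcode
/// in which case %x is returned (with JmpOnZero == false).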
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
                             bool JmpOnZero = false) {
  if (!BI || !BI->isConditional())
    return nullptr;

  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return nullptr;

  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
  if (!CmpZero || !CmpZero->isZero())
    return nullptr;

  BasicBlock *TrueSucc = BI->getSuccessor(0);
  BasicBlock *FalseSucc = BI->getSuccessor(1);
  if (JmpOnZero)
    std::swap(TrueSucc, FalseSucc);

  ICmpInst::Predicate Pred = Cond->getPredicate();
  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
      (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
    return Cond->getOperand(0);

  return nullptr;
}
1526
1527/// Check if the given conditional branch is based on an unsigned less-than
1528/// comparison between a variable and a constant, and if the comparison is false
1529 /// control branches to the loop entry. If the branch matches this behavior,
1530/// the variable involved in the comparison is returned.
1531 static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
1532 APInt &Threshold) {
1533 if (!BI || !BI->isConditional())
1534 return nullptr;
1535
1536 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1537 if (!Cond)
1538 return nullptr;
1539
1540 ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
1541 if (!CmpConst)
1542 return nullptr;
1543
1544 BasicBlock *FalseSucc = BI->getSuccessor(1);
1545 ICmpInst::Predicate Pred = Cond->getPredicate();
1546
1547 if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1548 Threshold = CmpConst->getValue();
1549 return Cond->getOperand(0);
1550 }
1551
1552 return nullptr;
1553}
1554
1555// Check if the recurrence variable `VarX` is in the right form to create
1556// the idiom. Returns the value coerced to a PHINode if so.
1557 static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
1558 BasicBlock *LoopEntry) {
1559 auto *PhiX = dyn_cast<PHINode>(VarX);
1560 if (PhiX && PhiX->getParent() == LoopEntry &&
1561 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1562 return PhiX;
1563 return nullptr;
1564}
1565
1566/// Return true if the idiom is detected in the loop.
1567///
1568/// Additionally:
1569/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1570 /// or nullptr if there is none.
1571/// 2) \p CntPhi is set to the corresponding phi node
1572 /// or nullptr if there is none.
1573/// 3) \p InitX is set to the value whose CTLZ could be used.
1574/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1575/// 5) \p Threshold is set to the constant involved in the unsigned less-than
1576/// comparison.
1577///
1578/// The core idiom we are trying to detect is:
1579/// \code
1580/// if (x0 < 2)
1581/// goto loop-exit // the precondition of the loop
1582/// cnt0 = init-val
1583/// do {
1584/// x = phi (x0, x.next); //PhiX
1585/// cnt = phi (cnt0, cnt.next)
1586///
1587/// cnt.next = cnt + 1;
1588/// ...
1589/// x.next = x >> 1; // DefX
1590/// } while (x >= 4)
1591/// loop-exit:
1592/// \endcode
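///
/// For example (illustrative values): with x0 = 16 and the threshold 4 shown
/// above, the loop body executes four times (x = 16, 8, 4, 2) before "x >= 4"
/// fails, so the final count is cnt0 + 4 = cnt0 + floor(log2(16));
/// recognizeShiftUntilLessThan() below rewrites this using CTLZ.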
1593 static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
1594 Intrinsic::ID &IntrinID,
1595 Value *&InitX, Instruction *&CntInst,
1596 PHINode *&CntPhi, Instruction *&DefX,
1597 APInt &Threshold) {
1598 BasicBlock *LoopEntry;
1599
1600 DefX = nullptr;
1601 CntInst = nullptr;
1602 CntPhi = nullptr;
1603 LoopEntry = *(CurLoop->block_begin());
1604
1605 // step 1: Check if the loop-back branch is in desirable form.
1606 if (Value *T = matchShiftULTCondition(
1607 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
1608 Threshold))
1609 DefX = dyn_cast<Instruction>(T);
1610 else
1611 return false;
1612
1613 // step 2: Check the recurrence of variable X
1614 if (!DefX || !isa<PHINode>(DefX))
1615 return false;
1616
1617 PHINode *VarPhi = cast<PHINode>(DefX);
1618 int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
1619 if (Idx == -1)
1620 return false;
1621
1622 DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
1623 if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
1624 return false;
1625
1626 // step 3: detect instructions corresponding to "x.next = x >> 1"
1627 if (DefX->getOpcode() != Instruction::LShr)
1628 return false;
1629
1630 IntrinID = Intrinsic::ctlz;
1631 ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1632 if (!Shft || !Shft->isOne())
1633 return false;
1634
1635 InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1636
1637 // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
1638 // or cnt.next = cnt + -1.
1639 // TODO: We can skip the step. If loop trip count is known (CTLZ),
1640 // then all uses of "cnt.next" could be optimized to the trip count
1641 // plus "cnt0". Currently it is not optimized.
1642 // This step could be used to detect POPCNT instruction:
1643 // cnt.next = cnt + (x.next & 1)
1644 for (Instruction &Inst : llvm::make_range(
1645 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1646 if (Inst.getOpcode() != Instruction::Add)
1647 continue;
1648
1649 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1650 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1651 continue;
1652
1653 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1654 if (!Phi)
1655 continue;
1656
1657 CntInst = &Inst;
1658 CntPhi = Phi;
1659 break;
1660 }
1661 if (!CntInst)
1662 return false;
1663
1664 return true;
1665}
1666
1667/// Return true iff the idiom is detected in the loop.
1668///
1669/// Additionally:
1670/// 1) \p CntInst is set to the instruction counting the population bit.
1671/// 2) \p CntPhi is set to the corresponding phi node.
1672/// 3) \p Var is set to the value whose population bits are being counted.
1673///
1674/// The core idiom we are trying to detect is:
1675/// \code
1676/// if (x0 != 0)
1677/// goto loop-exit // the precondition of the loop
1678/// cnt0 = init-val;
1679/// do {
1680/// x1 = phi (x0, x2);
1681/// cnt1 = phi(cnt0, cnt2);
1682///
1683/// cnt2 = cnt1 + 1;
1684/// ...
1685/// x2 = x1 & (x1 - 1);
1686/// ...
1687/// } while(x != 0);
1688///
1689/// loop-exit:
1690/// \endcode
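///
/// For example (illustrative value): x0 = 0b01101000 has three set bits; each
/// "x2 = x1 & (x1 - 1)" clears the lowest set bit, so the loop runs exactly
/// three times and cnt advances by ctpop(x0).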
1691static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1692 Instruction *&CntInst, PHINode *&CntPhi,
1693 Value *&Var) {
1694 // step 1: Check to see if the loop-back branch matches this pattern:
1695 // "if (a!=0) goto loop-entry".
1696 BasicBlock *LoopEntry;
1697 Instruction *DefX2, *CountInst;
1698 Value *VarX1, *VarX0;
1699 PHINode *PhiX, *CountPhi;
1700
1701 DefX2 = CountInst = nullptr;
1702 VarX1 = VarX0 = nullptr;
1703 PhiX = CountPhi = nullptr;
1704 LoopEntry = *(CurLoop->block_begin());
1705
1706 // step 1: Check if the loop-back branch is in desirable form.
1707 {
1708 if (Value *T = matchCondition(
1709 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1710 DefX2 = dyn_cast<Instruction>(T);
1711 else
1712 return false;
1713 }
1714
1715 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1716 {
1717 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1718 return false;
1719
1720 BinaryOperator *SubOneOp;
1721
1722 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1723 VarX1 = DefX2->getOperand(1);
1724 else {
1725 VarX1 = DefX2->getOperand(0);
1726 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1727 }
1728 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1729 return false;
1730
1731 ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1732 if (!Dec ||
1733 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1734 (SubOneOp->getOpcode() == Instruction::Add &&
1735 Dec->isMinusOne()))) {
1736 return false;
1737 }
1738 }
1739
1740 // step 3: Check the recurrence of variable X
1741 PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1742 if (!PhiX)
1743 return false;
1744
1745 // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
1746 {
1747 CountInst = nullptr;
1748 for (Instruction &Inst : llvm::make_range(
1749 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1750 if (Inst.getOpcode() != Instruction::Add)
1751 continue;
1752
1753 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1754 if (!Inc || !Inc->isOne())
1755 continue;
1756
1757 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1758 if (!Phi)
1759 continue;
1760
1761 // Check if the result of the instruction is live out of the loop.
1762 bool LiveOutLoop = false;
1763 for (User *U : Inst.users()) {
1764 if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1765 LiveOutLoop = true;
1766 break;
1767 }
1768 }
1769
1770 if (LiveOutLoop) {
1771 CountInst = &Inst;
1772 CountPhi = Phi;
1773 break;
1774 }
1775 }
1776
1777 if (!CountInst)
1778 return false;
1779 }
1780
1781 // step 5: check if the precondition is in this form:
1782 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1783 {
1784 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1785 Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1786 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1787 return false;
1788
1789 CntInst = CountInst;
1790 CntPhi = CountPhi;
1791 Var = T;
1792 }
1793
1794 return true;
1795}
1796
1797/// Return true if the idiom is detected in the loop.
1798///
1799/// Additionally:
1800/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1801 /// or nullptr if there is none.
1802/// 2) \p CntPhi is set to the corresponding phi node
1803 /// or nullptr if there is none.
1804/// 3) \p Var is set to the value whose CTLZ could be used.
1805/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1806///
1807/// The core idiom we are trying to detect is:
1808/// \code
1809/// if (x0 == 0)
1810/// goto loop-exit // the precondition of the loop
1811/// cnt0 = init-val;
1812/// do {
1813/// x = phi (x0, x.next); //PhiX
1814/// cnt = phi(cnt0, cnt.next);
1815///
1816/// cnt.next = cnt + 1;
1817/// ...
1818/// x.next = x >> 1; // DefX
1819/// ...
1820/// } while(x.next != 0);
1821///
1822/// loop-exit:
1823/// \endcode
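///
/// For example (illustrative value): for a 32-bit x0 = 8 = 0b1000, x.next takes
/// the values 4, 2, 1, 0, so the loop runs four times; that equals
/// 32 - ctlz(8) = 32 - 28 = 4, the count the transformed code computes directly.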
1824static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1825 Intrinsic::ID &IntrinID, Value *&InitX,
1826 Instruction *&CntInst, PHINode *&CntPhi,
1827 Instruction *&DefX) {
1828 BasicBlock *LoopEntry;
1829 Value *VarX = nullptr;
1830
1831 DefX = nullptr;
1832 CntInst = nullptr;
1833 CntPhi = nullptr;
1834 LoopEntry = *(CurLoop->block_begin());
1835
1836 // step 1: Check if the loop-back branch is in desirable form.
1837 if (Value *T = matchCondition(
1838 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1839 DefX = dyn_cast<Instruction>(T);
1840 else
1841 return false;
1842
1843 // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1844 if (!DefX || !DefX->isShift())
1845 return false;
1846 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1847 Intrinsic::ctlz;
1848 ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1849 if (!Shft || !Shft->isOne())
1850 return false;
1851 VarX = DefX->getOperand(0);
1852
1853 // step 3: Check the recurrence of variable X
1854 PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1855 if (!PhiX)
1856 return false;
1857
1858 InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1859
1860 // Make sure the initial value can't be negative otherwise the ashr in the
1861 // loop might never reach zero which would make the loop infinite.
1862 if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1863 return false;
1864
1865 // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
1866 // or cnt.next = cnt + -1.
1867 // TODO: We can skip the step. If loop trip count is known (CTLZ),
1868 // then all uses of "cnt.next" could be optimized to the trip count
1869 // plus "cnt0". Currently it is not optimized.
1870 // This step could be used to detect POPCNT instruction:
1871 // cnt.next = cnt + (x.next & 1)
1872 for (Instruction &Inst : llvm::make_range(
1873 LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1874 if (Inst.getOpcode() != Instruction::Add)
1875 continue;
1876
1877 ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1878 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1879 continue;
1880
1881 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1882 if (!Phi)
1883 continue;
1884
1885 CntInst = &Inst;
1886 CntPhi = Phi;
1887 break;
1888 }
1889 if (!CntInst)
1890 return false;
1891
1892 return true;
1893}
1894
1895// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1896// profitable if we delete the loop.
1897bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
1898 Value *InitX, bool ZeroCheck,
1899 size_t CanonicalSize) {
1900 const Value *Args[] = {InitX,
1901 ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
1902
1903 // @llvm.dbg intrinsics don't count, as they have no semantic effect.
1904 auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1905 uint32_t HeaderSize =
1906 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1907
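// If the loop header is exactly the canonical idiom (and will thus likely be
// deleted), the intrinsic is assumed profitable regardless of its cost;
// otherwise it must be cheap, as the check below implements.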
1908 IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1909 InstructionCost Cost = TTI->getIntrinsicInstrCost(
1910 Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1911 if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1912 return false;
1913
1914 return true;
1915}
1916
1917 /// Convert a CTLZ / CTTZ idiom loop into a countable loop.
1918 /// Returns true if a CTLZ / CTTZ intrinsic was inserted as the new trip count;
1919 /// otherwise, returns false.
1920bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
1921 Value *InitX, Instruction *DefX,
1922 PHINode *CntPhi,
1923 Instruction *CntInst) {
1924 bool IsCntPhiUsedOutsideLoop = false;
1925 for (User *U : CntPhi->users())
1926 if (!CurLoop->contains(cast<Instruction>(U))) {
1927 IsCntPhiUsedOutsideLoop = true;
1928 break;
1929 }
1930 bool IsCntInstUsedOutsideLoop = false;
1931 for (User *U : CntInst->users())
1932 if (!CurLoop->contains(cast<Instruction>(U))) {
1933 IsCntInstUsedOutsideLoop = true;
1934 break;
1935 }
1936 // If both CntInst and CntPhi are used outside the loop the profitability
1937 // is questionable.
1938 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1939 return false;
1940
1941 // For some CPUs the result of the CTLZ(X) intrinsic is undefined
1942 // when X is 0. If we cannot guarantee X != 0, we need to check for this
1943 // case when expanding.
1944 bool ZeroCheck = false;
1945 // It is safe to assume the Preheader exists, as it was checked in the
1946 // parent function runOnLoop.
1947 BasicBlock *PH = CurLoop->getLoopPreheader();
1948
1949 // If we are using the count instruction outside the loop, make sure we
1950 // have a zero check as a precondition. Without the check the loop would run
1951 // one iteration before any check of the input value. This means 0 and 1
1952 // would have identical behavior in the original loop, and thus we need the check.
1953 if (!IsCntPhiUsedOutsideLoop) {
1954 auto *PreCondBB = PH->getSinglePredecessor();
1955 if (!PreCondBB)
1956 return false;
1957 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1958 if (!PreCondBI)
1959 return false;
1960 if (matchCondition(PreCondBI, PH) != InitX)
1961 return false;
1962 ZeroCheck = true;
1963 }
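// At this point ZeroCheck == true means the guarding branch proved
// InitX != 0, so the intrinsic may later be emitted with its is_zero_poison
// argument set (see createFFSIntrinsic).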
1964
1965 // FFS idiom loop has only 6 instructions:
1966 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1967 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1968 // %shr = ashr %n.addr.0, 1
1969 // %tobool = icmp eq %shr, 0
1970 // %inc = add nsw %i.0, 1
1971 // br i1 %tobool
1972 size_t IdiomCanonicalSize = 6;
1973 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1974 return false;
1975
1976 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1977 DefX->getDebugLoc(), ZeroCheck,
1978 IsCntPhiUsedOutsideLoop);
1979 return true;
1980}
1981
1982 /// Recognize a CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1983 /// to a countable one (with a CTLZ / CTTZ trip count). Returns true if the new
1984 /// trip count was inserted; otherwise, returns false.
1985bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1986 // Give up if the loop has multiple blocks or multiple backedges.
1987 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1988 return false;
1989
1990 Intrinsic::ID IntrinID;
1991 Value *InitX;
1992 Instruction *DefX = nullptr;
1993 PHINode *CntPhi = nullptr;
1994 Instruction *CntInst = nullptr;
1995
1996 if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1997 DefX))
1998 return false;
1999
2000 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2001}
2002
2003bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2004 // Give up if the loop has multiple blocks or multiple backedges.
2005 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2006 return false;
2007
2008 Intrinsic::ID IntrinID;
2009 Value *InitX;
2010 Instruction *DefX = nullptr;
2011 PHINode *CntPhi = nullptr;
2012 Instruction *CntInst = nullptr;
2013
2014 APInt LoopThreshold;
2015 if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
2016 CntPhi, DefX, LoopThreshold))
2017 return false;
2018
2019 if (LoopThreshold == 2) {
2020 // Treat as regular FFS.
2021 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2022 }
2023
2024 // Look for Floor Log2 Idiom.
2025 if (LoopThreshold != 4)
2026 return false;
2027
2028 // Abort if CntPhi is used outside of the loop.
2029 for (User *U : CntPhi->users())
2030 if (!CurLoop->contains(cast<Instruction>(U)))
2031 return false;
2032
2033 // It is safe to assume the Preheader exists, as it was checked in the
2034 // parent function runOnLoop.
2035 BasicBlock *PH = CurLoop->getLoopPreheader();
2036 auto *PreCondBB = PH->getSinglePredecessor();
2037 if (!PreCondBB)
2038 return false;
2039 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2040 if (!PreCondBI)
2041 return false;
2042
2043 APInt PreLoopThreshold;
2044 if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
2045 PreLoopThreshold != 2)
2046 return false;
2047
2048 bool ZeroCheck = true;
2049
2050 // the loop has only 6 instructions:
2051 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2052 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2053 // %shr = ashr %n.addr.0, 1
2054 // %tobool = icmp ult %n.addr.0, C
2055 // %inc = add nsw %i.0, 1
2056 // br i1 %tobool
2057 size_t IdiomCanonicalSize = 6;
2058 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2059 return false;
2060
2061 // log2(x) = w − 1 − clz(x)
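// e.g. for i32 x = 16: clz(16) = 27, so log2(16) = 32 - 1 - 27 = 4.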
2062 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2063 DefX->getDebugLoc(), ZeroCheck,
2064 /*IsCntPhiUsedOutsideLoop=*/false,
2065 /*InsertSub=*/true);
2066 return true;
2067}
2068
2069/// Recognizes a population count idiom in a non-countable loop.
2070///
2071/// If detected, transforms the relevant code to issue the popcount intrinsic
2072/// function call, and returns true; otherwise, returns false.
2073bool LoopIdiomRecognize::recognizePopcount() {
2074 if (TTI->getPopcntSupport(TargetTransformInfo::PSK_FastHardware) != TargetTransformInfo::PSK_FastHardware)
2075 return false;
2076
2077 // Counting population is usually done with just a few arithmetic instructions.
2078 // Such instructions can be easily "absorbed" by vacant slots in a
2079 // non-compact loop. Therefore, recognizing popcount idiom only makes sense
2080 // in a compact loop.
2081
2082 // Give up if the loop has multiple blocks or multiple backedges.
2083 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2084 return false;
2085
2086 BasicBlock *LoopBody = *(CurLoop->block_begin());
2087 if (LoopBody->size() >= 20) {
2088 // The loop is too big, bail out.
2089 return false;
2090 }
2091
2092 // It should have a preheader containing nothing but an unconditional branch.
2093 BasicBlock *PH = CurLoop->getLoopPreheader();
2094 if (!PH || &PH->front() != PH->getTerminator())
2095 return false;
2096 auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
2097 if (!EntryBI || EntryBI->isConditional())
2098 return false;
2099
2100 // It should have a precondition block where the generated popcount intrinsic
2101 // function can be inserted.
2102 auto *PreCondBB = PH->getSinglePredecessor();
2103 if (!PreCondBB)
2104 return false;
2105 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2106 if (!PreCondBI || PreCondBI->isUnconditional())
2107 return false;
2108
2109 Instruction *CntInst;
2110 PHINode *CntPhi;
2111 Value *Val;
2112 if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
2113 return false;
2114
2115 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2116 return true;
2117}
2118
2119 static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
2120 const DebugLoc &DL) {
2121 Value *Ops[] = {Val};
2122 Type *Tys[] = {Val->getType()};
2123
2124 CallInst *CI = IRBuilder.CreateIntrinsic(Intrinsic::ctpop, Tys, Ops);
2125 CI->setDebugLoc(DL);
2126
2127 return CI;
2128}
2129
2130 static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
2131 const DebugLoc &DL, bool ZeroCheck,
2132 Intrinsic::ID IID) {
2133 Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
2134 Type *Tys[] = {Val->getType()};
2135
2136 CallInst *CI = IRBuilder.CreateIntrinsic(IID, Tys, Ops);
2137 CI->setDebugLoc(DL);
2138
2139 return CI;
2140}
2141
2142/// Transform the following loop (Using CTLZ, CTTZ is similar):
2143/// loop:
2144/// CntPhi = PHI [Cnt0, CntInst]
2145/// PhiX = PHI [InitX, DefX]
2146/// CntInst = CntPhi + 1
2147/// DefX = PhiX >> 1
2148/// LOOP_BODY
2149/// Br: loop if (DefX != 0)
2150/// Use(CntPhi) or Use(CntInst)
2151///
2152/// Into:
2153/// If CntPhi used outside the loop:
2154/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
2155/// Count = CountPrev + 1
2156/// else
2157/// Count = BitWidth(InitX) - CTLZ(InitX)
2158/// loop:
2159/// CntPhi = PHI [Cnt0, CntInst]
2160/// PhiX = PHI [InitX, DefX]
2161/// PhiCount = PHI [Count, Dec]
2162/// CntInst = CntPhi + 1
2163/// DefX = PhiX >> 1
2164/// Dec = PhiCount - 1
2165/// LOOP_BODY
2166/// Br: loop if (Dec != 0)
2167/// Use(CountPrev + Cnt0) // Use(CntPhi)
2168/// or
2169/// Use(Count + Cnt0) // Use(CntInst)
2170///
2171/// If LOOP_BODY is empty the loop will be deleted.
2172/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
2173void LoopIdiomRecognize::transformLoopToCountable(
2174 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
2175 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2176 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
2177 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
2178
2179 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
2180 IRBuilder<> Builder(PreheaderBr);
2181 Builder.SetCurrentDebugLocation(DL);
2182
2183 // If there are no uses of CntPhi, create:
2184 // Count = BitWidth - CTLZ(InitX);
2185 // NewCount = Count;
2186 // If there are uses of CntPhi create:
2187 // NewCount = BitWidth - CTLZ(InitX >> 1);
2188 // Count = NewCount + 1;
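// For instance (illustrative), for i32 InitX = 16 with CntPhi unused outside
// the loop: Count = 32 - ctlz(16) = 32 - 27 = 5, matching the five times the
// original loop body runs (PhiX = 16, 8, 4, 2, 1).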
2189 Value *InitXNext;
2190 if (IsCntPhiUsedOutsideLoop) {
2191 if (DefX->getOpcode() == Instruction::AShr)
2192 InitXNext = Builder.CreateAShr(InitX, 1);
2193 else if (DefX->getOpcode() == Instruction::LShr)
2194 InitXNext = Builder.CreateLShr(InitX, 1);
2195 else if (DefX->getOpcode() == Instruction::Shl) // cttz
2196 InitXNext = Builder.CreateShl(InitX, 1);
2197 else
2198 llvm_unreachable("Unexpected opcode!");
2199 } else
2200 InitXNext = InitX;
2201 Value *Count =
2202 createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
2203 Type *CountTy = Count->getType();
2204 Count = Builder.CreateSub(
2205 ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2206 if (InsertSub)
2207 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2208 Value *NewCount = Count;
2209 if (IsCntPhiUsedOutsideLoop)
2210 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2211
2212 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2213
2214 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
2215 if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
2216 // If the counter was being incremented in the loop, add NewCount to the
2217 // counter's initial value, but only if the initial value is not zero.
2218 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2219 if (!InitConst || !InitConst->isZero())
2220 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2221 } else {
2222 // If the count was being decremented in the loop, subtract NewCount from
2223 // the counter's initial value.
2224 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2225 }
2226
2227 // Step 2: Insert new IV and loop condition:
2228 // loop:
2229 // ...
2230 // PhiCount = PHI [Count, Dec]
2231 // ...
2232 // Dec = PhiCount - 1
2233 // ...
2234 // Br: loop if (Dec != 0)
2235 BasicBlock *Body = *(CurLoop->block_begin());
2236 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2237 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2238
2239 PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
2240 TcPhi->insertBefore(Body->begin());
2241
2242 Builder.SetInsertPoint(LbCond);
2243 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2244 TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2245
2246 TcPhi->addIncoming(Count, Preheader);
2247 TcPhi->addIncoming(TcDec, Body);
2248
2249 CmpInst::Predicate Pred =
2250 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
2251 LbCond->setPredicate(Pred);
2252 LbCond->setOperand(0, TcDec);
2253 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2254
2255 // Step 3: All the references to the original counter outside
2256 // the loop are replaced with the NewCount
2257 if (IsCntPhiUsedOutsideLoop)
2258 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
2259 else
2260 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2261
2262 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
2263 // loop. The loop would otherwise not be deleted even if it becomes empty.
2264 SE->forgetLoop(CurLoop);
2265}
2266
2267void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
2268 Instruction *CntInst,
2269 PHINode *CntPhi, Value *Var) {
2270 BasicBlock *PreHead = CurLoop->getLoopPreheader();
2271 auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
2272 const DebugLoc &DL = CntInst->getDebugLoc();
2273
2274 // Assuming before transformation, the loop is following:
2275 // if (x) // the precondition
2276 // do { cnt++; x &= x - 1; } while(x);
2277
2278 // Step 1: Insert the ctpop instruction at the end of the precondition block
2279 IRBuilder<> Builder(PreCondBr);
2280 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2281 {
2282 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
2283 NewCount = PopCntZext =
2284 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2285
2286 if (NewCount != PopCnt)
2287 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2288
2289 // TripCnt is exactly the number of iterations the loop has
2290 TripCnt = NewCount;
2291
2292 // If the population counter's initial value is not zero, insert Add Inst.
2293 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
2294 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2295 if (!InitConst || !InitConst->isZero()) {
2296 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2297 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2298 }
2299 }
2300
2301 // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
2302 // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
2303 // function would be partially dead code, and downstream passes will drag
2304 // it back from the precondition block to the preheader.
2305 {
2306 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2307
2308 Value *Opnd0 = PopCntZext;
2309 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
2310 if (PreCond->getOperand(0) != Var)
2311 std::swap(Opnd0, Opnd1);
2312
2313 ICmpInst *NewPreCond = cast<ICmpInst>(
2314 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
2315 PreCondBr->setCondition(NewPreCond);
2316
2317 RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
2318 }
2319
2320 // Step 3: Note that the population count is exactly the trip count of the
2321 // loop in question, which enables us to convert the loop from a noncountable
2322 // one into a countable one. The benefit is twofold:
2323 //
2324 // - If the loop only counts population, the entire loop becomes dead after
2325 // the transformation. It is a lot easier to prove a countable loop dead
2326 // than to prove a noncountable one. (In some C dialects, an infinite loop
2327 // isn't dead even if it computes nothing useful. In general, DCE needs
2328 // to prove a noncountable loop finite before safely deleting it.)
2329 //
2330 // - If the loop also performs something else, it remains alive.
2331 // Since it is transformed to countable form, it can be aggressively
2332 // optimized by some optimizations which are in general not applicable
2333 // to a noncountable loop.
2334 //
2335 // After this step, this loop (conceptually) would look like following:
2336 // newcnt = __builtin_ctpop(x);
2337 // t = newcnt;
2338 // if (x)
2339 // do { cnt++; x &= x-1; t--; } while (t > 0);
2340 BasicBlock *Body = *(CurLoop->block_begin());
2341 {
2342 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2343 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2344 Type *Ty = TripCnt->getType();
2345
2346 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi");
2347 TcPhi->insertBefore(Body->begin());
2348
2349 Builder.SetInsertPoint(LbCond);
2350 Instruction *TcDec = cast<Instruction>(
2351 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2352 "tcdec", false, true));
2353
2354 TcPhi->addIncoming(TripCnt, PreHead);
2355 TcPhi->addIncoming(TcDec, Body);
2356
2357 CmpInst::Predicate Pred =
2358 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
2359 LbCond->setPredicate(Pred);
2360 LbCond->setOperand(0, TcDec);
2361 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2362 }
2363
2364 // Step 4: All the references to the original population counter outside
2365 // the loop are replaced with the NewCount -- the value returned from
2366 // __builtin_ctpop().
2367 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2368
2369 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
2370 // loop. The loop would otherwise not be deleted even if it becomes empty.
2371 SE->forgetLoop(CurLoop);
2372}
2373
2374/// Match loop-invariant value.
2375template <typename SubPattern_t> struct match_LoopInvariant {
2376 SubPattern_t SubPattern;
2377 const Loop *L;
2378
2379 match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
2380 : SubPattern(SP), L(L) {}
2381
2382 template <typename ITy> bool match(ITy *V) {
2383 return L->isLoopInvariant(V) && SubPattern.match(V);
2384 }
2385};
2386
2387/// Matches if the value is loop-invariant.
2388template <typename Ty>
2389inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
2390 return match_LoopInvariant<Ty>(M, L);
2391}
2392
2393/// Return true if the idiom is detected in the loop.
2394///
2395/// The core idiom we are trying to detect is:
2396/// \code
2397/// entry:
2398/// <...>
2399/// %bitmask = shl i32 1, %bitpos
2400/// br label %loop
2401///
2402/// loop:
2403/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2404/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2405/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2406/// %x.next = shl i32 %x.curr, 1
2407/// <...>
2408/// br i1 %x.curr.isbitunset, label %loop, label %end
2409///
2410/// end:
2411/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2412/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2413/// <...>
2414/// \endcode
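///
/// For example (illustrative values): for i32 %x = 1 and %bitpos = 3, %x.curr
/// takes the values 1, 2, 4, 8 before bit 3 is set, i.e. the backedge is taken
/// 3 times; the transform below computes exactly that as
/// %backedgetakencount = %bitpos - (position of the leading one of %x.masked).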
2415static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
2416 Value *&BitMask, Value *&BitPos,
2417 Value *&CurrX, Instruction *&NextX) {
2419 " Performing shift-until-bittest idiom detection.\n");
2420
2421 // Give up if the loop has multiple blocks or multiple backedges.
2422 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2423 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2424 return false;
2425 }
2426
2427 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2428 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2429 assert(LoopPreheaderBB && "There is always a loop preheader.");
2430
2431 using namespace PatternMatch;
2432
2433 // Step 1: Check if the loop backedge is in desirable form.
2434
2435 CmpPredicate Pred;
2436 Value *CmpLHS, *CmpRHS;
2437 BasicBlock *TrueBB, *FalseBB;
2438 if (!match(LoopHeaderBB->getTerminator(),
2439 m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
2440 m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
2441 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2442 return false;
2443 }
2444
2445 // Step 2: Check if the backedge's condition is in desirable form.
2446
2447 auto MatchVariableBitMask = [&]() {
2448 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2449 match(CmpLHS,
2450 m_c_And(m_Value(CurrX),
2451 m_CombineAnd(
2452 m_Value(BitMask),
2453 m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
2454 CurLoop))));
2455 };
2456 auto MatchConstantBitMask = [&]() {
2457 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2458 match(CmpLHS, m_And(m_Value(CurrX),
2459 m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
2460 (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
2461 };
2462 auto MatchDecomposableConstantBitMask = [&]() {
2463 auto Res = llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred);
2464 if (Res && Res->Mask.isPowerOf2()) {
2465 assert(ICmpInst::isEquality(Res->Pred));
2466 Pred = Res->Pred;
2467 CurrX = Res->X;
2468 BitMask = ConstantInt::get(CurrX->getType(), Res->Mask);
2469 BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2());
2470 return true;
2471 }
2472 return false;
2473 };
2474
2475 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2476 !MatchDecomposableConstantBitMask()) {
2477 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
2478 return false;
2479 }
2480
2481 // Step 3: Check if the recurrence is in desirable form.
2482 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2483 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2484 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2485 return false;
2486 }
2487
2488 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2489 NextX =
2490 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2491
2492 assert(CurLoop->isLoopInvariant(BaseX) &&
2493 "Expected BaseX to be available in the preheader!");
2494
2495 if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2496 // FIXME: support right-shift?
2497 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2498 return false;
2499 }
2500
2501 // Step 4: Check if the backedge's destinations are in desirable form.
2502
2504 "Should only get equality predicates here.");
2505
2506 // cmp-br is commutative, so canonicalize to a single variant.
2507 if (Pred != ICmpInst::Predicate::ICMP_EQ) {
2508 Pred = ICmpInst::getInversePredicate(Pred);
2509 std::swap(TrueBB, FalseBB);
2510 }
2511
2512 // We expect to exit loop when comparison yields false,
2513 // so when it yields true we should branch back to loop header.
2514 if (TrueBB != LoopHeaderBB) {
2515 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2516 return false;
2517 }
2518
2519 // Okay, idiom checks out.
2520 return true;
2521}
2522
2523/// Look for the following loop:
2524/// \code
2525/// entry:
2526/// <...>
2527/// %bitmask = shl i32 1, %bitpos
2528/// br label %loop
2529///
2530/// loop:
2531/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2532/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2533/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2534/// %x.next = shl i32 %x.curr, 1
2535/// <...>
2536/// br i1 %x.curr.isbitunset, label %loop, label %end
2537///
2538/// end:
2539/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2540/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2541/// <...>
2542/// \endcode
2543///
2544/// And transform it into:
2545/// \code
2546/// entry:
2547/// %bitmask = shl i32 1, %bitpos
2548/// %lowbitmask = add i32 %bitmask, -1
2549/// %mask = or i32 %lowbitmask, %bitmask
2550/// %x.masked = and i32 %x, %mask
2551/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
2552/// i1 true)
2553/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
2554/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
2555/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
2556/// %tripcount = add i32 %backedgetakencount, 1
2557/// %x.curr = shl i32 %x, %backedgetakencount
2558/// %x.next = shl i32 %x, %tripcount
2559/// br label %loop
2560///
2561/// loop:
2562/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
2563/// %loop.iv.next = add nuw i32 %loop.iv, 1
2564/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
2565/// <...>
2566/// br i1 %loop.ivcheck, label %end, label %loop
2567///
2568/// end:
2569/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2570/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2571/// <...>
2572/// \endcode
2573bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2574 bool MadeChange = false;
2575
2576 Value *X, *BitMask, *BitPos, *XCurr;
2577 Instruction *XNext;
2578 if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
2579 XNext)) {
2581 " shift-until-bittest idiom detection failed.\n");
2582 return MadeChange;
2583 }
2584 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
2585
2586 // Ok, it is the idiom we were looking for, we *could* transform this loop,
2587 // but is it profitable to transform?
2588
2589 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2590 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2591 assert(LoopPreheaderBB && "There is always a loop preheader.");
2592
2593 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2594 assert(SuccessorBB && "There is only a single successor.");
2595
2596 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2597 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
2598
2599 Intrinsic::ID IntrID = Intrinsic::ctlz;
2600 Type *Ty = X->getType();
2601 unsigned Bitwidth = Ty->getScalarSizeInBits();
2602
2603 TargetTransformInfo::TargetCostKind CostKind =
2604 TargetTransformInfo::TCK_SizeAndLatency;
2605
2606 // The rewrite is considered to be unprofitable if and only if the
2607 // intrinsic/shift we'll use is not cheap. Note that we are okay with *just*
2608 // making the loop countable, even if nothing else changes.
2609 IntrinsicCostAttributes Attrs(
2610 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()});
2611 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2612 if (Cost > TargetTransformInfo::TCC_Basic) {
2613 LLVM_DEBUG(dbgs() << DEBUG_TYPE
2614 " Intrinsic is too costly, not beneficial\n");
2615 return MadeChange;
2616 }
2617 if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
2618 TargetTransformInfo::TCC_Basic) {
2619 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
2620 return MadeChange;
2621 }
2622
2623 // Ok, transform appears worthwhile.
2624 MadeChange = true;
2625
2626 if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
2627 // BitMask may be computed from BitPos. Freeze BitPos so we can increase
2628 // its use count.
2629 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
2630 if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
2631 InsertPt = BitPosI->getInsertionPointAfterDef();
2632 else
2633 InsertPt = DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
2634 if (!InsertPt)
2635 return false;
2636 FreezeInst *BitPosFrozen =
2637 new FreezeInst(BitPos, BitPos->getName() + ".fr", *InsertPt);
2638 BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
2639 return U.getUser() != BitPosFrozen;
2640 });
2641 BitPos = BitPosFrozen;
2642 }
2643
2644 // Step 1: Compute the loop trip count.
2645
2646 Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
2647 BitPos->getName() + ".lowbitmask");
2648 Value *Mask =
2649 Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
2650 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
2651 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2652 IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
2653 /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
2654 Value *XMaskedNumActiveBits = Builder.CreateSub(
2655 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
2656 XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
2657 /*HasNSW=*/Bitwidth != 2);
2658 Value *XMaskedLeadingOnePos =
2659 Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
2660 XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
2661 /*HasNSW=*/Bitwidth > 2);
2662
2663 Value *LoopBackedgeTakenCount = Builder.CreateSub(
2664 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
2665 /*HasNUW=*/true, /*HasNSW=*/true);
2666 // We know the loop's backedge-taken count, but what's the loop's trip count?
2667 // Note that NUW is always safe, while NSW only holds for bitwidths != 2.
2668 Value *LoopTripCount =
2669 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2670 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
2671 /*HasNSW=*/Bitwidth != 2);
2672
2673 // Step 2: Compute the recurrence's final value without a loop.
2674
2675 // NewX is always safe to compute, because `LoopBackedgeTakenCount`
2676 // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
2677 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
2678 NewX->takeName(XCurr);
2679 if (auto *I = dyn_cast<Instruction>(NewX))
2680 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2681
2682 Value *NewXNext;
2683 // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
2684 // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
2685 // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
2686 // that isn't the case, we'll need to emit an alternative, safe IR.
2687 if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
2688 match(BitPos,
2689 m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
2690 APInt(Ty->getScalarSizeInBits(),
2691 Ty->getScalarSizeInBits() - 1))))
2692 NewXNext = Builder.CreateShl(X, LoopTripCount);
2693 else {
2694 // Otherwise, just additionally shift by one. It's the smallest solution;
2695 // alternatively, we could check that NewX is INT_MIN (or that BitPos is
2696 // bitwidth(X) - 1) and select 0 instead.
2697 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2698 }
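// E.g. (illustrative) for i32 X = 1 and BitPos = 31: LoopTripCount is 32, so
// `X << 32` would be poison; instead NewX = 1 << 31 = INT_MIN is shifted once
// more, yielding the 0 that the final loop iteration would have produced.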
2699
2700 NewXNext->takeName(XNext);
2701 if (auto *I = dyn_cast<Instruction>(NewXNext))
2702 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2703
2704 // Step 3: Adjust the successor basic block to receive the computed
2705 // recurrence's final value instead of the recurrence itself.
2706
2707 XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
2708 XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
2709
2710 // Step 4: Rewrite the loop into a countable form, with canonical IV.
2711
2712 // The new canonical induction variable.
2713 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
2714 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2715
2716 // The induction itself.
2717 // Note that NUW is always safe, while NSW only holds for bitwidths != 2.
2718 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2719 auto *IVNext =
2720 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
2721 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
2722
2723 // The loop trip count check.
2724 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2725 CurLoop->getName() + ".ivcheck");
2726 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2727 LoopHeaderBB->getTerminator()->eraseFromParent();
2728
2729 // Populate the IV PHI.
2730 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2731 IV->addIncoming(IVNext, LoopHeaderBB);
2732
2733 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
2734 // loop. The loop would otherwise not be deleted even if it becomes empty.
2735
2736 SE->forgetLoop(CurLoop);
2737
2738 // Other passes will take care of actually deleting the loop if possible.
2739
2740 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
2741
2742 ++NumShiftUntilBitTest;
2743 return MadeChange;
2744}
2745
2746/// Return true if the idiom is detected in the loop.
2747///
2748/// The core idiom we are trying to detect is:
2749/// \code
2750/// entry:
2751/// <...>
2752/// %start = <...>
2753/// %extraoffset = <...>
2754/// <...>
2755/// br label %for.cond
2756///
2757/// loop:
2758/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2759/// %nbits = add nsw i8 %iv, %extraoffset
2760/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2761/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2762/// %iv.next = add i8 %iv, 1
2763/// <...>
2764/// br i1 %val.shifted.iszero, label %end, label %loop
2765///
2766/// end:
2767/// %iv.res = phi i8 [ %iv, %loop ] <...>
2768/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2769/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2770/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2771/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2772/// <...>
2773/// \endcode
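///
/// For example (illustrative values): for i8 %val = 8, %start = 0 and
/// %extraoffset = 0, the loop exits once %nbits reaches 4 (8 >> 4 == 0), so
/// %iv.res = 4 = 8 - ctlz(i8 8); the rewrite computes this "active bit count"
/// directly, as shown below.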
2774 static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
2775 Instruction *&ValShiftedIsZero,
2776 Intrinsic::ID &IntrinID, Instruction *&IV,
2777 Value *&Start, Value *&Val,
2778 const SCEV *&ExtraOffsetExpr,
2779 bool &InvertedCond) {
2781 " Performing shift-until-zero idiom detection.\n");
2782
2783 // Give up if the loop has multiple blocks or multiple backedges.
2784 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2785 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2786 return false;
2787 }
2788
2789 Instruction *ValShifted, *NBits, *IVNext;
2790 Value *ExtraOffset;
2791
2792 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2793 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2794 assert(LoopPreheaderBB && "There is always a loop preheader.");
2795
2796 using namespace PatternMatch;
2797
2798 // Step 1: Check if the loop backedge and its condition are in desirable form.
2799
2800 CmpPredicate Pred;
2801 BasicBlock *TrueBB, *FalseBB;
2802 if (!match(LoopHeaderBB->getTerminator(),
2803 m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
2804 m_BasicBlock(FalseBB))) ||
2805 !match(ValShiftedIsZero,
2806 m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
2807 !ICmpInst::isEquality(Pred)) {
2808 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2809 return false;
2810 }
2811
2812 // Step 2: Check if the comparison's operand is in desirable form.
2813 // FIXME: Val could be a one-input PHI node, which we should look past.
2814 if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
2815 m_Instruction(NBits)))) {
2816 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparison value computation.\n");
2817 return false;
2818 }
2819 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
2820 : Intrinsic::ctlz;
2821
2822 // Step 3: Check if the shift amount is in desirable form.
2823
2824 if (match(NBits, m_c_Add(m_Instruction(IV),
2825 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2826 (NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
2827 ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
2828 else if (match(NBits,
2829 m_Sub(m_Instruction(IV),
2830 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2831 NBits->hasNoSignedWrap())
2832 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
2833 else {
2834 IV = NBits;
2835 ExtraOffsetExpr = SE->getZero(NBits->getType());
2836 }
2837
2838 // Step 4: Check if the recurrence is in desirable form.
2839 auto *IVPN = dyn_cast<PHINode>(IV);
2840 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2841 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2842 return false;
2843 }
2844
2845 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2846 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2847
2848 if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
2849 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2850 return false;
2851 }
2852
2853 // Step 5: Check if the backedge's destinations are in desirable form.
2854
2856 "Should only get equality predicates here.");
2857
2858 // cmp-br is commutative, so canonicalize to a single variant.
2859 InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
2860 if (InvertedCond) {
2861 Pred = ICmpInst::getInversePredicate(Pred);
2862 std::swap(TrueBB, FalseBB);
2863 }
2864
2865 // We expect to exit loop when comparison yields true,
2866 // so when it yields false we should branch back to loop header.
2867 if (FalseBB != LoopHeaderBB) {
2868 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2869 return false;
2870 }
2871
2872 // The new, countable, loop will certainly only run a known number of
2873 // iterations; it won't be infinite. But the old loop might be infinite
2874 // under certain conditions. For logical shifts, the value will become zero
2875 // after at most bitwidth(%Val) loop iterations. However, for arithmetic
2876 // right-shift, if the sign bit was set, the value will never become zero,
2877 // and the loop may never finish.
2878 if (ValShifted->getOpcode() == Instruction::AShr &&
2879 !isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
2880 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
2881 return false;
2882 }
2883
2884 // Okay, idiom checks out.
2885 return true;
2886}
2887
2888/// Look for the following loop:
2889/// \code
2890/// entry:
2891/// <...>
2892/// %start = <...>
2893/// %extraoffset = <...>
2894/// <...>
2895/// br label %for.cond
2896///
2897/// loop:
2898/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2899/// %nbits = add nsw i8 %iv, %extraoffset
2900/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
2901/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2902/// %iv.next = add i8 %iv, 1
2903/// <...>
2904/// br i1 %val.shifted.iszero, label %end, label %loop
2905///
2906/// end:
2907/// %iv.res = phi i8 [ %iv, %loop ] <...>
2908/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2909/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2910/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2911/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2912/// <...>
2913/// \endcode
2914///
2915/// And transform it into:
2916/// \code
2917/// entry:
2918/// <...>
2919/// %start = <...>
2920/// %extraoffset = <...>
2921/// <...>
2922/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
2923/// %val.numactivebits = sub i8 8, %val.numleadingzeros
2924/// %extraoffset.neg = sub i8 0, %extraoffset
2925/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
2926/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
2927/// %loop.tripcount = sub i8 %iv.final, %start
2928/// br label %loop
2929///
2930/// loop:
2931/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
2932/// %loop.iv.next = add i8 %loop.iv, 1
2933/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
2934/// %iv = add i8 %loop.iv, %start
2935/// <...>
2936/// br i1 %loop.ivcheck, label %end, label %loop
2937///
2938/// end:
2939/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
2940/// <...>
2941/// \endcode
2942bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2943 bool MadeChange = false;
2944
2945 Instruction *ValShiftedIsZero;
2946 Intrinsic::ID IntrID;
2947 Instruction *IV;
2948 Value *Start, *Val;
2949 const SCEV *ExtraOffsetExpr;
2950 bool InvertedCond;
2951 if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
2952 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2954 " shift-until-zero idiom detection failed.\n");
2955 return MadeChange;
2956 }
2957 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
2958
2959 // Ok, it is the idiom we were looking for, we *could* transform this loop,
2960 // but is it profitable to transform?
2961
2962 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2963 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2964 assert(LoopPreheaderBB && "There is always a loop preheader.");
2965
2966 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2967 assert(SuccessorBB && "There is only a single successor.");
2968
2969 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2970 Builder.SetCurrentDebugLocation(IV->getDebugLoc());
2971
2972 Type *Ty = Val->getType();
2973 unsigned Bitwidth = Ty->getScalarSizeInBits();
2974
2975 TargetTransformInfo::TargetCostKind CostKind =
2976 TargetTransformInfo::TCK_SizeAndLatency;
2977
2978 // The rewrite is considered to be unprofitable if and only if the
2979 // intrinsic we'll use is not cheap. Note that we are okay with *just*
2980 // making the loop countable, even if nothing else changes.
2981 IntrinsicCostAttributes Attrs(
2982 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()});
2983 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2984 if (Cost > TargetTransformInfo::TCC_Basic) {
2985 LLVM_DEBUG(dbgs() << DEBUG_TYPE
2986 " Intrinsic is too costly, not beneficial\n");
2987 return MadeChange;
2988 }
2989
2990 // Ok, transform appears worthwhile.
2991 MadeChange = true;
2992
2993 bool OffsetIsZero = false;
2994 if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2995 OffsetIsZero = ExtraOffsetExprC->isZero();
2996
2997 // Step 1: Compute the loop's final IV value / trip count.
2998
2999 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
3000 IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
3001 /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
3002 Value *ValNumActiveBits = Builder.CreateSub(
3003 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
3004 Val->getName() + ".numactivebits", /*HasNUW=*/true,
3005 /*HasNSW=*/Bitwidth != 2);
3006
3007 SCEVExpander Expander(*SE, *DL, "loop-idiom");
3008 Expander.setInsertPoint(&*Builder.GetInsertPoint());
3009 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3010
3011 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3012 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
3013 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
3014 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3015 {ValNumActiveBitsOffset, Start},
3016 /*FMFSource=*/nullptr, "iv.final");
3017
3018 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
3019 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
3020 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
3021 // FIXME: or when the offset was `add nuw`
3022
3023 // We know loop's backedge-taken count, but what's loop's trip count?
3024 Value *LoopTripCount =
3025 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3026 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3027 /*HasNSW=*/Bitwidth != 2);
3028
3029 // Step 2: Adjust the successor basic block to receive the original
3030 // induction variable's final value instead of the orig. IV itself.
3031
3032 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3033
3034 // Step 3: Rewrite the loop into a countable form, with canonical IV.
3035
3036 // The new canonical induction variable.
3037 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3038 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3039
3040 // The induction itself.
3041 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
3042 auto *CIVNext =
3043 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
3044 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3045
3046 // The loop trip count check.
3047 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3048 CurLoop->getName() + ".ivcheck");
3049 auto *NewIVCheck = CIVCheck;
3050 if (InvertedCond) {
3051 NewIVCheck = Builder.CreateNot(CIVCheck);
3052 NewIVCheck->takeName(ValShiftedIsZero);
3053 }
3054
3055 // The original IV, but rebased to be an offset to the CIV.
3056 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
3057 /*HasNSW=*/true); // FIXME: what about NUW?
3058 IVDePHId->takeName(IV);
3059
3060 // The loop terminator.
3061 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3062 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3063 LoopHeaderBB->getTerminator()->eraseFromParent();
3064
3065 // Populate the IV PHI.
3066 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3067 CIV->addIncoming(CIVNext, LoopHeaderBB);
3068
3069 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
3070 // loop. The loop would otherwise not be deleted even if it becomes empty.
3071
3072 SE->forgetLoop(CurLoop);
3073
3074 // Step 5: Try to cleanup the loop's body somewhat.
3075 IV->replaceAllUsesWith(IVDePHId);
3076 IV->eraseFromParent();
3077
3078 ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
3079 ValShiftedIsZero->eraseFromParent();
3080
3081 // Other passes will take care of actually deleting the loop if possible.
3082
3083 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
3084
3085 ++NumShiftUntilZero;
3086 return MadeChange;
3087}
static Value * matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry, APInt &Threshold)
Check if the given conditional branch is based on an unsigned less-than comparison between a variable...
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
#define DEBUG_TYPE
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
Definition: APInt.h:1532
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:374
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:471
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:430
size_t size() const
Definition: BasicBlock.h:469
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
BinaryOps getOpcode() const
Definition: InstrTypes.h:370
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:766
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
Definition: Constants.cpp:2662
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:880
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
NodeT * getRoot() const
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:143
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:231
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:458
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isShift() const
Definition: Instruction.h:282
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:205
bool isUnordered() const
Definition: Instructions.h:249
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
StringRef getName() const
Definition: LoopInfo.h:388
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
bool isVolatile() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:109
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:49
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:45
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
iterator end() const
Definition: SmallPtrSet.h:477
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
iterator begin() const
Definition: SmallPtrSet.h:472
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Align getAlign() const
Definition: Instructions.h:333
Value * getValueOperand()
Definition: Instructions.h:378
Value * getPointerOperand()
Definition: Instructions.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
unsigned getAtomicMemIntrinsicMaxElementSize() const
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Basic
The cost of a typical 'add' instruction.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:542
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:592
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ HeaderSize
Definition: BTF.h:61
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:698
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:463
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1150
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false)
Decompose an icmp into the form ((X & Mask) pred C) if possible.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
Definition: Metadata.h:780
MDNode * Scope
The tag for alias scope specification (used with noalias).
Definition: Metadata.h:783
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:777
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
Definition: Metadata.h:786
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
Definition: Metadata.h:827
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)