LLVM 22.0.0git
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements an idiom recognizer that transforms simple loops into a
10// non-loop form. In cases that this kicks in, it can be a significant
11// performance win.
12//
13// If compiling for code size we avoid idiom recognition if the resulting
14// code could be larger than the code for the original loop. One way this could
15// happen is if the loop is not removable after idiom recognition due to the
16// presence of non-idiom instructions. The initial implementation of the
17// heuristics applies to idioms in multi-block loops.
18//
19//===----------------------------------------------------------------------===//
20//
21// TODO List:
22//
23// Future loop memory idioms to recognize: memcmp, etc.
24//
25// This could recognize common matrix multiplies and dot product idioms and
26// replace them with calls to BLAS (if linked in??).
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugLoc.h"
62#include "llvm/IR/Dominators.h"
63#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
66#include "llvm/IR/InstrTypes.h"
67#include "llvm/IR/Instruction.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/LLVMContext.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/PassManager.h"
76#include "llvm/IR/Type.h"
77#include "llvm/IR/User.h"
78#include "llvm/IR/Value.h"
79#include "llvm/IR/ValueHandle.h"
82#include "llvm/Support/Debug.h"
89#include <algorithm>
90#include <cassert>
91#include <cstdint>
92#include <utility>
93
94using namespace llvm;
95using namespace SCEVPatternMatch;
96
97#define DEBUG_TYPE "loop-idiom"
98
99STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
100STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
101STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
102STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
104 NumShiftUntilBitTest,
105 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
106STATISTIC(NumShiftUntilZero,
107 "Number of uncountable loops recognized as 'shift until zero' idiom");
108
109namespace llvm {
112 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
113 cl::desc("Options to disable Loop Idiom Recognize Pass."),
116
119 DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
120 cl::desc("Proceed with loop idiom recognize pass, but do "
121 "not convert loop(s) to memset."),
124
127 DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
128 cl::desc("Proceed with loop idiom recognize pass, but do "
129 "not convert loop(s) to memcpy."),
132
135 DisableLIRPStrlen("disable-loop-idiom-strlen",
136 cl::desc("Proceed with loop idiom recognize pass, but do "
137 "not convert loop(s) to strlen."),
140
143 EnableLIRPWcslen("disable-loop-idiom-wcslen",
144 cl::desc("Proceed with loop idiom recognize pass, "
145 "enable conversion of loop(s) to wcslen."),
148
151 DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize",
152 cl::desc("Proceed with loop idiom recognize pass, "
153 "but do not optimize CRC loops."),
155 cl::init(false), cl::ReallyHidden);
156
158 "use-lir-code-size-heurs",
159 cl::desc("Use loop idiom recognition code size heuristics when compiling "
160 "with -Os/-Oz"),
161 cl::init(true), cl::Hidden);
162
164 "loop-idiom-force-memset-pattern-intrinsic",
165 cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
166 cl::Hidden);
167
169
170} // namespace llvm
171
172namespace {
173
174class LoopIdiomRecognize {
175 Loop *CurLoop = nullptr;
177 DominatorTree *DT;
178 LoopInfo *LI;
179 ScalarEvolution *SE;
182 const DataLayout *DL;
184 bool ApplyCodeSizeHeuristics;
185 std::unique_ptr<MemorySSAUpdater> MSSAU;
186
187public:
188 explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
189 LoopInfo *LI, ScalarEvolution *SE,
191 const TargetTransformInfo *TTI, MemorySSA *MSSA,
192 const DataLayout *DL,
194 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
195 if (MSSA)
196 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
197 }
198
199 bool runOnLoop(Loop *L);
200
201private:
202 using StoreList = SmallVector<StoreInst *, 8>;
203 using StoreListMap = MapVector<Value *, StoreList>;
204
205 StoreListMap StoreRefsForMemset;
206 StoreListMap StoreRefsForMemsetPattern;
207 StoreList StoreRefsForMemcpy;
208 bool HasMemset;
209 bool HasMemsetPattern;
210 bool HasMemcpy;
211
212 /// Return code for isLegalStore()
213 enum LegalStoreKind {
214 None = 0,
215 Memset,
216 MemsetPattern,
217 Memcpy,
218 UnorderedAtomicMemcpy,
219 DontUse // Dummy retval never to be used. Allows catching errors in retval
220 // handling.
221 };
222
223 /// \name Countable Loop Idiom Handling
224 /// @{
225
226 bool runOnCountableLoop();
227 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
228 SmallVectorImpl<BasicBlock *> &ExitBlocks);
229
230 void collectStores(BasicBlock *BB);
231 LegalStoreKind isLegalStore(StoreInst *SI);
232 enum class ForMemset { No, Yes };
233 bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
234 ForMemset For);
235
236 template <typename MemInst>
237 bool processLoopMemIntrinsic(
238 BasicBlock *BB,
239 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
240 const SCEV *BECount);
241 bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
242 bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
243
244 bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
245 MaybeAlign StoreAlignment, Value *StoredVal,
246 Instruction *TheStore,
247 SmallPtrSetImpl<Instruction *> &Stores,
248 const SCEVAddRecExpr *Ev, const SCEV *BECount,
249 bool IsNegStride, bool IsLoopMemset = false);
250 bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
251 bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
252 const SCEV *StoreSize, MaybeAlign StoreAlign,
253 MaybeAlign LoadAlign, Instruction *TheStore,
254 Instruction *TheLoad,
255 const SCEVAddRecExpr *StoreEv,
256 const SCEVAddRecExpr *LoadEv,
257 const SCEV *BECount);
258 bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
259 bool IsLoopMemset = false);
260 bool optimizeCRCLoop(const PolynomialInfo &Info);
261
262 /// @}
263 /// \name Noncountable Loop Idiom Handling
264 /// @{
265
266 bool runOnNoncountableLoop();
267
268 bool recognizePopcount();
269 void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
270 PHINode *CntPhi, Value *Var);
271 bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
272 bool ZeroCheck, size_t CanonicalSize);
273 bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
274 Instruction *DefX, PHINode *CntPhi,
275 Instruction *CntInst);
276 bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
277 bool recognizeShiftUntilLessThan();
278 void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
279 Instruction *CntInst, PHINode *CntPhi,
280 Value *Var, Instruction *DefX,
281 const DebugLoc &DL, bool ZeroCheck,
282 bool IsCntPhiUsedOutsideLoop,
283 bool InsertSub = false);
284
285 bool recognizeShiftUntilBitTest();
286 bool recognizeShiftUntilZero();
287 bool recognizeAndInsertStrLen();
288
289 /// @}
290};
291} // end anonymous namespace
292
295 LPMUpdater &) {
297 return PreservedAnalyses::all();
298
299 const auto *DL = &L.getHeader()->getDataLayout();
300
301 // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
302 // pass. Function analyses need to be preserved across loop transformations
303 // but ORE cannot be preserved (see comment before the pass definition).
304 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
305
306 LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
307 AR.MSSA, DL, ORE);
308 if (!LIR.runOnLoop(&L))
309 return PreservedAnalyses::all();
310
312 if (AR.MSSA)
313 PA.preserve<MemorySSAAnalysis>();
314 return PA;
315}
316
318 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
319 I->eraseFromParent();
320}
321
322//===----------------------------------------------------------------------===//
323//
324// Implementation of LoopIdiomRecognize
325//
326//===----------------------------------------------------------------------===//
327
328bool LoopIdiomRecognize::runOnLoop(Loop *L) {
329 CurLoop = L;
330 // If the loop could not be converted to canonical form, it must have an
331 // indirectbr in it, just give up.
332 if (!L->getLoopPreheader())
333 return false;
334
335 // Disable loop idiom recognition if the function's name is a common idiom.
336 StringRef Name = L->getHeader()->getParent()->getName();
337 if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
338 Name == "wcslen")
339 return false;
340
341 // Determine if code size heuristics need to be applied.
342 ApplyCodeSizeHeuristics =
343 L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
344
345 HasMemset = TLI->has(LibFunc_memset);
346 // TODO: Unconditionally enable use of the memset pattern intrinsic (or at
347 // least, opt-in via target hook) once we are confident it will never result
348 // in worse codegen than without. For now, use it only when the target
349 // supports memset_pattern16 libcall (or unless this is overridden by
350 // command line option).
351 HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
352 HasMemcpy = TLI->has(LibFunc_memcpy);
353
354 if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic ||
355 HasMemcpy || !DisableLIRP::HashRecognize)
357 return runOnCountableLoop();
358
359 return runOnNoncountableLoop();
360}
361
362bool LoopIdiomRecognize::runOnCountableLoop() {
363 const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
365 "runOnCountableLoop() called on a loop without a predictable"
366 "backedge-taken count");
367
368 // If this loop executes exactly one time, then it should be peeled, not
369 // optimized by this pass.
370 if (BECount->isZero())
371 return false;
372
374 CurLoop->getUniqueExitBlocks(ExitBlocks);
375
376 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
377 << CurLoop->getHeader()->getParent()->getName()
378 << "] Countable Loop %" << CurLoop->getHeader()->getName()
379 << "\n");
380
381 // The following transforms hoist stores/memsets into the loop pre-header.
382 // Give up if the loop has instructions that may throw.
383 SimpleLoopSafetyInfo SafetyInfo;
384 SafetyInfo.computeLoopSafetyInfo(CurLoop);
385 if (SafetyInfo.anyBlockMayThrow())
386 return false;
387
388 bool MadeChange = false;
389
390 // Scan all the blocks in the loop that are not in subloops.
391 for (auto *BB : CurLoop->getBlocks()) {
392 // Ignore blocks in subloops.
393 if (LI->getLoopFor(BB) != CurLoop)
394 continue;
395
396 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
397 }
398
399 // Optimize a CRC loop if HashRecognize found one, provided we're not
400 // optimizing for size.
401 if (!DisableLIRP::HashRecognize && !ApplyCodeSizeHeuristics)
402 if (auto Res = HashRecognize(*CurLoop, *SE).getResult())
403 optimizeCRCLoop(*Res);
404
405 return MadeChange;
406}
407
408static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
409 const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
410 return ConstStride->getAPInt();
411}
412
413/// getMemSetPatternValue - If a strided store of the specified value is safe to
414/// turn into a memset.patternn intrinsic, return the Constant that should
415/// be passed in. Otherwise, return null.
416///
417/// TODO this function could allow more constants than it does today (e.g.
418/// those over 16 bytes) now it has transitioned to being used for the
419/// memset.pattern intrinsic rather than directly the memset_pattern16
420/// libcall.
422 // FIXME: This could check for UndefValue because it can be merged into any
423 // other valid pattern.
424
425 // If the value isn't a constant, we can't promote it to being in a constant
426 // array. We could theoretically do a store to an alloca or something, but
427 // that doesn't seem worthwhile.
429 if (!C || isa<ConstantExpr>(C))
430 return nullptr;
431
432 // Only handle simple values that are a power of two bytes in size.
433 uint64_t Size = DL->getTypeSizeInBits(V->getType());
434 if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
435 return nullptr;
436
437 // Don't care enough about darwin/ppc to implement this.
438 if (DL->isBigEndian())
439 return nullptr;
440
441 // Convert to size in bytes.
442 Size /= 8;
443
444 // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
445 // if the top and bottom are the same (e.g. for vectors and large integers).
446 if (Size > 16)
447 return nullptr;
448
449 // For now, don't handle types that aren't int, floats, or pointers.
450 Type *CTy = C->getType();
451 if (!CTy->isIntOrPtrTy() && !CTy->isFloatingPointTy())
452 return nullptr;
453
454 return C;
455}
456
457LoopIdiomRecognize::LegalStoreKind
458LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
459 // Don't touch volatile stores.
460 if (SI->isVolatile())
461 return LegalStoreKind::None;
462 // We only want simple or unordered-atomic stores.
463 if (!SI->isUnordered())
464 return LegalStoreKind::None;
465
466 // Avoid merging nontemporal stores.
467 if (SI->getMetadata(LLVMContext::MD_nontemporal))
468 return LegalStoreKind::None;
469
470 Value *StoredVal = SI->getValueOperand();
471 Value *StorePtr = SI->getPointerOperand();
472
473 // Don't convert stores of non-integral pointer types to memsets (which stores
474 // integers).
475 if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
476 return LegalStoreKind::None;
477
478 // Reject stores that are so large that they overflow an unsigned.
479 // When storing out scalable vectors we bail out for now, since the code
480 // below currently only works for constant strides.
481 TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
482 if (SizeInBits.isScalable() || (SizeInBits.getFixedValue() & 7) ||
483 (SizeInBits.getFixedValue() >> 32) != 0)
484 return LegalStoreKind::None;
485
486 // See if the pointer expression is an AddRec like {base,+,1} on the current
487 // loop, which indicates a strided store. If we have something else, it's a
488 // random store we can't handle.
489 const SCEV *StoreEv = SE->getSCEV(StorePtr);
490 const SCEVConstant *Stride;
491 if (!match(StoreEv, m_scev_AffineAddRec(m_SCEV(), m_SCEVConstant(Stride),
492 m_SpecificLoop(CurLoop))))
493 return LegalStoreKind::None;
494
495 // See if the store can be turned into a memset.
496
497 // If the stored value is a byte-wise value (like i32 -1), then it may be
498 // turned into a memset of i8 -1, assuming that all the consecutive bytes
499 // are stored. A store of i32 0x01020304 can never be turned into a memset,
500 // but it can be turned into memset_pattern if the target supports it.
501 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
502
503 // Note: memset and memset_pattern on unordered-atomic is yet not supported
504 bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
505
506 // If we're allowed to form a memset, and the stored value would be
507 // acceptable for memset, use it.
508 if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
509 // Verify that the stored value is loop invariant. If not, we can't
510 // promote the memset.
511 CurLoop->isLoopInvariant(SplatValue)) {
512 // It looks like we can use SplatValue.
513 return LegalStoreKind::Memset;
514 }
515 if (!UnorderedAtomic && (HasMemsetPattern || ForceMemsetPatternIntrinsic) &&
517 // Don't create memset_pattern16s with address spaces.
518 StorePtr->getType()->getPointerAddressSpace() == 0 &&
519 getMemSetPatternValue(StoredVal, DL)) {
520 // It looks like we can use PatternValue!
521 return LegalStoreKind::MemsetPattern;
522 }
523
524 // Otherwise, see if the store can be turned into a memcpy.
525 if (HasMemcpy && !DisableLIRP::Memcpy) {
526 // Check to see if the stride matches the size of the store. If so, then we
527 // know that every byte is touched in the loop.
528 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
529 APInt StrideAP = Stride->getAPInt();
530 if (StoreSize != StrideAP && StoreSize != -StrideAP)
531 return LegalStoreKind::None;
532
533 // The store must be feeding a non-volatile load.
534 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
535
536 // Only allow non-volatile loads
537 if (!LI || LI->isVolatile())
538 return LegalStoreKind::None;
539 // Only allow simple or unordered-atomic loads
540 if (!LI->isUnordered())
541 return LegalStoreKind::None;
542
543 // See if the pointer expression is an AddRec like {base,+,1} on the current
544 // loop, which indicates a strided load. If we have something else, it's a
545 // random load we can't handle.
546 const SCEV *LoadEv = SE->getSCEV(LI->getPointerOperand());
547
548 // The store and load must share the same stride.
549 if (!match(LoadEv, m_scev_AffineAddRec(m_SCEV(), m_scev_Specific(Stride),
550 m_SpecificLoop(CurLoop))))
551 return LegalStoreKind::None;
552
553 // Success. This store can be converted into a memcpy.
554 UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
555 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
556 : LegalStoreKind::Memcpy;
557 }
558 // This store can't be transformed into a memset/memcpy.
559 return LegalStoreKind::None;
560}
561
562void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
563 StoreRefsForMemset.clear();
564 StoreRefsForMemsetPattern.clear();
565 StoreRefsForMemcpy.clear();
566 for (Instruction &I : *BB) {
568 if (!SI)
569 continue;
570
571 // Make sure this is a strided store with a constant stride.
572 switch (isLegalStore(SI)) {
573 case LegalStoreKind::None:
574 // Nothing to do
575 break;
576 case LegalStoreKind::Memset: {
577 // Find the base pointer.
578 Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
579 StoreRefsForMemset[Ptr].push_back(SI);
580 } break;
581 case LegalStoreKind::MemsetPattern: {
582 // Find the base pointer.
583 Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
584 StoreRefsForMemsetPattern[Ptr].push_back(SI);
585 } break;
586 case LegalStoreKind::Memcpy:
587 case LegalStoreKind::UnorderedAtomicMemcpy:
588 StoreRefsForMemcpy.push_back(SI);
589 break;
590 default:
591 assert(false && "unhandled return value");
592 break;
593 }
594 }
595}
596
597/// runOnLoopBlock - Process the specified block, which lives in a counted loop
598/// with the specified backedge count. This block is known to be in the current
599/// loop and not in any subloops.
600bool LoopIdiomRecognize::runOnLoopBlock(
601 BasicBlock *BB, const SCEV *BECount,
602 SmallVectorImpl<BasicBlock *> &ExitBlocks) {
603 // We can only promote stores in this block if they are unconditionally
604 // executed in the loop. For a block to be unconditionally executed, it has
605 // to dominate all the exit blocks of the loop. Verify this now.
606 for (BasicBlock *ExitBlock : ExitBlocks)
607 if (!DT->dominates(BB, ExitBlock))
608 return false;
609
610 bool MadeChange = false;
611 // Look for store instructions, which may be optimized to memset/memcpy.
612 collectStores(BB);
613
614 // Look for a single store or sets of stores with a common base, which can be
615 // optimized into a memset (memset_pattern). The latter most commonly happens
616 // with structs and handunrolled loops.
617 for (auto &SL : StoreRefsForMemset)
618 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
619
620 for (auto &SL : StoreRefsForMemsetPattern)
621 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
622
623 // Optimize the store into a memcpy, if it feeds an similarly strided load.
624 for (auto &SI : StoreRefsForMemcpy)
625 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
626
627 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
628 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
629 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
630 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
631
632 return MadeChange;
633}
634
635/// See if this store(s) can be promoted to a memset.
636bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
637 const SCEV *BECount, ForMemset For) {
638 // Try to find consecutive stores that can be transformed into memsets.
639 SetVector<StoreInst *> Heads, Tails;
641
642 // Do a quadratic search on all of the given stores and find
643 // all of the pairs of stores that follow each other.
644 SmallVector<unsigned, 16> IndexQueue;
645 for (unsigned i = 0, e = SL.size(); i < e; ++i) {
646 assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
647
648 Value *FirstStoredVal = SL[i]->getValueOperand();
649 Value *FirstStorePtr = SL[i]->getPointerOperand();
650 const SCEVAddRecExpr *FirstStoreEv =
651 cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
652 APInt FirstStride = getStoreStride(FirstStoreEv);
653 unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
654
655 // See if we can optimize just this store in isolation.
656 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
657 Heads.insert(SL[i]);
658 continue;
659 }
660
661 Value *FirstSplatValue = nullptr;
662 Constant *FirstPatternValue = nullptr;
663
664 if (For == ForMemset::Yes)
665 FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
666 else
667 FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
668
669 assert((FirstSplatValue || FirstPatternValue) &&
670 "Expected either splat value or pattern value.");
671
672 IndexQueue.clear();
673 // If a store has multiple consecutive store candidates, search Stores
674 // array according to the sequence: from i+1 to e, then from i-1 to 0.
675 // This is because usually pairing with immediate succeeding or preceding
676 // candidate create the best chance to find memset opportunity.
677 unsigned j = 0;
678 for (j = i + 1; j < e; ++j)
679 IndexQueue.push_back(j);
680 for (j = i; j > 0; --j)
681 IndexQueue.push_back(j - 1);
682
683 for (auto &k : IndexQueue) {
684 assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
685 Value *SecondStorePtr = SL[k]->getPointerOperand();
686 const SCEVAddRecExpr *SecondStoreEv =
687 cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
688 APInt SecondStride = getStoreStride(SecondStoreEv);
689
690 if (FirstStride != SecondStride)
691 continue;
692
693 Value *SecondStoredVal = SL[k]->getValueOperand();
694 Value *SecondSplatValue = nullptr;
695 Constant *SecondPatternValue = nullptr;
696
697 if (For == ForMemset::Yes)
698 SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
699 else
700 SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
701
702 assert((SecondSplatValue || SecondPatternValue) &&
703 "Expected either splat value or pattern value.");
704
705 if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
706 if (For == ForMemset::Yes) {
707 if (isa<UndefValue>(FirstSplatValue))
708 FirstSplatValue = SecondSplatValue;
709 if (FirstSplatValue != SecondSplatValue)
710 continue;
711 } else {
712 if (isa<UndefValue>(FirstPatternValue))
713 FirstPatternValue = SecondPatternValue;
714 if (FirstPatternValue != SecondPatternValue)
715 continue;
716 }
717 Tails.insert(SL[k]);
718 Heads.insert(SL[i]);
719 ConsecutiveChain[SL[i]] = SL[k];
720 break;
721 }
722 }
723 }
724
725 // We may run into multiple chains that merge into a single chain. We mark the
726 // stores that we transformed so that we don't visit the same store twice.
727 SmallPtrSet<Value *, 16> TransformedStores;
728 bool Changed = false;
729
730 // For stores that start but don't end a link in the chain:
731 for (StoreInst *I : Heads) {
732 if (Tails.count(I))
733 continue;
734
735 // We found a store instr that starts a chain. Now follow the chain and try
736 // to transform it.
737 SmallPtrSet<Instruction *, 8> AdjacentStores;
738 StoreInst *HeadStore = I;
739 unsigned StoreSize = 0;
740
741 // Collect the chain into a list.
742 while (Tails.count(I) || Heads.count(I)) {
743 if (TransformedStores.count(I))
744 break;
745 AdjacentStores.insert(I);
746
747 StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
748 // Move to the next value in the chain.
749 I = ConsecutiveChain[I];
750 }
751
752 Value *StoredVal = HeadStore->getValueOperand();
753 Value *StorePtr = HeadStore->getPointerOperand();
754 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
755 APInt Stride = getStoreStride(StoreEv);
756
757 // Check to see if the stride matches the size of the stores. If so, then
758 // we know that every byte is touched in the loop.
759 if (StoreSize != Stride && StoreSize != -Stride)
760 continue;
761
762 bool IsNegStride = StoreSize == -Stride;
763
764 Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
765 const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
766 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
767 MaybeAlign(HeadStore->getAlign()), StoredVal,
768 HeadStore, AdjacentStores, StoreEv, BECount,
769 IsNegStride)) {
770 TransformedStores.insert_range(AdjacentStores);
771 Changed = true;
772 }
773 }
774
775 return Changed;
776}
777
778/// processLoopMemIntrinsic - Template function for calling different processor
779/// functions based on mem intrinsic type.
780template <typename MemInst>
781bool LoopIdiomRecognize::processLoopMemIntrinsic(
782 BasicBlock *BB,
783 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
784 const SCEV *BECount) {
785 bool MadeChange = false;
786 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
787 Instruction *Inst = &*I++;
788 // Look for memory instructions, which may be optimized to a larger one.
789 if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
790 WeakTrackingVH InstPtr(&*I);
791 if (!(this->*Processor)(MI, BECount))
792 continue;
793 MadeChange = true;
794
795 // If processing the instruction invalidated our iterator, start over from
796 // the top of the block.
797 if (!InstPtr)
798 I = BB->begin();
799 }
800 }
801 return MadeChange;
802}
803
804/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
805bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
806 const SCEV *BECount) {
807 // We can only handle non-volatile memcpys with a constant size.
808 if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
809 return false;
810
811 // If we're not allowed to hack on memcpy, we fail.
812 if ((!HasMemcpy && !MCI->isForceInlined()) || DisableLIRP::Memcpy)
813 return false;
814
815 Value *Dest = MCI->getDest();
816 Value *Source = MCI->getSource();
817 if (!Dest || !Source)
818 return false;
819
820 // See if the load and store pointer expressions are AddRec like {base,+,1} on
821 // the current loop, which indicates a strided load and store. If we have
822 // something else, it's a random load or store we can't handle.
823 const SCEV *StoreEv = SE->getSCEV(Dest);
824 const SCEV *LoadEv = SE->getSCEV(Source);
825 const APInt *StoreStrideValue, *LoadStrideValue;
826 if (!match(StoreEv,
827 m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StoreStrideValue),
828 m_SpecificLoop(CurLoop))) ||
829 !match(LoadEv,
830 m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(LoadStrideValue),
831 m_SpecificLoop(CurLoop))))
832 return false;
833
834 // Reject memcpys that are so large that they overflow an unsigned.
835 uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
836 if ((SizeInBytes >> 32) != 0)
837 return false;
838
839 // Huge stride value - give up
840 if (StoreStrideValue->getBitWidth() > 64 ||
841 LoadStrideValue->getBitWidth() > 64)
842 return false;
843
844 if (SizeInBytes != *StoreStrideValue && SizeInBytes != -*StoreStrideValue) {
845 ORE.emit([&]() {
846 return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
847 << ore::NV("Inst", "memcpy") << " in "
848 << ore::NV("Function", MCI->getFunction())
849 << " function will not be hoisted: "
850 << ore::NV("Reason", "memcpy size is not equal to stride");
851 });
852 return false;
853 }
854
855 int64_t StoreStrideInt = StoreStrideValue->getSExtValue();
856 int64_t LoadStrideInt = LoadStrideValue->getSExtValue();
857 // Check if the load stride matches the store stride.
858 if (StoreStrideInt != LoadStrideInt)
859 return false;
860
861 return processLoopStoreOfLoopLoad(
862 Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
863 MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI,
864 cast<SCEVAddRecExpr>(StoreEv), cast<SCEVAddRecExpr>(LoadEv), BECount);
865}
866
867/// processLoopMemSet - See if this memset can be promoted to a large memset.
868bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
869 const SCEV *BECount) {
870 // We can only handle non-volatile memsets.
871 if (MSI->isVolatile())
872 return false;
873
874 // If we're not allowed to hack on memset, we fail.
875 if (!HasMemset || DisableLIRP::Memset)
876 return false;
877
878 Value *Pointer = MSI->getDest();
879
880 // See if the pointer expression is an AddRec like {base,+,1} on the current
881 // loop, which indicates a strided store. If we have something else, it's a
882 // random store we can't handle.
883 const SCEV *Ev = SE->getSCEV(Pointer);
884 const SCEV *PointerStrideSCEV;
885 if (!match(Ev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(PointerStrideSCEV),
886 m_SpecificLoop(CurLoop)))) {
887 LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");
888 return false;
889 }
890
891 const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
892
893 bool IsNegStride = false;
894 const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
895
896 if (IsConstantSize) {
897 // Memset size is constant.
898 // Check if the pointer stride matches the memset size. If so, then
899 // we know that every byte is touched in the loop.
900 LLVM_DEBUG(dbgs() << " memset size is constant\n");
901 uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
902 const APInt *Stride;
903 if (!match(PointerStrideSCEV, m_scev_APInt(Stride)))
904 return false;
905
906 if (SizeInBytes != *Stride && SizeInBytes != -*Stride)
907 return false;
908
909 IsNegStride = SizeInBytes == -*Stride;
910 } else {
911 // Memset size is non-constant.
912 // Check if the pointer stride matches the memset size.
913 // To be conservative, the pass would not promote pointers that aren't in
914 // address space zero. Also, the pass only handles memset length and stride
915 // that are invariant for the top level loop.
916 LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
917 if (Pointer->getType()->getPointerAddressSpace() != 0) {
918 LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
919 << "abort\n");
920 return false;
921 }
922 if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
923 LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
924 << "abort\n");
925 return false;
926 }
927
928 // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV
929 IsNegStride = PointerStrideSCEV->isNonConstantNegative();
930 const SCEV *PositiveStrideSCEV =
931 IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
932 : PointerStrideSCEV;
933 LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
934 << " PositiveStrideSCEV: " << *PositiveStrideSCEV
935 << "\n");
936
937 if (PositiveStrideSCEV != MemsetSizeSCEV) {
938 // If an expression is covered by the loop guard, compare again and
939 // proceed with optimization if equal.
940 const SCEV *FoldedPositiveStride =
941 SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
942 const SCEV *FoldedMemsetSize =
943 SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);
944
945 LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"
946 << " FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
947 << " FoldedPositiveStride: " << *FoldedPositiveStride
948 << "\n");
949
950 if (FoldedPositiveStride != FoldedMemsetSize) {
951 LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
952 return false;
953 }
954 }
955 }
956
957 // Verify that the memset value is loop invariant. If not, we can't promote
958 // the memset.
959 Value *SplatValue = MSI->getValue();
960 if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
961 return false;
962
964 MSIs.insert(MSI);
965 return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
966 MSI->getDestAlign(), SplatValue, MSI, MSIs,
967 cast<SCEVAddRecExpr>(Ev), BECount, IsNegStride,
968 /*IsLoopMemset=*/true);
969}
970
971/// mayLoopAccessLocation - Return true if the specified loop might access the
972/// specified pointer location, which is a loop-strided access. The 'Access'
973/// argument specifies what the verboten forms of access are (read or write).
974static bool
976 const SCEV *BECount, const SCEV *StoreSizeSCEV,
978 SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
979 // Get the location that may be stored across the loop. Since the access is
980 // strided positively through memory, we say that the modified location starts
981 // at the pointer and has infinite size.
983
984 // If the loop iterates a fixed number of times, we can refine the access size
985 // to be exactly the size of the memset, which is (BECount+1)*StoreSize
986 const APInt *BECst, *ConstSize;
987 if (match(BECount, m_scev_APInt(BECst)) &&
988 match(StoreSizeSCEV, m_scev_APInt(ConstSize))) {
989 std::optional<uint64_t> BEInt = BECst->tryZExtValue();
990 std::optional<uint64_t> SizeInt = ConstSize->tryZExtValue();
991 // FIXME: Should this check for overflow?
992 if (BEInt && SizeInt)
993 AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt);
994 }
995
996 // TODO: For this to be really effective, we have to dive into the pointer
997 // operand in the store. Store to &A[i] of 100 will always return may alias
998 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
999 // which will then no-alias a store to &A[100].
1000 MemoryLocation StoreLoc(Ptr, AccessSize);
1001
1002 for (BasicBlock *B : L->blocks())
1003 for (Instruction &I : *B)
1004 if (!IgnoredInsts.contains(&I) &&
1005 isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
1006 return true;
1007 return false;
1008}
1009
1010// If we have a negative stride, Start refers to the end of the memory location
1011// we're trying to memset. Therefore, we need to recompute the base pointer,
1012// which is just Start - BECount*Size.
1013static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
1014 Type *IntPtr, const SCEV *StoreSizeSCEV,
1015 ScalarEvolution *SE) {
1016 const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1017 if (!StoreSizeSCEV->isOne()) {
1018 // index = back edge count * store size
1019 Index = SE->getMulExpr(Index,
1020 SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1022 }
1023 // base pointer = start - index * store size
1024 return SE->getMinusSCEV(Start, Index);
1025}
1026
1027/// Compute the number of bytes as a SCEV from the backedge taken count.
1028///
1029/// This also maps the SCEV into the provided type and tries to handle the
1030/// computation in a way that will fold cleanly.
1031static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
1032 const SCEV *StoreSizeSCEV, Loop *CurLoop,
1033 const DataLayout *DL, ScalarEvolution *SE) {
1034 const SCEV *TripCountSCEV =
1035 SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop);
1036 return SE->getMulExpr(TripCountSCEV,
1037 SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1039}
1040
1041/// processLoopStridedStore - We see a strided store of some value. If we can
1042/// transform this into a memset or memset_pattern in the loop preheader, do so.
1043bool LoopIdiomRecognize::processLoopStridedStore(
1044 Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
1045 Value *StoredVal, Instruction *TheStore,
1047 const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
1048 Module *M = TheStore->getModule();
1049
1050 // The trip count of the loop and the base pointer of the addrec SCEV is
1051 // guaranteed to be loop invariant, which means that it should dominate the
1052 // header. This allows us to insert code for it in the preheader.
1053 unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
1054 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1055 IRBuilder<> Builder(Preheader->getTerminator());
1056 SCEVExpander Expander(*SE, *DL, "loop-idiom");
1057 SCEVExpanderCleaner ExpCleaner(Expander);
1058
1059 Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
1060 Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
1061
1062 bool Changed = false;
1063 const SCEV *Start = Ev->getStart();
1064 // Handle negative strided loops.
1065 if (IsNegStride)
1066 Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
1067
1068 // TODO: ideally we should still be able to generate memset if SCEV expander
1069 // is taught to generate the dependencies at the latest point.
1070 if (!Expander.isSafeToExpand(Start))
1071 return Changed;
1072
1073 // Okay, we have a strided store "p[i]" of a splattable value. We can turn
1074 // this into a memset in the loop preheader now if we want. However, this
1075 // would be unsafe to do if there is anything else in the loop that may read
1076 // or write to the aliased location. Check for any overlap by generating the
1077 // base pointer and checking the region.
1078 Value *BasePtr =
1079 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
1080
1081 // From here on out, conservatively report to the pass manager that we've
1082 // changed the IR, even if we later clean up these added instructions. There
1083 // may be structural differences e.g. in the order of use lists not accounted
1084 // for in just a textual dump of the IR. This is written as a variable, even
1085 // though statically all the places this dominates could be replaced with
1086 // 'true', with the hope that anyone trying to be clever / "more precise" with
1087 // the return value will read this comment, and leave them alone.
1088 Changed = true;
1089
1090 if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1091 StoreSizeSCEV, *AA, Stores))
1092 return Changed;
1093
1094 if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
1095 return Changed;
1096
1097 // Okay, everything looks good, insert the memset.
1098 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
1099 Constant *PatternValue = nullptr;
1100 if (!SplatValue)
1101 PatternValue = getMemSetPatternValue(StoredVal, DL);
1102
1103 // MemsetArg is the number of bytes for the memset libcall, and the number
1104 // of pattern repetitions if the memset.pattern intrinsic is being used.
1105 Value *MemsetArg;
1106 std::optional<int64_t> BytesWritten;
1107
1108 if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
1109 const SCEV *TripCountS =
1110 SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
1111 if (!Expander.isSafeToExpand(TripCountS))
1112 return Changed;
1113 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1114 if (!ConstStoreSize)
1115 return Changed;
1116 Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
1117 Preheader->getTerminator());
1118 uint64_t PatternRepsPerTrip =
1119 (ConstStoreSize->getValue()->getZExtValue() * 8) /
1120 DL->getTypeSizeInBits(PatternValue->getType());
1121 // If ConstStoreSize is not equal to the width of PatternValue, then
1122 // MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
1123 // MemSetArg is just TripCount.
1124 MemsetArg =
1125 PatternRepsPerTrip == 1
1126 ? TripCount
1127 : Builder.CreateMul(TripCount,
1128 Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
1129 PatternRepsPerTrip));
1130 if (auto *CI = dyn_cast<ConstantInt>(TripCount))
1131 BytesWritten =
1132 CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
1133
1134 } else {
1135 const SCEV *NumBytesS =
1136 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1137
1138 // TODO: ideally we should still be able to generate memset if SCEV expander
1139 // is taught to generate the dependencies at the latest point.
1140 if (!Expander.isSafeToExpand(NumBytesS))
1141 return Changed;
1142 MemsetArg =
1143 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1144 if (auto *CI = dyn_cast<ConstantInt>(MemsetArg))
1145 BytesWritten = CI->getZExtValue();
1146 }
1147 assert(MemsetArg && "MemsetArg should have been set");
1148
1149 AAMDNodes AATags = TheStore->getAAMetadata();
1150 for (Instruction *Store : Stores)
1151 AATags = AATags.merge(Store->getAAMetadata());
1152 if (BytesWritten)
1153 AATags = AATags.extendTo(BytesWritten.value());
1154 else
1155 AATags = AATags.extendTo(-1);
1156
1157 CallInst *NewCall;
1158 if (SplatValue) {
1159 NewCall = Builder.CreateMemSet(BasePtr, SplatValue, MemsetArg,
1160 MaybeAlign(StoreAlignment),
1161 /*isVolatile=*/false, AATags);
1162 } else if (ForceMemsetPatternIntrinsic ||
1163 isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
1164 assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");
1165
1166 NewCall = Builder.CreateIntrinsic(
1167 Intrinsic::experimental_memset_pattern,
1168 {DestInt8PtrTy, PatternValue->getType(), IntIdxTy},
1169 {BasePtr, PatternValue, MemsetArg,
1170 ConstantInt::getFalse(M->getContext())});
1171 if (StoreAlignment)
1172 cast<MemSetPatternInst>(NewCall)->setDestAlignment(*StoreAlignment);
1173 NewCall->setAAMetadata(AATags);
1174 } else {
1175 // Neither a memset, nor memset_pattern16
1176 return Changed;
1177 }
1178
1179 NewCall->setDebugLoc(TheStore->getDebugLoc());
1180
1181 if (MSSAU) {
1182 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1183 NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1184 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1185 }
1186
1187 LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
1188 << " from store to: " << *Ev << " at: " << *TheStore
1189 << "\n");
1190
1191 ORE.emit([&]() {
1192 OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
1193 NewCall->getDebugLoc(), Preheader);
1194 R << "Transformed loop-strided store in "
1195 << ore::NV("Function", TheStore->getFunction())
1196 << " function into a call to "
1197 << ore::NV("NewFunction", NewCall->getCalledFunction())
1198 << "() intrinsic";
1199 if (!Stores.empty())
1200 R << ore::setExtraArgs();
1201 for (auto *I : Stores) {
1202 R << ore::NV("FromBlock", I->getParent()->getName())
1203 << ore::NV("ToBlock", Preheader->getName());
1204 }
1205 return R;
1206 });
1207
1208 // Okay, the memset has been formed. Zap the original store and anything that
1209 // feeds into it.
1210 for (auto *I : Stores) {
1211 if (MSSAU)
1212 MSSAU->removeMemoryAccess(I, true);
1214 }
1215 if (MSSAU && VerifyMemorySSA)
1216 MSSAU->getMemorySSA()->verifyMemorySSA();
1217 ++NumMemSet;
1218 ExpCleaner.markResultUsed();
1219 return true;
1220}
1221
1222/// If the stored value is a strided load in the same loop with the same stride
1223/// this may be transformable into a memcpy. This kicks in for stuff like
1224/// for (i) A[i] = B[i];
1225bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1226 const SCEV *BECount) {
1227 assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
1228
1229 Value *StorePtr = SI->getPointerOperand();
1230 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1231 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1232
1233 // The store must be feeding a non-volatile load.
1234 LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1235 assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1236
1237 // See if the pointer expression is an AddRec like {base,+,1} on the current
1238 // loop, which indicates a strided load. If we have something else, it's a
1239 // random load we can't handle.
1240 Value *LoadPtr = LI->getPointerOperand();
1241 const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1242
1243 const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
1244 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1245 SI->getAlign(), LI->getAlign(), SI, LI,
1246 StoreEv, LoadEv, BECount);
1247}
1248
1249namespace {
1250class MemmoveVerifier {
1251public:
1252 explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
1253 const DataLayout &DL)
1255 LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
1257 StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
1258 IsSameObject(BP1 == BP2) {}
1259
1260 bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
1261 const Instruction &TheLoad,
1262 bool IsMemCpy) const {
1263 if (IsMemCpy) {
1264 // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1265 // for negative stride.
1266 if ((!IsNegStride && LoadOff <= StoreOff) ||
1267 (IsNegStride && LoadOff >= StoreOff))
1268 return false;
1269 } else {
1270 // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1271 // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
1272 int64_t LoadSize =
1273 DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;
1274 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1275 return false;
1276 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1277 (IsNegStride && LoadOff + LoadSize > StoreOff))
1278 return false;
1279 }
1280 return true;
1281 }
1282
1283private:
1284 const DataLayout &DL;
1285 int64_t LoadOff = 0;
1286 int64_t StoreOff = 0;
1287 const Value *BP1;
1288 const Value *BP2;
1289
1290public:
1291 const bool IsSameObject;
1292};
1293} // namespace
1294
1295bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1296 Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
1297 MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
1298 Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
1299 const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
1300
1301 // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
1302 // conservatively bail here, since otherwise we may have to transform
1303 // llvm.memcpy.inline into llvm.memcpy which is illegal.
1304 if (auto *MCI = dyn_cast<MemCpyInst>(TheStore); MCI && MCI->isForceInlined())
1305 return false;
1306
1307 // The trip count of the loop and the base pointer of the addrec SCEV is
1308 // guaranteed to be loop invariant, which means that it should dominate the
1309 // header. This allows us to insert code for it in the preheader.
1310 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1311 IRBuilder<> Builder(Preheader->getTerminator());
1312 SCEVExpander Expander(*SE, *DL, "loop-idiom");
1313
1314 SCEVExpanderCleaner ExpCleaner(Expander);
1315
1316 bool Changed = false;
1317 const SCEV *StrStart = StoreEv->getStart();
1318 unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
1319 Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
1320
1321 APInt Stride = getStoreStride(StoreEv);
1322 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1323
1324 // TODO: Deal with non-constant size; Currently expect constant store size
1325 assert(ConstStoreSize && "store size is expected to be a constant");
1326
1327 int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
1328 bool IsNegStride = StoreSize == -Stride;
1329
1330 // Handle negative strided loops.
1331 if (IsNegStride)
1332 StrStart =
1333 getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1334
1335 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1336 // this into a memcpy in the loop preheader now if we want. However, this
1337 // would be unsafe to do if there is anything else in the loop that may read
1338 // or write the memory region we're storing to. This includes the load that
1339 // feeds the stores. Check for an alias by generating the base address and
1340 // checking everything.
1341 Value *StoreBasePtr = Expander.expandCodeFor(
1342 StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());
1343
1344 // From here on out, conservatively report to the pass manager that we've
1345 // changed the IR, even if we later clean up these added instructions. There
1346 // may be structural differences e.g. in the order of use lists not accounted
1347 // for in just a textual dump of the IR. This is written as a variable, even
1348 // though statically all the places this dominates could be replaced with
1349 // 'true', with the hope that anyone trying to be clever / "more precise" with
1350 // the return value will read this comment, and leave them alone.
1351 Changed = true;
1352
1353 SmallPtrSet<Instruction *, 2> IgnoredInsts;
1354 IgnoredInsts.insert(TheStore);
1355
1356 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1357 const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
1358
1359 bool LoopAccessStore =
1360 mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1361 StoreSizeSCEV, *AA, IgnoredInsts);
1362 if (LoopAccessStore) {
1363 // For memmove case it's not enough to guarantee that loop doesn't access
1364 // TheStore and TheLoad. Additionally we need to make sure that TheStore is
1365 // the only user of TheLoad.
1366 if (!TheLoad->hasOneUse())
1367 return Changed;
1368 IgnoredInsts.insert(TheLoad);
1369 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
1370 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1371 ORE.emit([&]() {
1372 return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
1373 TheStore)
1374 << ore::NV("Inst", InstRemark) << " in "
1375 << ore::NV("Function", TheStore->getFunction())
1376 << " function will not be hoisted: "
1377 << ore::NV("Reason", "The loop may access store location");
1378 });
1379 return Changed;
1380 }
1381 IgnoredInsts.erase(TheLoad);
1382 }
1383
1384 const SCEV *LdStart = LoadEv->getStart();
1385 unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
1386
1387 // Handle negative strided loops.
1388 if (IsNegStride)
1389 LdStart =
1390 getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1391
1392 // For a memcpy, we have to make sure that the input array is not being
1393 // mutated by the loop.
1394 Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
1395 Preheader->getTerminator());
1396
1397 // If the store is a memcpy instruction, we must check if it will write to
1398 // the load memory locations. So remove it from the ignored stores.
1399 MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
1400 if (IsMemCpy && !Verifier.IsSameObject)
1401 IgnoredInsts.erase(TheStore);
1402 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1403 StoreSizeSCEV, *AA, IgnoredInsts)) {
1404 ORE.emit([&]() {
1405 return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
1406 << ore::NV("Inst", InstRemark) << " in "
1407 << ore::NV("Function", TheStore->getFunction())
1408 << " function will not be hoisted: "
1409 << ore::NV("Reason", "The loop may access load location");
1410 });
1411 return Changed;
1412 }
1413
1414 bool IsAtomic = TheStore->isAtomic() || TheLoad->isAtomic();
1415 bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1416
1417 if (IsAtomic) {
1418 // For now don't support unordered atomic memmove.
1419 if (UseMemMove)
1420 return Changed;
1421
1422 // We cannot allow unaligned ops for unordered load/store, so reject
1423 // anything where the alignment isn't at least the element size.
1424 assert((StoreAlign && LoadAlign) &&
1425 "Expect unordered load/store to have align.");
1426 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1427 return Changed;
1428
1429 // If the element.atomic memcpy is not lowered into explicit
1430 // loads/stores later, then it will be lowered into an element-size
1431 // specific lib call. If the lib call doesn't exist for our store size, then
1432 // we shouldn't generate the memcpy.
1433 if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1434 return Changed;
1435 }
1436
1437 if (UseMemMove)
1438 if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1439 IsMemCpy))
1440 return Changed;
1441
1442 if (avoidLIRForMultiBlockLoop())
1443 return Changed;
1444
1445 // Okay, everything is safe, we can transform this!
1446
1447 const SCEV *NumBytesS =
1448 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1449
1450 Value *NumBytes =
1451 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1452
1453 AAMDNodes AATags = TheLoad->getAAMetadata();
1454 AAMDNodes StoreAATags = TheStore->getAAMetadata();
1455 AATags = AATags.merge(StoreAATags);
1456 if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1457 AATags = AATags.extendTo(CI->getZExtValue());
1458 else
1459 AATags = AATags.extendTo(-1);
1460
1461 CallInst *NewCall = nullptr;
1462 // Check whether to generate an unordered atomic memcpy:
1463 // If the load or store are atomic, then they must necessarily be unordered
1464 // by previous checks.
1465 if (!IsAtomic) {
1466 if (UseMemMove)
1467 NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
1468 LoadAlign, NumBytes,
1469 /*isVolatile=*/false, AATags);
1470 else
1471 NewCall =
1472 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1473 NumBytes, /*isVolatile=*/false, AATags);
1474 } else {
1475 // Create the call.
1476 // Note that unordered atomic loads/stores are *required* by the spec to
1477 // have an alignment but non-atomic loads/stores may not.
1478 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1479 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1480 AATags);
1481 }
1482 NewCall->setDebugLoc(TheStore->getDebugLoc());
1483
1484 if (MSSAU) {
1485 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1486 NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1487 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1488 }
1489
1490 LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
1491 << " from load ptr=" << *LoadEv << " at: " << *TheLoad
1492 << "\n"
1493 << " from store ptr=" << *StoreEv << " at: " << *TheStore
1494 << "\n");
1495
1496 ORE.emit([&]() {
1497 return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
1498 NewCall->getDebugLoc(), Preheader)
1499 << "Formed a call to "
1500 << ore::NV("NewFunction", NewCall->getCalledFunction())
1501 << "() intrinsic from " << ore::NV("Inst", InstRemark)
1502 << " instruction in " << ore::NV("Function", TheStore->getFunction())
1503 << " function"
1505 << ore::NV("FromBlock", TheStore->getParent()->getName())
1506 << ore::NV("ToBlock", Preheader->getName());
1507 });
1508
1509 // Okay, a new call to memcpy/memmove has been formed. Zap the original store
1510 // and anything that feeds into it.
1511 if (MSSAU)
1512 MSSAU->removeMemoryAccess(TheStore, true);
1513 deleteDeadInstruction(TheStore);
1514 if (MSSAU && VerifyMemorySSA)
1515 MSSAU->getMemorySSA()->verifyMemorySSA();
1516 if (UseMemMove)
1517 ++NumMemMove;
1518 else
1519 ++NumMemCpy;
1520 ExpCleaner.markResultUsed();
1521 return true;
1522}
1523
1524// When compiling for codesize we avoid idiom recognition for a multi-block loop
1525// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1526//
1527bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1528 bool IsLoopMemset) {
1529 if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1530 if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
1531 LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1532 << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1533 << " avoided: multi-block top-level loop\n");
1534 return true;
1535 }
1536 }
1537
1538 return false;
1539}
1540
1541bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) {
1542 // FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using
1543 // carry-less multiplication instructions, which is more efficient than our
1544 // Sarwate table-lookup optimization. Hence, until we're able to emit
1545 // target-specific instructions for Hexagon, subsuming HexagonLoopIdiom,
1546 // disable the optimization for Hexagon.
1547 Module &M = *CurLoop->getHeader()->getModule();
1548 Triple TT(M.getTargetTriple());
1549 if (TT.getArch() == Triple::hexagon)
1550 return false;
1551
1552 // First, create a new GlobalVariable corresponding to the
1553 // Sarwate-lookup-table.
1554 Type *CRCTy = Info.LHS->getType();
1555 unsigned CRCBW = CRCTy->getIntegerBitWidth();
1556 std::array<Constant *, 256> CRCConstants;
1557 transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped),
1558 CRCConstants.begin(),
1559 [CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); });
1560 Constant *ConstArray =
1561 ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants);
1562 GlobalVariable *GV =
1563 new GlobalVariable(M, ConstArray->getType(), true,
1564 GlobalValue::PrivateLinkage, ConstArray, ".crctable");
1565
1568
1569 // Next, mark all PHIs for removal except IV.
1570 {
1571 for (PHINode &PN : CurLoop->getHeader()->phis()) {
1572 if (&PN == IV)
1573 continue;
1574 PN.replaceAllUsesWith(PoisonValue::get(PN.getType()));
1575 Cleanup.push_back(&PN);
1576 }
1577 }
1578
1579 // Next, fix up the trip count.
1580 {
1581 unsigned NewBTC = (Info.TripCount / 8) - 1;
1582 BasicBlock *LoopBlk = CurLoop->getLoopLatch();
1583 BranchInst *BrInst = cast<BranchInst>(LoopBlk->getTerminator());
1584 CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk
1587 Instruction *ExitCond = CurLoop->getLatchCmpInst();
1588 Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC);
1589 IRBuilder<> Builder(ExitCond);
1590 Value *NewExitCond =
1591 Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond");
1592 ExitCond->replaceAllUsesWith(NewExitCond);
1593 deleteDeadInstruction(ExitCond);
1594 }
1595
1596 // Finally, fill the loop with the Sarwate-table-lookup logic, and replace all
1597 // uses of ComputedValue.
1598 //
1599 // Little-endian:
1600 // crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)]
1601 // Big-Endian:
1602 // crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)]
1603 {
1604 auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) {
1605 return Builder.CreateZExtOrTrunc(
1606 Op, IntegerType::getInt8Ty(Op->getContext()), Name);
1607 };
1608 auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op,
1609 const Twine &Name) {
1610 Type *OpTy = Op->getType();
1611
1612 // When the bitwidth of the CRC mismatches the Op's bitwidth, we need to
1613 // use the CRC's bitwidth as the reference for shifting right.
1614 return LoByte(Builder,
1615 CRCBW > 8 ? Builder.CreateLShr(
1616 Op, ConstantInt::get(OpTy, CRCBW - 8), Name)
1617 : Op,
1618 Name + ".lo.byte");
1619 };
1620
1621 IRBuilder<> Builder(CurLoop->getHeader(),
1622 CurLoop->getHeader()->getFirstNonPHIIt());
1623
1624 // Create the CRC PHI, and initialize its incoming value to the initial
1625 // value of CRC.
1626 PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc");
1627 CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader());
1628
1629 // CRC is now an evolving variable, initialized to the PHI.
1630 Value *CRC = CRCPhi;
1631
1632 // TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte
1633 // of LHSAux), if LHSAux is non-nullptr.
1634 Value *Indexer = CRC;
1635 if (Value *Data = Info.LHSAux) {
1636 Type *DataTy = Data->getType();
1637
1638 // To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we
1639 // shift right by that amount, and take the lo-byte (in the little-endian
1640 // case), or shift left by that amount, and take the hi-idx (in the
1641 // big-endian case).
1642 Value *IVBits = Builder.CreateZExtOrTrunc(
1643 Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer");
1644 Value *DataIndexer =
1645 Info.ByteOrderSwapped
1646 ? Builder.CreateShl(Data, IVBits, "data.indexer")
1647 : Builder.CreateLShr(Data, IVBits, "data.indexer");
1648 Indexer = Builder.CreateXor(
1649 DataIndexer,
1650 Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"),
1651 "crc.data.indexer");
1652 }
1653
1654 Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi")
1655 : LoByte(Builder, Indexer, "indexer.lo");
1656
1657 // Always index into a GEP using the index type.
1658 Indexer = Builder.CreateZExt(
1659 Indexer, SE->getDataLayout().getIndexType(GV->getType()),
1660 "indexer.ext");
1661
1662 // CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC].
1663 Value *CRCTableGEP =
1664 Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd");
1665 Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld");
1666
1667 // CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of
1668 // CRC-8.
1669 Value *CRCNext = CRCTableLd;
1670 if (CRCBW > 8) {
1671 Value *CRCShift = Info.ByteOrderSwapped
1672 ? Builder.CreateShl(CRC, 8, "crc.be.shift")
1673 : Builder.CreateLShr(CRC, 8, "crc.le.shift");
1674 CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next");
1675 }
1676
1677 // Connect the back-edge for the loop, and RAUW the ComputedValue.
1678 CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch());
1679 Info.ComputedValue->replaceUsesOutsideBlock(CRCNext,
1680 CurLoop->getLoopLatch());
1681 }
1682
1683 // Cleanup.
1684 {
1685 for (PHINode *PN : Cleanup)
1687 SE->forgetLoop(CurLoop);
1688 }
1689 return true;
1690}
1691
1692bool LoopIdiomRecognize::runOnNoncountableLoop() {
1693 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
1694 << CurLoop->getHeader()->getParent()->getName()
1695 << "] Noncountable Loop %"
1696 << CurLoop->getHeader()->getName() << "\n");
1697
1698 return recognizePopcount() || recognizeAndInsertFFS() ||
1699 recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1700 recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
1701}
1702
1703/// Check if the given conditional branch is based on the comparison between
1704/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1705/// true), the control yields to the loop entry. If the branch matches the
1706/// behavior, the variable involved in the comparison is returned. This function
1707/// will be called to see if the precondition and postcondition of the loop are
1708/// in desirable form.
1710 bool JmpOnZero = false) {
1711 if (!BI || !BI->isConditional())
1712 return nullptr;
1713
1715 if (!Cond)
1716 return nullptr;
1717
1718 auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1719 if (!CmpZero || !CmpZero->isZero())
1720 return nullptr;
1721
1722 BasicBlock *TrueSucc = BI->getSuccessor(0);
1723 BasicBlock *FalseSucc = BI->getSuccessor(1);
1724 if (JmpOnZero)
1725 std::swap(TrueSucc, FalseSucc);
1726
1727 ICmpInst::Predicate Pred = Cond->getPredicate();
1728 if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1729 (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1730 return Cond->getOperand(0);
1731
1732 return nullptr;
1733}
1734
1735namespace {
1736
1737class StrlenVerifier {
1738public:
1739 explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
1740 const TargetLibraryInfo *TLI)
1741 : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1742
1743 bool isValidStrlenIdiom() {
1744 // Give up if the loop has multiple blocks, multiple backedges, or
1745 // multiple exit blocks
1746 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
1747 !CurLoop->getUniqueExitBlock())
1748 return false;
1749
1750 // It should have a preheader and a branch instruction.
1751 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1752 if (!Preheader)
1753 return false;
1754
1755 BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
1756 if (!EntryBI)
1757 return false;
1758
1759 // The loop exit must be conditioned on an icmp with 0 the null terminator.
1760 // The icmp operand has to be a load on some SSA reg that increments
1761 // by 1 in the loop.
1762 BasicBlock *LoopBody = *CurLoop->block_begin();
1763
1764 // Skip if the body is too big as it most likely is not a strlen idiom.
1765 if (!LoopBody || LoopBody->size() >= 15)
1766 return false;
1767
1768 BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
1769 Value *LoopCond = matchCondition(LoopTerm, LoopBody);
1770 if (!LoopCond)
1771 return false;
1772
1773 LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1774 if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
1775 return false;
1776
1777 OperandType = LoopLoad->getType();
1778 if (!OperandType || !OperandType->isIntegerTy())
1779 return false;
1780
1781 // See if the pointer expression is an AddRec with constant step a of form
1782 // ({n,+,a}) where a is the width of the char type.
1783 Value *IncPtr = LoopLoad->getPointerOperand();
1784 const SCEV *LoadEv = SE->getSCEV(IncPtr);
1785 const APInt *Step;
1786 if (!match(LoadEv,
1787 m_scev_AffineAddRec(m_SCEV(LoadBaseEv), m_scev_APInt(Step))))
1788 return false;
1789
1790 LLVM_DEBUG(dbgs() << "pointer load scev: " << *LoadEv << "\n");
1791
1792 unsigned StepSize = Step->getZExtValue();
1793
1794 // Verify that StepSize is consistent with platform char width.
1795 OpWidth = OperandType->getIntegerBitWidth();
1796 unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
1797 if (OpWidth != StepSize * 8)
1798 return false;
1799 if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
1800 return false;
1801 if (OpWidth >= 16)
1802 if (OpWidth != WcharSize * 8)
1803 return false;
1804
1805 // Scan every instruction in the loop to ensure there are no side effects.
1806 for (Instruction &I : *LoopBody)
1807 if (I.mayHaveSideEffects())
1808 return false;
1809
1810 BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1811 if (!LoopExitBB)
1812 return false;
1813
1814 for (PHINode &PN : LoopExitBB->phis()) {
1815 if (!SE->isSCEVable(PN.getType()))
1816 return false;
1817
1818 const SCEV *Ev = SE->getSCEV(&PN);
1819 if (!Ev)
1820 return false;
1821
1822 LLVM_DEBUG(dbgs() << "loop exit phi scev: " << *Ev << "\n");
1823
1824 // Since we verified that the loop trip count will be a valid strlen
1825 // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1826 // SCEVExpander materialize the loop output.
1827 const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1828 if (!AddRecEv || !AddRecEv->isAffine())
1829 return false;
1830
1831 // We only want RecAddExpr with recurrence step that is constant. This
1832 // is good enough for all the idioms we want to recognize. Later we expand
1833 // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1834 if (!isa<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
1835 return false;
1836 }
1837
1838 return true;
1839 }
1840
1841public:
1842 const Loop *CurLoop;
1843 ScalarEvolution *SE;
1844 const TargetLibraryInfo *TLI;
1845
1846 unsigned OpWidth;
1847 ConstantInt *StepSizeCI;
1848 const SCEV *LoadBaseEv;
1850};
1851
1852} // namespace
1853
1854/// The Strlen Idiom we are trying to detect has the following structure
1855///
1856/// preheader:
1857/// ...
1858/// br label %body, ...
1859///
1860/// body:
1861/// ... ; %0 is incremented by a gep
1862/// %1 = load i8, ptr %0, align 1
1863/// %2 = icmp eq i8 %1, 0
1864/// br i1 %2, label %exit, label %body
1865///
1866/// exit:
1867/// %lcssa = phi [%0, %body], ...
1868///
1869/// We expect the strlen idiom to have a load of a character type that
1870/// is compared against '\0', and such load pointer operand must have scev
1871/// expression of the form {%str,+,c} where c is a ConstantInt of the
1872/// appropiate character width for the idiom, and %str is the base of the string
1873/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
1874///
1875/// When transforming the output of the strlen idiom, the lccsa phi are
1876/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1877/// and all subsequent uses are replaced. For example,
1878///
1879/// \code{.c}
1880/// const char* base = str;
1881/// while (*str != '\0')
1882/// ++str;
1883/// size_t result = str - base;
1884/// \endcode
1885///
1886/// will be transformed as follows: The idiom will be replaced by a strlen
1887/// computation to compute the address of the null terminator of the string.
1888///
1889/// \code{.c}
1890/// const char* base = str;
1891/// const char* end = base + strlen(str);
1892/// size_t result = end - base;
1893/// \endcode
1894///
1895/// In the case we index by an induction variable, as long as the induction
1896/// variable has a constant int increment, we can replace all such indvars
1897/// with the closed form computation of strlen
1898///
1899/// \code{.c}
1900/// size_t i = 0;
1901/// while (str[i] != '\0')
1902/// ++i;
1903/// size_t result = i;
1904/// \endcode
1905///
1906/// Will be replaced by
1907///
1908/// \code{.c}
1909/// size_t i = 0 + strlen(str);
1910/// size_t result = i;
1911/// \endcode
1912///
1913bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
1914 if (DisableLIRP::All)
1915 return false;
1916
1917 StrlenVerifier Verifier(CurLoop, SE, TLI);
1918
1919 if (!Verifier.isValidStrlenIdiom())
1920 return false;
1921
1922 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1923 BasicBlock *LoopBody = *CurLoop->block_begin();
1924 BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1925 BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
1926 assert(Preheader && LoopBody && LoopExitBB && LoopTerm &&
1927 "Should be verified to be valid by StrlenVerifier");
1928
1929 if (Verifier.OpWidth == 8) {
1931 return false;
1932 if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
1933 return false;
1934 } else {
1936 return false;
1937 if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen))
1938 return false;
1939 }
1940
1941 IRBuilder<> Builder(Preheader->getTerminator());
1942 Builder.SetCurrentDebugLocation(CurLoop->getStartLoc());
1943 SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
1944 "strlen_idiom");
1945 Value *MaterialzedBase = Expander.expandCodeFor(
1946 Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
1947 Builder.GetInsertPoint());
1948
1949 Value *StrLenFunc = nullptr;
1950 if (Verifier.OpWidth == 8) {
1951 StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
1952 } else {
1953 StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
1954 }
1955 assert(StrLenFunc && "Failed to emit strlen function.");
1956
1957 const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
1959 for (PHINode &PN : LoopExitBB->phis()) {
1960 // We can now materialize the loop output as all phi have scev {base,+,a}.
1961 // We expand the phi as:
1962 // %strlen = call i64 @strlen(%str)
1963 // %phi.new = base expression + step * %strlen
1964 const SCEV *Ev = SE->getSCEV(&PN);
1965 const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1966 const SCEVConstant *Step =
1968 const SCEV *Base = AddRecEv->getStart();
1969
1970 // It is safe to truncate to base since if base is narrower than size_t
1971 // the equivalent user code will have to truncate anyways.
1972 const SCEV *NewEv = SE->getAddExpr(
1974 StrlenEv, Base->getType())));
1975
1976 Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
1977 Builder.GetInsertPoint());
1978 Expander.clear();
1979 PN.replaceAllUsesWith(MaterializedPHI);
1980 Cleanup.push_back(&PN);
1981 }
1982
1983 // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1984 // up by later passes
1985 for (PHINode *PN : Cleanup)
1987
1988 // LoopDeletion only delete invariant loops with known trip-count. We can
1989 // update the condition so it will reliablely delete the invariant loop
1990 assert(LoopTerm->getNumSuccessors() == 2 &&
1991 (LoopTerm->getSuccessor(0) == LoopBody ||
1992 LoopTerm->getSuccessor(1) == LoopBody) &&
1993 "loop body must have a successor that is it self");
1994 ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody
1995 ? Builder.getFalse()
1996 : Builder.getTrue();
1997 LoopTerm->setCondition(NewLoopCond);
1998 SE->forgetLoop(CurLoop);
1999
2000 ++NumStrLen;
2001 LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
2002 ORE.emit([&]() {
2003 return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
2004 CurLoop->getStartLoc(), Preheader)
2005 << "Transformed " << StrLenFunc->getName() << " loop idiom";
2006 });
2007
2008 return true;
2009}
2010
2011/// Check if the given conditional branch is based on an unsigned less-than
2012/// comparison between a variable and a constant, and if the comparison is false
2013/// the control yields to the loop entry. If the branch matches the behaviour,
2014/// the variable involved in the comparison is returned.
2016 APInt &Threshold) {
2017 if (!BI || !BI->isConditional())
2018 return nullptr;
2019
2021 if (!Cond)
2022 return nullptr;
2023
2024 ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
2025 if (!CmpConst)
2026 return nullptr;
2027
2028 BasicBlock *FalseSucc = BI->getSuccessor(1);
2029 ICmpInst::Predicate Pred = Cond->getPredicate();
2030
2031 if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
2032 Threshold = CmpConst->getValue();
2033 return Cond->getOperand(0);
2034 }
2035
2036 return nullptr;
2037}
2038
2039// Check if the recurrence variable `VarX` is in the right form to create
2040// the idiom. Returns the value coerced to a PHINode if so.
2042 BasicBlock *LoopEntry) {
2043 auto *PhiX = dyn_cast<PHINode>(VarX);
2044 if (PhiX && PhiX->getParent() == LoopEntry &&
2045 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
2046 return PhiX;
2047 return nullptr;
2048}
2049
2050/// Return true if the idiom is detected in the loop.
2051///
2052/// Additionally:
2053/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
2054/// or nullptr if there is no such.
2055/// 2) \p CntPhi is set to the corresponding phi node
2056/// or nullptr if there is no such.
2057/// 3) \p InitX is set to the value whose CTLZ could be used.
2058/// 4) \p DefX is set to the instruction calculating Loop exit condition.
2059/// 5) \p Threshold is set to the constant involved in the unsigned less-than
2060/// comparison.
2061///
2062/// The core idiom we are trying to detect is:
2063/// \code
2064/// if (x0 < 2)
2065/// goto loop-exit // the precondition of the loop
2066/// cnt0 = init-val
2067/// do {
2068/// x = phi (x0, x.next); //PhiX
2069/// cnt = phi (cnt0, cnt.next)
2070///
2071/// cnt.next = cnt + 1;
2072/// ...
2073/// x.next = x >> 1; // DefX
2074/// } while (x >= 4)
2075/// loop-exit:
2076/// \endcode
2078 Intrinsic::ID &IntrinID,
2079 Value *&InitX, Instruction *&CntInst,
2080 PHINode *&CntPhi, Instruction *&DefX,
2081 APInt &Threshold) {
2082 BasicBlock *LoopEntry;
2083
2084 DefX = nullptr;
2085 CntInst = nullptr;
2086 CntPhi = nullptr;
2087 LoopEntry = *(CurLoop->block_begin());
2088
2089 // step 1: Check if the loop-back branch is in desirable form.
2091 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
2092 Threshold))
2093 DefX = dyn_cast<Instruction>(T);
2094 else
2095 return false;
2096
2097 // step 2: Check the recurrence of variable X
2098 if (!DefX || !isa<PHINode>(DefX))
2099 return false;
2100
2101 PHINode *VarPhi = cast<PHINode>(DefX);
2102 int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
2103 if (Idx == -1)
2104 return false;
2105
2106 DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
2107 if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
2108 return false;
2109
2110 // step 3: detect instructions corresponding to "x.next = x >> 1"
2111 if (DefX->getOpcode() != Instruction::LShr)
2112 return false;
2113
2114 IntrinID = Intrinsic::ctlz;
2116 if (!Shft || !Shft->isOne())
2117 return false;
2118
2119 InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
2120
2121 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
2122 // or cnt.next = cnt + -1.
2123 // TODO: We can skip the step. If loop trip count is known (CTLZ),
2124 // then all uses of "cnt.next" could be optimized to the trip count
2125 // plus "cnt0". Currently it is not optimized.
2126 // This step could be used to detect POPCNT instruction:
2127 // cnt.next = cnt + (x.next & 1)
2128 for (Instruction &Inst :
2129 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2130 if (Inst.getOpcode() != Instruction::Add)
2131 continue;
2132
2134 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
2135 continue;
2136
2137 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2138 if (!Phi)
2139 continue;
2140
2141 CntInst = &Inst;
2142 CntPhi = Phi;
2143 break;
2144 }
2145 if (!CntInst)
2146 return false;
2147
2148 return true;
2149}
2150
2151/// Return true iff the idiom is detected in the loop.
2152///
2153/// Additionally:
2154/// 1) \p CntInst is set to the instruction counting the population bit.
2155/// 2) \p CntPhi is set to the corresponding phi node.
2156/// 3) \p Var is set to the value whose population bits are being counted.
2157///
2158/// The core idiom we are trying to detect is:
2159/// \code
2160/// if (x0 != 0)
2161/// goto loop-exit // the precondition of the loop
2162/// cnt0 = init-val;
2163/// do {
2164/// x1 = phi (x0, x2);
2165/// cnt1 = phi(cnt0, cnt2);
2166///
2167/// cnt2 = cnt1 + 1;
2168/// ...
2169/// x2 = x1 & (x1 - 1);
2170/// ...
2171/// } while(x != 0);
2172///
2173/// loop-exit:
2174/// \endcode
2175static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
2176 Instruction *&CntInst, PHINode *&CntPhi,
2177 Value *&Var) {
2178 // step 1: Check to see if the look-back branch match this pattern:
2179 // "if (a!=0) goto loop-entry".
2180 BasicBlock *LoopEntry;
2181 Instruction *DefX2, *CountInst;
2182 Value *VarX1, *VarX0;
2183 PHINode *PhiX, *CountPhi;
2184
2185 DefX2 = CountInst = nullptr;
2186 VarX1 = VarX0 = nullptr;
2187 PhiX = CountPhi = nullptr;
2188 LoopEntry = *(CurLoop->block_begin());
2189
2190 // step 1: Check if the loop-back branch is in desirable form.
2191 {
2192 if (Value *T = matchCondition(
2193 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
2194 DefX2 = dyn_cast<Instruction>(T);
2195 else
2196 return false;
2197 }
2198
2199 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
2200 {
2201 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
2202 return false;
2203
2204 BinaryOperator *SubOneOp;
2205
2206 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
2207 VarX1 = DefX2->getOperand(1);
2208 else {
2209 VarX1 = DefX2->getOperand(0);
2210 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
2211 }
2212 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
2213 return false;
2214
2215 ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
2216 if (!Dec ||
2217 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
2218 (SubOneOp->getOpcode() == Instruction::Add &&
2219 Dec->isMinusOne()))) {
2220 return false;
2221 }
2222 }
2223
2224 // step 3: Check the recurrence of variable X
2225 PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
2226 if (!PhiX)
2227 return false;
2228
2229 // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
2230 {
2231 CountInst = nullptr;
2232 for (Instruction &Inst :
2233 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2234 if (Inst.getOpcode() != Instruction::Add)
2235 continue;
2236
2238 if (!Inc || !Inc->isOne())
2239 continue;
2240
2241 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2242 if (!Phi)
2243 continue;
2244
2245 // Check if the result of the instruction is live of the loop.
2246 bool LiveOutLoop = false;
2247 for (User *U : Inst.users()) {
2248 if ((cast<Instruction>(U))->getParent() != LoopEntry) {
2249 LiveOutLoop = true;
2250 break;
2251 }
2252 }
2253
2254 if (LiveOutLoop) {
2255 CountInst = &Inst;
2256 CountPhi = Phi;
2257 break;
2258 }
2259 }
2260
2261 if (!CountInst)
2262 return false;
2263 }
2264
2265 // step 5: check if the precondition is in this form:
2266 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
2267 {
2268 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2269 Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
2270 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
2271 return false;
2272
2273 CntInst = CountInst;
2274 CntPhi = CountPhi;
2275 Var = T;
2276 }
2277
2278 return true;
2279}
2280
2281/// Return true if the idiom is detected in the loop.
2282///
2283/// Additionally:
2284/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
2285/// or nullptr if there is no such.
2286/// 2) \p CntPhi is set to the corresponding phi node
2287/// or nullptr if there is no such.
2288/// 3) \p Var is set to the value whose CTLZ could be used.
2289/// 4) \p DefX is set to the instruction calculating Loop exit condition.
2290///
2291/// The core idiom we are trying to detect is:
2292/// \code
2293/// if (x0 == 0)
2294/// goto loop-exit // the precondition of the loop
2295/// cnt0 = init-val;
2296/// do {
2297/// x = phi (x0, x.next); //PhiX
2298/// cnt = phi(cnt0, cnt.next);
2299///
2300/// cnt.next = cnt + 1;
2301/// ...
2302/// x.next = x >> 1; // DefX
2303/// ...
2304/// } while(x.next != 0);
2305///
2306/// loop-exit:
2307/// \endcode
2308static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
2309 Intrinsic::ID &IntrinID, Value *&InitX,
2310 Instruction *&CntInst, PHINode *&CntPhi,
2311 Instruction *&DefX) {
2312 BasicBlock *LoopEntry;
2313 Value *VarX = nullptr;
2314
2315 DefX = nullptr;
2316 CntInst = nullptr;
2317 CntPhi = nullptr;
2318 LoopEntry = *(CurLoop->block_begin());
2319
2320 // step 1: Check if the loop-back branch is in desirable form.
2321 if (Value *T = matchCondition(
2322 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
2323 DefX = dyn_cast<Instruction>(T);
2324 else
2325 return false;
2326
2327 // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
2328 if (!DefX || !DefX->isShift())
2329 return false;
2330 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
2331 Intrinsic::ctlz;
2333 if (!Shft || !Shft->isOne())
2334 return false;
2335 VarX = DefX->getOperand(0);
2336
2337 // step 3: Check the recurrence of variable X
2338 PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
2339 if (!PhiX)
2340 return false;
2341
2342 InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
2343
2344 // Make sure the initial value can't be negative otherwise the ashr in the
2345 // loop might never reach zero which would make the loop infinite.
2346 if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
2347 return false;
2348
2349 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
2350 // or cnt.next = cnt + -1.
2351 // TODO: We can skip the step. If loop trip count is known (CTLZ),
2352 // then all uses of "cnt.next" could be optimized to the trip count
2353 // plus "cnt0". Currently it is not optimized.
2354 // This step could be used to detect POPCNT instruction:
2355 // cnt.next = cnt + (x.next & 1)
2356 for (Instruction &Inst :
2357 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2358 if (Inst.getOpcode() != Instruction::Add)
2359 continue;
2360
2362 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
2363 continue;
2364
2365 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2366 if (!Phi)
2367 continue;
2368
2369 CntInst = &Inst;
2370 CntPhi = Phi;
2371 break;
2372 }
2373 if (!CntInst)
2374 return false;
2375
2376 return true;
2377}
2378
2379// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
2380// profitable if we delete the loop.
2381bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
2382 Value *InitX, bool ZeroCheck,
2383 size_t CanonicalSize) {
2384 const Value *Args[] = {InitX,
2385 ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
2386
2387 // @llvm.dbg doesn't count as they have no semantic effect.
2388 auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
2389 uint32_t HeaderSize =
2390 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
2391
2392 IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
2393 InstructionCost Cost = TTI->getIntrinsicInstrCost(
2395 if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
2396 return false;
2397
2398 return true;
2399}
2400
2401/// Convert CTLZ / CTTZ idiom loop into countable loop.
2402/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
2403/// returns false.
2404bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
2405 Value *InitX, Instruction *DefX,
2406 PHINode *CntPhi,
2407 Instruction *CntInst) {
2408 bool IsCntPhiUsedOutsideLoop = false;
2409 for (User *U : CntPhi->users())
2410 if (!CurLoop->contains(cast<Instruction>(U))) {
2411 IsCntPhiUsedOutsideLoop = true;
2412 break;
2413 }
2414 bool IsCntInstUsedOutsideLoop = false;
2415 for (User *U : CntInst->users())
2416 if (!CurLoop->contains(cast<Instruction>(U))) {
2417 IsCntInstUsedOutsideLoop = true;
2418 break;
2419 }
2420 // If both CntInst and CntPhi are used outside the loop the profitability
2421 // is questionable.
2422 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
2423 return false;
2424
2425 // For some CPUs result of CTLZ(X) intrinsic is undefined
2426 // when X is 0. If we can not guarantee X != 0, we need to check this
2427 // when expand.
2428 bool ZeroCheck = false;
2429 // It is safe to assume Preheader exist as it was checked in
2430 // parent function RunOnLoop.
2431 BasicBlock *PH = CurLoop->getLoopPreheader();
2432
2433 // If we are using the count instruction outside the loop, make sure we
2434 // have a zero check as a precondition. Without the check the loop would run
2435 // one iteration for before any check of the input value. This means 0 and 1
2436 // would have identical behavior in the original loop and thus
2437 if (!IsCntPhiUsedOutsideLoop) {
2438 auto *PreCondBB = PH->getSinglePredecessor();
2439 if (!PreCondBB)
2440 return false;
2441 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2442 if (!PreCondBI)
2443 return false;
2444 if (matchCondition(PreCondBI, PH) != InitX)
2445 return false;
2446 ZeroCheck = true;
2447 }
2448
2449 // FFS idiom loop has only 6 instructions:
2450 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2451 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2452 // %shr = ashr %n.addr.0, 1
2453 // %tobool = icmp eq %shr, 0
2454 // %inc = add nsw %i.0, 1
2455 // br i1 %tobool
2456 size_t IdiomCanonicalSize = 6;
2457 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2458 return false;
2459
2460 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2461 DefX->getDebugLoc(), ZeroCheck,
2462 IsCntPhiUsedOutsideLoop);
2463 return true;
2464}
2465
2466/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
2467/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
2468/// trip count returns true; otherwise, returns false.
2469bool LoopIdiomRecognize::recognizeAndInsertFFS() {
2470 // Give up if the loop has multiple blocks or multiple backedges.
2471 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2472 return false;
2473
2474 Intrinsic::ID IntrinID;
2475 Value *InitX;
2476 Instruction *DefX = nullptr;
2477 PHINode *CntPhi = nullptr;
2478 Instruction *CntInst = nullptr;
2479
2480 if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
2481 DefX))
2482 return false;
2483
2484 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2485}
2486
2487bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2488 // Give up if the loop has multiple blocks or multiple backedges.
2489 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2490 return false;
2491
2492 Intrinsic::ID IntrinID;
2493 Value *InitX;
2494 Instruction *DefX = nullptr;
2495 PHINode *CntPhi = nullptr;
2496 Instruction *CntInst = nullptr;
2497
2498 APInt LoopThreshold;
2499 if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
2500 CntPhi, DefX, LoopThreshold))
2501 return false;
2502
2503 if (LoopThreshold == 2) {
2504 // Treat as regular FFS.
2505 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2506 }
2507
2508 // Look for Floor Log2 Idiom.
2509 if (LoopThreshold != 4)
2510 return false;
2511
2512 // Abort if CntPhi is used outside of the loop.
2513 for (User *U : CntPhi->users())
2514 if (!CurLoop->contains(cast<Instruction>(U)))
2515 return false;
2516
2517 // It is safe to assume Preheader exist as it was checked in
2518 // parent function RunOnLoop.
2519 BasicBlock *PH = CurLoop->getLoopPreheader();
2520 auto *PreCondBB = PH->getSinglePredecessor();
2521 if (!PreCondBB)
2522 return false;
2523 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2524 if (!PreCondBI)
2525 return false;
2526
2527 APInt PreLoopThreshold;
2528 if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
2529 PreLoopThreshold != 2)
2530 return false;
2531
2532 bool ZeroCheck = true;
2533
2534 // the loop has only 6 instructions:
2535 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2536 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2537 // %shr = ashr %n.addr.0, 1
2538 // %tobool = icmp ult %n.addr.0, C
2539 // %inc = add nsw %i.0, 1
2540 // br i1 %tobool
2541 size_t IdiomCanonicalSize = 6;
2542 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2543 return false;
2544
2545 // log2(x) = w − 1 − clz(x)
2546 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2547 DefX->getDebugLoc(), ZeroCheck,
2548 /*IsCntPhiUsedOutsideLoop=*/false,
2549 /*InsertSub=*/true);
2550 return true;
2551}
2552
2553/// Recognizes a population count idiom in a non-countable loop.
2554///
2555/// If detected, transforms the relevant code to issue the popcount intrinsic
2556/// function call, and returns true; otherwise, returns false.
2557bool LoopIdiomRecognize::recognizePopcount() {
2558 if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
2559 return false;
2560
2561 // Counting population are usually conducted by few arithmetic instructions.
2562 // Such instructions can be easily "absorbed" by vacant slots in a
2563 // non-compact loop. Therefore, recognizing popcount idiom only makes sense
2564 // in a compact loop.
2565
2566 // Give up if the loop has multiple blocks or multiple backedges.
2567 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2568 return false;
2569
2570 BasicBlock *LoopBody = *(CurLoop->block_begin());
2571 if (LoopBody->size() >= 20) {
2572 // The loop is too big, bail out.
2573 return false;
2574 }
2575
2576 // It should have a preheader containing nothing but an unconditional branch.
2577 BasicBlock *PH = CurLoop->getLoopPreheader();
2578 if (!PH || &PH->front() != PH->getTerminator())
2579 return false;
2580 auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
2581 if (!EntryBI || EntryBI->isConditional())
2582 return false;
2583
2584 // It should have a precondition block where the generated popcount intrinsic
2585 // function can be inserted.
2586 auto *PreCondBB = PH->getSinglePredecessor();
2587 if (!PreCondBB)
2588 return false;
2589 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2590 if (!PreCondBI || PreCondBI->isUnconditional())
2591 return false;
2592
2593 Instruction *CntInst;
2594 PHINode *CntPhi;
2595 Value *Val;
2596 if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
2597 return false;
2598
2599 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2600 return true;
2601}
2602
2604 const DebugLoc &DL) {
2605 Value *Ops[] = {Val};
2606 Type *Tys[] = {Val->getType()};
2607
2608 CallInst *CI = IRBuilder.CreateIntrinsic(Intrinsic::ctpop, Tys, Ops);
2609 CI->setDebugLoc(DL);
2610
2611 return CI;
2612}
2613
2615 const DebugLoc &DL, bool ZeroCheck,
2616 Intrinsic::ID IID) {
2617 Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
2618 Type *Tys[] = {Val->getType()};
2619
2620 CallInst *CI = IRBuilder.CreateIntrinsic(IID, Tys, Ops);
2621 CI->setDebugLoc(DL);
2622
2623 return CI;
2624}
2625
2626/// Transform the following loop (Using CTLZ, CTTZ is similar):
2627/// loop:
2628/// CntPhi = PHI [Cnt0, CntInst]
2629/// PhiX = PHI [InitX, DefX]
2630/// CntInst = CntPhi + 1
2631/// DefX = PhiX >> 1
2632/// LOOP_BODY
2633/// Br: loop if (DefX != 0)
2634/// Use(CntPhi) or Use(CntInst)
2635///
2636/// Into:
2637/// If CntPhi used outside the loop:
2638/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
2639/// Count = CountPrev + 1
2640/// else
2641/// Count = BitWidth(InitX) - CTLZ(InitX)
2642/// loop:
2643/// CntPhi = PHI [Cnt0, CntInst]
2644/// PhiX = PHI [InitX, DefX]
2645/// PhiCount = PHI [Count, Dec]
2646/// CntInst = CntPhi + 1
2647/// DefX = PhiX >> 1
2648/// Dec = PhiCount - 1
2649/// LOOP_BODY
2650/// Br: loop if (Dec != 0)
2651/// Use(CountPrev + Cnt0) // Use(CntPhi)
2652/// or
2653/// Use(Count + Cnt0) // Use(CntInst)
2654///
2655/// If LOOP_BODY is empty the loop will be deleted.
2656/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
2657void LoopIdiomRecognize::transformLoopToCountable(
2658 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
2659 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2660 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
2661 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
2662
2663 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
2664 IRBuilder<> Builder(PreheaderBr);
2665 Builder.SetCurrentDebugLocation(DL);
2666
2667 // If there are no uses of CntPhi crate:
2668 // Count = BitWidth - CTLZ(InitX);
2669 // NewCount = Count;
2670 // If there are uses of CntPhi create:
2671 // NewCount = BitWidth - CTLZ(InitX >> 1);
2672 // Count = NewCount + 1;
2673 Value *InitXNext;
2674 if (IsCntPhiUsedOutsideLoop) {
2675 if (DefX->getOpcode() == Instruction::AShr)
2676 InitXNext = Builder.CreateAShr(InitX, 1);
2677 else if (DefX->getOpcode() == Instruction::LShr)
2678 InitXNext = Builder.CreateLShr(InitX, 1);
2679 else if (DefX->getOpcode() == Instruction::Shl) // cttz
2680 InitXNext = Builder.CreateShl(InitX, 1);
2681 else
2682 llvm_unreachable("Unexpected opcode!");
2683 } else
2684 InitXNext = InitX;
2685 Value *Count =
2686 createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
2687 Type *CountTy = Count->getType();
2688 Count = Builder.CreateSub(
2689 ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2690 if (InsertSub)
2691 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2692 Value *NewCount = Count;
2693 if (IsCntPhiUsedOutsideLoop)
2694 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2695
2696 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2697
2698 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
2699 if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
2700 // If the counter was being incremented in the loop, add NewCount to the
2701 // counter's initial value, but only if the initial value is not zero.
2702 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2703 if (!InitConst || !InitConst->isZero())
2704 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2705 } else {
2706 // If the count was being decremented in the loop, subtract NewCount from
2707 // the counter's initial value.
2708 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2709 }
2710
2711 // Step 2: Insert new IV and loop condition:
2712 // loop:
2713 // ...
2714 // PhiCount = PHI [Count, Dec]
2715 // ...
2716 // Dec = PhiCount - 1
2717 // ...
2718 // Br: loop if (Dec != 0)
2719 BasicBlock *Body = *(CurLoop->block_begin());
2720 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2721 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2722
2723 PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
2724 TcPhi->insertBefore(Body->begin());
2725
2726 Builder.SetInsertPoint(LbCond);
2727 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2728 TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2729
2730 TcPhi->addIncoming(Count, Preheader);
2731 TcPhi->addIncoming(TcDec, Body);
2732
2733 CmpInst::Predicate Pred =
2734 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
2735 LbCond->setPredicate(Pred);
2736 LbCond->setOperand(0, TcDec);
2737 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2738
2739 // Step 3: All the references to the original counter outside
2740 // the loop are replaced with the NewCount
2741 if (IsCntPhiUsedOutsideLoop)
2742 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
2743 else
2744 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2745
2746 // step 4: Forget the "non-computable" trip-count SCEV associated with the
2747 // loop. The loop would otherwise not be deleted even if it becomes empty.
2748 SE->forgetLoop(CurLoop);
2749}
2750
2751void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
2752 Instruction *CntInst,
2753 PHINode *CntPhi, Value *Var) {
2754 BasicBlock *PreHead = CurLoop->getLoopPreheader();
2755 auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
2756 const DebugLoc &DL = CntInst->getDebugLoc();
2757
2758 // Assuming before transformation, the loop is following:
2759 // if (x) // the precondition
2760 // do { cnt++; x &= x - 1; } while(x);
2761
2762 // Step 1: Insert the ctpop instruction at the end of the precondition block
2763 IRBuilder<> Builder(PreCondBr);
2764 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2765 {
2766 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
2767 NewCount = PopCntZext =
2768 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2769
2770 if (NewCount != PopCnt)
2771 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2772
2773 // TripCnt is exactly the number of iterations the loop has
2774 TripCnt = NewCount;
2775
2776 // If the population counter's initial value is not zero, insert Add Inst.
2777 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
2778 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2779 if (!InitConst || !InitConst->isZero()) {
2780 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2781 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2782 }
2783 }
2784
2785 // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
2786 // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
2787 // function would be partial dead code, and downstream passes will drag
2788 // it back from the precondition block to the preheader.
2789 {
2790 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2791
2792 Value *Opnd0 = PopCntZext;
2793 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
2794 if (PreCond->getOperand(0) != Var)
2795 std::swap(Opnd0, Opnd1);
2796
2797 ICmpInst *NewPreCond = cast<ICmpInst>(
2798 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
2799 PreCondBr->setCondition(NewPreCond);
2800
2802 }
2803
2804 // Step 3: Note that the population count is exactly the trip count of the
2805 // loop in question, which enable us to convert the loop from noncountable
2806 // loop into a countable one. The benefit is twofold:
2807 //
2808 // - If the loop only counts population, the entire loop becomes dead after
2809 // the transformation. It is a lot easier to prove a countable loop dead
2810 // than to prove a noncountable one. (In some C dialects, an infinite loop
2811 // isn't dead even if it computes nothing useful. In general, DCE needs
2812 // to prove a noncountable loop finite before safely delete it.)
2813 //
2814 // - If the loop also performs something else, it remains alive.
2815 // Since it is transformed to countable form, it can be aggressively
2816 // optimized by some optimizations which are in general not applicable
2817 // to a noncountable loop.
2818 //
2819 // After this step, this loop (conceptually) would look like following:
2820 // newcnt = __builtin_ctpop(x);
2821 // t = newcnt;
2822 // if (x)
2823 // do { cnt++; x &= x-1; t--) } while (t > 0);
2824 BasicBlock *Body = *(CurLoop->block_begin());
2825 {
2826 auto *LbBr = cast<BranchInst>(Body->getTerminator());
2827 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2828 Type *Ty = TripCnt->getType();
2829
2830 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi");
2831 TcPhi->insertBefore(Body->begin());
2832
2833 Builder.SetInsertPoint(LbCond);
2835 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2836 "tcdec", false, true));
2837
2838 TcPhi->addIncoming(TripCnt, PreHead);
2839 TcPhi->addIncoming(TcDec, Body);
2840
2841 CmpInst::Predicate Pred =
2842 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
2843 LbCond->setPredicate(Pred);
2844 LbCond->setOperand(0, TcDec);
2845 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2846 }
2847
2848 // Step 4: All the references to the original population counter outside
2849 // the loop are replaced with the NewCount -- the value returned from
2850 // __builtin_ctpop().
2851 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2852
2853 // step 5: Forget the "non-computable" trip-count SCEV associated with the
2854 // loop. The loop would otherwise not be deleted even if it becomes empty.
2855 SE->forgetLoop(CurLoop);
2856}
2857
2858/// Match loop-invariant value.
2859template <typename SubPattern_t> struct match_LoopInvariant {
2860 SubPattern_t SubPattern;
2861 const Loop *L;
2862
2863 match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
2864 : SubPattern(SP), L(L) {}
2865
2866 template <typename ITy> bool match(ITy *V) const {
2867 return L->isLoopInvariant(V) && SubPattern.match(V);
2868 }
2869};
2870
2871/// Matches if the value is loop-invariant.
2872template <typename Ty>
2873inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
2874 return match_LoopInvariant<Ty>(M, L);
2875}
2876
2877/// Return true if the idiom is detected in the loop.
2878///
2879/// The core idiom we are trying to detect is:
2880/// \code
2881/// entry:
2882/// <...>
2883/// %bitmask = shl i32 1, %bitpos
2884/// br label %loop
2885///
2886/// loop:
2887/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2888/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2889/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2890/// %x.next = shl i32 %x.curr, 1
2891/// <...>
2892/// br i1 %x.curr.isbitunset, label %loop, label %end
2893///
2894/// end:
2895/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2896/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2897/// <...>
2898/// \endcode
2899static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
2900 Value *&BitMask, Value *&BitPos,
2901 Value *&CurrX, Instruction *&NextX) {
2903 " Performing shift-until-bittest idiom detection.\n");
2904
2905 // Give up if the loop has multiple blocks or multiple backedges.
2906 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2907 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2908 return false;
2909 }
2910
2911 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2912 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2913 assert(LoopPreheaderBB && "There is always a loop preheader.");
2914
2915 using namespace PatternMatch;
2916
2917 // Step 1: Check if the loop backedge is in desirable form.
2918
2919 CmpPredicate Pred;
2920 Value *CmpLHS, *CmpRHS;
2921 BasicBlock *TrueBB, *FalseBB;
2922 if (!match(LoopHeaderBB->getTerminator(),
2923 m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
2924 m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
2925 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2926 return false;
2927 }
2928
2929 // Step 2: Check if the backedge's condition is in desirable form.
2930
2931 auto MatchVariableBitMask = [&]() {
2932 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2933 match(CmpLHS,
2934 m_c_And(m_Value(CurrX),
2936 m_Value(BitMask),
2937 m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
2938 CurLoop))));
2939 };
2940
2941 auto MatchDecomposableConstantBitMask = [&]() {
2942 auto Res = llvm::decomposeBitTestICmp(
2943 CmpLHS, CmpRHS, Pred, /*LookThroughTrunc=*/true,
2944 /*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
2945 if (Res && Res->Mask.isPowerOf2()) {
2946 assert(ICmpInst::isEquality(Res->Pred));
2947 Pred = Res->Pred;
2948 CurrX = Res->X;
2949 BitMask = ConstantInt::get(CurrX->getType(), Res->Mask);
2950 BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2());
2951 return true;
2952 }
2953 return false;
2954 };
2955
2956 if (!MatchVariableBitMask() && !MatchDecomposableConstantBitMask()) {
2957 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
2958 return false;
2959 }
2960
2961 // Step 3: Check if the recurrence is in desirable form.
2962 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2963 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2964 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2965 return false;
2966 }
2967
2968 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2969 NextX =
2970 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2971
2972 assert(CurLoop->isLoopInvariant(BaseX) &&
2973 "Expected BaseX to be available in the preheader!");
2974
2975 if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2976 // FIXME: support right-shift?
2977 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2978 return false;
2979 }
2980
2981 // Step 4: Check if the backedge's destinations are in desirable form.
2982
2984 "Should only get equality predicates here.");
2985
2986 // cmp-br is commutative, so canonicalize to a single variant.
2987 if (Pred != ICmpInst::Predicate::ICMP_EQ) {
2988 Pred = ICmpInst::getInversePredicate(Pred);
2989 std::swap(TrueBB, FalseBB);
2990 }
2991
2992 // We expect to exit loop when comparison yields false,
2993 // so when it yields true we should branch back to loop header.
2994 if (TrueBB != LoopHeaderBB) {
2995 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2996 return false;
2997 }
2998
2999 // Okay, idiom checks out.
3000 return true;
3001}
3002
3003/// Look for the following loop:
3004/// \code
3005/// entry:
3006/// <...>
3007/// %bitmask = shl i32 1, %bitpos
3008/// br label %loop
3009///
3010/// loop:
3011/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
3012/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
3013/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
3014/// %x.next = shl i32 %x.curr, 1
3015/// <...>
3016/// br i1 %x.curr.isbitunset, label %loop, label %end
3017///
3018/// end:
3019/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
3020/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
3021/// <...>
3022/// \endcode
3023///
3024/// And transform it into:
3025/// \code
3026/// entry:
3027/// %bitmask = shl i32 1, %bitpos
3028/// %lowbitmask = add i32 %bitmask, -1
3029/// %mask = or i32 %lowbitmask, %bitmask
3030/// %x.masked = and i32 %x, %mask
3031/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
3032/// i1 true)
3033/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
3034/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
3035/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
3036/// %tripcount = add i32 %backedgetakencount, 1
3037/// %x.curr = shl i32 %x, %backedgetakencount
3038/// %x.next = shl i32 %x, %tripcount
3039/// br label %loop
3040///
3041/// loop:
3042/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
3043/// %loop.iv.next = add nuw i32 %loop.iv, 1
3044/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
3045/// <...>
3046/// br i1 %loop.ivcheck, label %end, label %loop
3047///
3048/// end:
3049/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
3050/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
3051/// <...>
3052/// \endcode
3053bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
3054 bool MadeChange = false;
3055
3056 Value *X, *BitMask, *BitPos, *XCurr;
3057 Instruction *XNext;
3058 if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
3059 XNext)) {
3061 " shift-until-bittest idiom detection failed.\n");
3062 return MadeChange;
3063 }
3064 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
3065
3066 // Ok, it is the idiom we were looking for, we *could* transform this loop,
3067 // but is it profitable to transform?
3068
3069 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3070 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3071 assert(LoopPreheaderBB && "There is always a loop preheader.");
3072
3073 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
3074 assert(SuccessorBB && "There is only a single successor.");
3075
3076 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
3077 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
3078
3079 Intrinsic::ID IntrID = Intrinsic::ctlz;
3080 Type *Ty = X->getType();
3081 unsigned Bitwidth = Ty->getScalarSizeInBits();
3082
3085
3086 // The rewrite is considered to be unprofitable iff and only iff the
3087 // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
3088 // making the loop countable, even if nothing else changes.
3090 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()});
3091 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
3094 " Intrinsic is too costly, not beneficial\n");
3095 return MadeChange;
3096 }
3097 if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
3099 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
3100 return MadeChange;
3101 }
3102
3103 // Ok, transform appears worthwhile.
3104 MadeChange = true;
3105
3106 if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
3107 // BitMask may be computed from BitPos, Freeze BitPos so we can increase
3108 // it's use count.
3109 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
3110 if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
3111 InsertPt = BitPosI->getInsertionPointAfterDef();
3112 else
3113 InsertPt = DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
3114 if (!InsertPt)
3115 return false;
3116 FreezeInst *BitPosFrozen =
3117 new FreezeInst(BitPos, BitPos->getName() + ".fr", *InsertPt);
3118 BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
3119 return U.getUser() != BitPosFrozen;
3120 });
3121 BitPos = BitPosFrozen;
3122 }
3123
3124 // Step 1: Compute the loop trip count.
3125
3126 Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
3127 BitPos->getName() + ".lowbitmask");
3128 Value *Mask =
3129 Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
3130 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
3131 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
3132 IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
3133 /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
3134 Value *XMaskedNumActiveBits = Builder.CreateSub(
3135 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
3136 XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
3137 /*HasNSW=*/Bitwidth != 2);
3138 Value *XMaskedLeadingOnePos =
3139 Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
3140 XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
3141 /*HasNSW=*/Bitwidth > 2);
3142
3143 Value *LoopBackedgeTakenCount = Builder.CreateSub(
3144 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
3145 /*HasNUW=*/true, /*HasNSW=*/true);
3146 // We know loop's backedge-taken count, but what's loop's trip count?
3147 // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
3148 Value *LoopTripCount =
3149 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3150 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3151 /*HasNSW=*/Bitwidth != 2);
3152
3153 // Step 2: Compute the recurrence's final value without a loop.
3154
3155 // NewX is always safe to compute, because `LoopBackedgeTakenCount`
3156 // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
3157 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
3158 NewX->takeName(XCurr);
3159 if (auto *I = dyn_cast<Instruction>(NewX))
3160 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
3161
3162 Value *NewXNext;
3163 // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
3164 // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
3165 // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
3166 // that isn't the case, we'll need to emit an alternative, safe IR.
3167 if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
3171 Ty->getScalarSizeInBits() - 1))))
3172 NewXNext = Builder.CreateShl(X, LoopTripCount);
3173 else {
3174 // Otherwise, just additionally shift by one. It's the smallest solution,
3175 // alternatively, we could check that NewX is INT_MIN (or BitPos is )
3176 // and select 0 instead.
3177 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
3178 }
3179
3180 NewXNext->takeName(XNext);
3181 if (auto *I = dyn_cast<Instruction>(NewXNext))
3182 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
3183
3184 // Step 3: Adjust the successor basic block to recieve the computed
3185 // recurrence's final value instead of the recurrence itself.
3186
3187 XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
3188 XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
3189
3190 // Step 4: Rewrite the loop into a countable form, with canonical IV.
3191
3192 // The new canonical induction variable.
3193 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3194 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3195
3196 // The induction itself.
3197 // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
3198 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3199 auto *IVNext =
3200 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
3201 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3202
3203 // The loop trip count check.
3204 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
3205 CurLoop->getName() + ".ivcheck");
3206 SmallVector<uint32_t> BranchWeights;
3207 const bool HasBranchWeights =
3209 extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3210
3211 auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
3212 if (HasBranchWeights) {
3213 if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
3214 std::swap(BranchWeights[0], BranchWeights[1]);
3215 // We're not changing the loop profile, so we can reuse the original loop's
3216 // profile.
3217 setBranchWeights(*BI, BranchWeights,
3218 /*IsExpected=*/false);
3219 }
3220
3221 LoopHeaderBB->getTerminator()->eraseFromParent();
3222
3223 // Populate the IV PHI.
3224 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3225 IV->addIncoming(IVNext, LoopHeaderBB);
3226
3227 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
3228 // loop. The loop would otherwise not be deleted even if it becomes empty.
3229
3230 SE->forgetLoop(CurLoop);
3231
3232 // Other passes will take care of actually deleting the loop if possible.
3233
3234 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
3235
3236 ++NumShiftUntilBitTest;
3237 return MadeChange;
3238}
3239
3240/// Return true if the idiom is detected in the loop.
3241///
3242/// The core idiom we are trying to detect is:
3243/// \code
3244/// entry:
3245/// <...>
3246/// %start = <...>
3247/// %extraoffset = <...>
3248/// <...>
3249/// br label %for.cond
3250///
3251/// loop:
3252/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
3253/// %nbits = add nsw i8 %iv, %extraoffset
3254/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
3255/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
3256/// %iv.next = add i8 %iv, 1
3257/// <...>
3258/// br i1 %val.shifted.iszero, label %end, label %loop
3259///
3260/// end:
3261/// %iv.res = phi i8 [ %iv, %loop ] <...>
3262/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
3263/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
3264/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
3265/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
3266/// <...>
3267/// \endcode
3269 Instruction *&ValShiftedIsZero,
3270 Intrinsic::ID &IntrinID, Instruction *&IV,
3271 Value *&Start, Value *&Val,
3272 const SCEV *&ExtraOffsetExpr,
3273 bool &InvertedCond) {
3275 " Performing shift-until-zero idiom detection.\n");
3276
3277 // Give up if the loop has multiple blocks or multiple backedges.
3278 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
3279 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
3280 return false;
3281 }
3282
3283 Instruction *ValShifted, *NBits, *IVNext;
3284 Value *ExtraOffset;
3285
3286 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3287 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3288 assert(LoopPreheaderBB && "There is always a loop preheader.");
3289
3290 using namespace PatternMatch;
3291
3292 // Step 1: Check if the loop backedge, condition is in desirable form.
3293
3294 CmpPredicate Pred;
3295 BasicBlock *TrueBB, *FalseBB;
3296 if (!match(LoopHeaderBB->getTerminator(),
3297 m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
3298 m_BasicBlock(FalseBB))) ||
3299 !match(ValShiftedIsZero,
3300 m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
3301 !ICmpInst::isEquality(Pred)) {
3302 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
3303 return false;
3304 }
3305
3306 // Step 2: Check if the comparison's operand is in desirable form.
3307 // FIXME: Val could be a one-input PHI node, which we should look past.
3308 if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
3309 m_Instruction(NBits)))) {
3310 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
3311 return false;
3312 }
3313 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
3314 : Intrinsic::ctlz;
3315
3316 // Step 3: Check if the shift amount is in desirable form.
3317
3318 if (match(NBits, m_c_Add(m_Instruction(IV),
3319 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
3320 (NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
3321 ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
3322 else if (match(NBits,
3324 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
3325 NBits->hasNoSignedWrap())
3326 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
3327 else {
3328 IV = NBits;
3329 ExtraOffsetExpr = SE->getZero(NBits->getType());
3330 }
3331
3332 // Step 4: Check if the recurrence is in desirable form.
3333 auto *IVPN = dyn_cast<PHINode>(IV);
3334 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
3335 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
3336 return false;
3337 }
3338
3339 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
3340 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
3341
3342 if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
3343 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
3344 return false;
3345 }
3346
3347 // Step 4: Check if the backedge's destinations are in desirable form.
3348
3350 "Should only get equality predicates here.");
3351
3352 // cmp-br is commutative, so canonicalize to a single variant.
3353 InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
3354 if (InvertedCond) {
3355 Pred = ICmpInst::getInversePredicate(Pred);
3356 std::swap(TrueBB, FalseBB);
3357 }
3358
3359 // We expect to exit loop when comparison yields true,
3360 // so when it yields false we should branch back to loop header.
3361 if (FalseBB != LoopHeaderBB) {
3362 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
3363 return false;
3364 }
3365
3366 // The new, countable, loop will certainly only run a known number of
3367 // iterations, It won't be infinite. But the old loop might be infinite
3368 // under certain conditions. For logical shifts, the value will become zero
3369 // after at most bitwidth(%Val) loop iterations. However, for arithmetic
3370 // right-shift, iff the sign bit was set, the value will never become zero,
3371 // and the loop may never finish.
3372 if (ValShifted->getOpcode() == Instruction::AShr &&
3373 !isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
3374 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
3375 return false;
3376 }
3377
3378 // Okay, idiom checks out.
3379 return true;
3380}
3381
3382/// Look for the following loop:
3383/// \code
3384/// entry:
3385/// <...>
3386/// %start = <...>
3387/// %extraoffset = <...>
3388/// <...>
3389/// br label %loop
3390///
3391/// loop:
3392/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
3393/// %nbits = add nsw i8 %iv, %extraoffset
3394/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
3395/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
3396/// %iv.next = add i8 %iv, 1
3397/// <...>
3398/// br i1 %val.shifted.iszero, label %end, label %loop
3399///
3400/// end:
3401/// %iv.res = phi i8 [ %iv, %loop ] <...>
3402/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
3403/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
3404/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
3405/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
3406/// <...>
3407/// \endcode
3408///
3409/// And transform it into:
3410/// \code
3411/// entry:
3412/// <...>
3413/// %start = <...>
3414/// %extraoffset = <...>
3415/// <...>
3416/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
3417/// %val.numactivebits = sub i8 8, %val.numleadingzeros
3418/// %extraoffset.neg = sub i8 0, %extraoffset
3419/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
3420/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
3421/// %loop.tripcount = sub i8 %iv.final, %start
3422/// br label %loop
3423///
3424/// loop:
3425/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
3426/// %loop.iv.next = add i8 %loop.iv, 1
3427/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
3428/// %iv = add i8 %loop.iv, %start
3429/// <...>
3430/// br i1 %loop.ivcheck, label %end, label %loop
3431///
3432/// end:
3433/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
3434/// <...>
3435/// \endcode
3436bool LoopIdiomRecognize::recognizeShiftUntilZero() {
3437 bool MadeChange = false;
3438
3439 Instruction *ValShiftedIsZero;
3440 Intrinsic::ID IntrID;
3441 Instruction *IV;
3442 Value *Start, *Val;
3443 const SCEV *ExtraOffsetExpr;
3444 bool InvertedCond;
3445 if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
3446 Start, Val, ExtraOffsetExpr, InvertedCond)) {
3448 " shift-until-zero idiom detection failed.\n");
3449 return MadeChange;
3450 }
3451 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
3452
3453 // Ok, it is the idiom we were looking for, we *could* transform this loop,
3454 // but is it profitable to transform?
3455
3456 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3457 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3458 assert(LoopPreheaderBB && "There is always a loop preheader.");
3459
3460 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
3461 assert(SuccessorBB && "There is only a single successor.");
3462
3463 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
3464 Builder.SetCurrentDebugLocation(IV->getDebugLoc());
3465
3466 Type *Ty = Val->getType();
3467 unsigned Bitwidth = Ty->getScalarSizeInBits();
3468
3471
3472 // The rewrite is considered to be unprofitable iff and only iff the
3473 // intrinsic we'll use are not cheap. Note that we are okay with *just*
3474 // making the loop countable, even if nothing else changes.
3476 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()});
3477 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
3480 " Intrinsic is too costly, not beneficial\n");
3481 return MadeChange;
3482 }
3483
3484 // Ok, transform appears worthwhile.
3485 MadeChange = true;
3486
3487 bool OffsetIsZero = ExtraOffsetExpr->isZero();
3488
3489 // Step 1: Compute the loop's final IV value / trip count.
3490
3491 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
3492 IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
3493 /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
3494 Value *ValNumActiveBits = Builder.CreateSub(
3495 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
3496 Val->getName() + ".numactivebits", /*HasNUW=*/true,
3497 /*HasNSW=*/Bitwidth != 2);
3498
3499 SCEVExpander Expander(*SE, *DL, "loop-idiom");
3500 Expander.setInsertPoint(&*Builder.GetInsertPoint());
3501 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3502
3503 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3504 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
3505 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
3506 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3507 {ValNumActiveBitsOffset, Start},
3508 /*FMFSource=*/nullptr, "iv.final");
3509
3510 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
3511 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
3512 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
3513 // FIXME: or when the offset was `add nuw`
3514
3515 // We know loop's backedge-taken count, but what's loop's trip count?
3516 Value *LoopTripCount =
3517 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3518 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3519 /*HasNSW=*/Bitwidth != 2);
3520
3521 // Step 2: Adjust the successor basic block to recieve the original
3522 // induction variable's final value instead of the orig. IV itself.
3523
3524 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3525
3526 // Step 3: Rewrite the loop into a countable form, with canonical IV.
3527
3528 // The new canonical induction variable.
3529 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3530 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3531
3532 // The induction itself.
3533 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
3534 auto *CIVNext =
3535 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
3536 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3537
3538 // The loop trip count check.
3539 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3540 CurLoop->getName() + ".ivcheck");
3541 auto *NewIVCheck = CIVCheck;
3542 if (InvertedCond) {
3543 NewIVCheck = Builder.CreateNot(CIVCheck);
3544 NewIVCheck->takeName(ValShiftedIsZero);
3545 }
3546
3547 // The original IV, but rebased to be an offset to the CIV.
3548 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
3549 /*HasNSW=*/true); // FIXME: what about NUW?
3550 IVDePHId->takeName(IV);
3551
3552 // The loop terminator.
3553 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3554 SmallVector<uint32_t> BranchWeights;
3555 const bool HasBranchWeights =
3557 extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3558
3559 auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3560 if (HasBranchWeights) {
3561 if (InvertedCond)
3562 std::swap(BranchWeights[0], BranchWeights[1]);
3563 // We're not changing the loop profile, so we can reuse the original loop's
3564 // profile.
3565 setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
3566 }
3567 LoopHeaderBB->getTerminator()->eraseFromParent();
3568
3569 // Populate the IV PHI.
3570 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3571 CIV->addIncoming(CIVNext, LoopHeaderBB);
3572
3573 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
3574 // loop. The loop would otherwise not be deleted even if it becomes empty.
3575
3576 SE->forgetLoop(CurLoop);
3577
3578 // Step 5: Try to cleanup the loop's body somewhat.
3579 IV->replaceAllUsesWith(IVDePHId);
3580 IV->eraseFromParent();
3581
3582 ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
3583 ValShiftedIsZero->eraseFromParent();
3584
3585 // Other passes will take care of actually deleting the loop if possible.
3586
3587 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
3588
3589 ++NumShiftUntilZero;
3590 return MadeChange;
3591}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
DXIL Resource Access
This file defines the DenseMap class.
#define DEBUG_TYPE
static const HTTPClientCleanup Cleanup
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX, APInt &Threshold)
Return true if the idiom is detected in the loop.
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset....
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
static Value * matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry, APInt &Threshold)
Check if the given conditional branch is based on an unsigned less-than comparison between a variable...
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static void deleteDeadInstruction(Instruction *I)
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
Definition APInt.h:1553
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
BinaryOps getOpcode() const
Definition InstrTypes.h:374
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition InstrTypes.h:768
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:124
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
PointerType * getType() const
Global values are always pointers.
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
static CRCTable genSarwateTable(const APInt &GenPoly, bool ByteOrderSwapped)
Generate a lookup table of 256 entries by interleaving the generating polynomial.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition IRBuilder.h:497
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
bool isUnordered() const
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition LoopInfo.cpp:175
StringRef getName() const
Definition LoopInfo.h:389
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:151
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
bool isForceInlined() const
bool isVolatile() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:278
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
Definition SetVector.h:58
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:260
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
unsigned getWCharSize(const Module &M) const
Returns the size of the wchar_t type in bytes or 0 if the size is unknown.
bool has(LibFunc F) const
Tests whether a library function is available.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Basic
The cost of a typical 'add' instruction.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition Value.cpp:554
LLVM_ABI void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition Value.cpp:599
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ HeaderSize
Definition BTF.h:61
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OperandType
Operands are tagged with one of the values of this enum.
Definition MCInstrDesc.h:60
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
static cl::opt< bool, true > EnableLIRPWcslen("disable-loop-idiom-wcslen", cl::desc("Proceed with loop idiom recognize pass, " "enable conversion of loop(s) to wcslen."), cl::location(DisableLIRP::Wcslen), cl::init(false), cl::ReallyHidden)
InstructionCost Cost
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< bool, true > DisableLIRPStrlen("disable-loop-idiom-strlen", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to strlen."), cl::location(DisableLIRP::Strlen), cl::init(false), cl::ReallyHidden)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
static cl::opt< bool > ForceMemsetPatternIntrinsic("loop-idiom-force-memset-pattern-intrinsic", cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false), cl::Hidden)
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
LLVM_ABI bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Value * emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the strlen function to the builder, for the specified pointer.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
static cl::opt< bool, true > DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize", cl::desc("Proceed with loop idiom recognize pass, " "but do not optimize CRC loops."), cl::location(DisableLIRP::HashRecognize), cl::init(false), cl::ReallyHidden)
TargetTransformInfo TTI
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI Value * emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the wcslen function to the builder, for the specified pointer.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden)
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:641
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false, bool DecomposeAnd=false)
Decompose an icmp into the form ((X & Mask) pred C) if possible.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
Definition Metadata.h:834
static bool Memcpy
When true, Memcpy is disabled.
static bool Wcslen
When true, Wcslen is disabled.
static bool Strlen
When true, Strlen is disabled.
static bool HashRecognize
When true, HashRecognize is disabled.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
The structure that is returned when a polynomial algorithm was recognized by the analysis.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)