107#define DEBUG_TYPE "loop-idiom"
109STATISTIC(NumMemSet,
"Number of memset's formed from loop stores");
110STATISTIC(NumMemCpy,
"Number of memcpy's formed from loop load+stores");
111STATISTIC(NumMemMove,
"Number of memmove's formed from loop load+stores");
113 NumShiftUntilBitTest,
114 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
116 "Number of uncountable loops recognized as 'shift until zero' idiom");
121 cl::desc(
"Options to disable Loop Idiom Recognize Pass."),
128 cl::desc(
"Proceed with loop idiom recognize pass, but do "
129 "not convert loop(s) to memset."),
136 cl::desc(
"Proceed with loop idiom recognize pass, but do "
137 "not convert loop(s) to memcpy."),
142 "use-lir-code-size-heurs",
143 cl::desc(
"Use loop idiom recognition code size heuristics when compiling"
149class LoopIdiomRecognize {
150 Loop *CurLoop =
nullptr;
159 bool ApplyCodeSizeHeuristics;
160 std::unique_ptr<MemorySSAUpdater> MSSAU;
169 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI),
TTI(
TTI),
DL(
DL), ORE(ORE) {
171 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
174 bool runOnLoop(
Loop *L);
180 StoreListMap StoreRefsForMemset;
181 StoreListMap StoreRefsForMemsetPattern;
182 StoreList StoreRefsForMemcpy;
184 bool HasMemsetPattern;
188 enum LegalStoreKind {
193 UnorderedAtomicMemcpy,
201 bool runOnCountableLoop();
206 LegalStoreKind isLegalStore(
StoreInst *SI);
207 enum class ForMemset {
No,
Yes };
211 template <
typename MemInst>
212 bool processLoopMemIntrinsic(
214 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
215 const SCEV *BECount);
219 bool processLoopStridedStore(
Value *DestPtr,
const SCEV *StoreSizeSCEV,
224 bool IsNegStride,
bool IsLoopMemset =
false);
225 bool processLoopStoreOfLoopLoad(
StoreInst *SI,
const SCEV *BECount);
226 bool processLoopStoreOfLoopLoad(
Value *DestPtr,
Value *SourcePtr,
232 const SCEV *BECount);
233 bool avoidLIRForMultiBlockLoop(
bool IsMemset =
false,
234 bool IsLoopMemset =
false);
240 bool runOnNoncountableLoop();
242 bool recognizePopcount();
245 bool recognizeAndInsertFFS();
250 bool IsCntPhiUsedOutsideLoop);
252 bool recognizeShiftUntilBitTest();
253 bool recognizeShiftUntilZero();
258class LoopIdiomRecognizeLegacyPass :
public LoopPass {
262 explicit LoopIdiomRecognizeLegacyPass() :
LoopPass(
ID) {
274 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
275 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
276 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
277 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
279 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
282 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
285 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
288 MSSA = &MSSAAnalysis->getMSSA();
295 LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI,
TTI, MSSA,
DL, ORE);
296 return LIR.runOnLoop(L);
311char LoopIdiomRecognizeLegacyPass::ID = 0;
326 LoopIdiomRecognize LIR(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
SE, &AR.
TLI, &AR.
TTI,
328 if (!LIR.runOnLoop(&L))
338 "Recognize loop idioms",
false,
false)
349 I->eraseFromParent();
358bool LoopIdiomRecognize::runOnLoop(
Loop *L) {
367 if (
Name ==
"memset" ||
Name ==
"memcpy")
371 ApplyCodeSizeHeuristics =
374 HasMemset = TLI->
has(LibFunc_memset);
375 HasMemsetPattern = TLI->
has(LibFunc_memset_pattern16);
376 HasMemcpy = TLI->
has(LibFunc_memcpy);
378 if (HasMemset || HasMemsetPattern || HasMemcpy)
380 return runOnCountableLoop();
382 return runOnNoncountableLoop();
385bool LoopIdiomRecognize::runOnCountableLoop() {
387 assert(!isa<SCEVCouldNotCompute>(BECount) &&
388 "runOnCountableLoop() called on a loop without a predictable"
389 "backedge-taken count");
393 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
394 if (BECst->getAPInt() == 0)
412 bool MadeChange =
false;
420 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
444 if (!
C || isa<ConstantExpr>(
C))
453 if (
DL->isBigEndian())
469 unsigned ArraySize = 16 /
Size;
474LoopIdiomRecognize::LegalStoreKind
475LoopIdiomRecognize::isLegalStore(
StoreInst *SI) {
477 if (
SI->isVolatile())
478 return LegalStoreKind::None;
480 if (!
SI->isUnordered())
481 return LegalStoreKind::None;
484 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
485 return LegalStoreKind::None;
487 Value *StoredVal =
SI->getValueOperand();
488 Value *StorePtr =
SI->getPointerOperand();
493 return LegalStoreKind::None;
501 return LegalStoreKind::None;
507 dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(StorePtr));
509 return LegalStoreKind::None;
512 if (!isa<SCEVConstant>(StoreEv->
getOperand(1)))
513 return LegalStoreKind::None;
524 bool UnorderedAtomic =
SI->isUnordered() && !
SI->isSimple();
533 return LegalStoreKind::Memset;
540 return LegalStoreKind::MemsetPattern;
548 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
549 if (StoreSize != Stride && StoreSize != -Stride)
550 return LegalStoreKind::None;
553 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
557 return LegalStoreKind::None;
560 return LegalStoreKind::None;
568 return LegalStoreKind::None;
572 return LegalStoreKind::None;
575 UnorderedAtomic = UnorderedAtomic || LI->
isAtomic();
576 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
577 : LegalStoreKind::Memcpy;
580 return LegalStoreKind::None;
583void LoopIdiomRecognize::collectStores(
BasicBlock *BB) {
584 StoreRefsForMemset.clear();
585 StoreRefsForMemsetPattern.clear();
586 StoreRefsForMemcpy.clear();
593 switch (isLegalStore(SI)) {
594 case LegalStoreKind::None:
597 case LegalStoreKind::Memset: {
600 StoreRefsForMemset[
Ptr].push_back(SI);
602 case LegalStoreKind::MemsetPattern: {
605 StoreRefsForMemsetPattern[
Ptr].push_back(SI);
607 case LegalStoreKind::Memcpy:
608 case LegalStoreKind::UnorderedAtomicMemcpy:
609 StoreRefsForMemcpy.push_back(SI);
612 assert(
false &&
"unhandled return value");
621bool LoopIdiomRecognize::runOnLoopBlock(
631 bool MadeChange =
false;
638 for (
auto &SL : StoreRefsForMemset)
639 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
641 for (
auto &SL : StoreRefsForMemsetPattern)
642 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
645 for (
auto &SI : StoreRefsForMemcpy)
646 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
648 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
649 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
650 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
651 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
658 const SCEV *BECount, ForMemset For) {
666 for (
unsigned i = 0, e = SL.
size(); i < e; ++i) {
667 assert(SL[i]->
isSimple() &&
"Expected only non-volatile stores.");
669 Value *FirstStoredVal = SL[i]->getValueOperand();
670 Value *FirstStorePtr = SL[i]->getPointerOperand();
672 cast<SCEVAddRecExpr>(SE->
getSCEV(FirstStorePtr));
674 unsigned FirstStoreSize =
DL->getTypeStoreSize(SL[i]->getValueOperand()->
getType());
677 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
682 Value *FirstSplatValue =
nullptr;
683 Constant *FirstPatternValue =
nullptr;
685 if (For == ForMemset::Yes)
690 assert((FirstSplatValue || FirstPatternValue) &&
691 "Expected either splat value or pattern value.");
699 for (j = i + 1; j <
e; ++j)
701 for (j = i; j > 0; --j)
704 for (
auto &k : IndexQueue) {
705 assert(SL[k]->
isSimple() &&
"Expected only non-volatile stores.");
706 Value *SecondStorePtr = SL[k]->getPointerOperand();
708 cast<SCEVAddRecExpr>(SE->
getSCEV(SecondStorePtr));
711 if (FirstStride != SecondStride)
714 Value *SecondStoredVal = SL[k]->getValueOperand();
715 Value *SecondSplatValue =
nullptr;
716 Constant *SecondPatternValue =
nullptr;
718 if (For == ForMemset::Yes)
723 assert((SecondSplatValue || SecondPatternValue) &&
724 "Expected either splat value or pattern value.");
727 if (For == ForMemset::Yes) {
728 if (isa<UndefValue>(FirstSplatValue))
729 FirstSplatValue = SecondSplatValue;
730 if (FirstSplatValue != SecondSplatValue)
733 if (isa<UndefValue>(FirstPatternValue))
734 FirstPatternValue = SecondPatternValue;
735 if (FirstPatternValue != SecondPatternValue)
740 ConsecutiveChain[SL[i]] = SL[k];
749 bool Changed =
false;
760 unsigned StoreSize = 0;
763 while (Tails.
count(
I) || Heads.count(
I)) {
764 if (TransformedStores.
count(
I))
768 StoreSize +=
DL->getTypeStoreSize(
I->getValueOperand()->getType());
770 I = ConsecutiveChain[
I];
780 if (StoreSize != Stride && StoreSize != -Stride)
783 bool IsNegStride = StoreSize == -Stride;
787 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
789 HeadStore, AdjacentStores, StoreEv, BECount,
791 TransformedStores.
insert(AdjacentStores.
begin(), AdjacentStores.
end());
801template <
typename MemInst>
802bool LoopIdiomRecognize::processLoopMemIntrinsic(
804 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
805 const SCEV *BECount) {
806 bool MadeChange =
false;
810 if (MemInst *
MI = dyn_cast<MemInst>(Inst)) {
812 if (!(this->*Processor)(
MI, BECount))
826bool LoopIdiomRecognize::processLoopMemCpy(
MemCpyInst *MCI,
827 const SCEV *BECount) {
838 if (!Dest || !Source)
853 if ((SizeInBytes >> 32) != 0)
859 dyn_cast<SCEVConstant>(StoreEv->
getOperand(1));
861 dyn_cast<SCEVConstant>(LoadEv->
getOperand(1));
862 if (!ConstStoreStride || !ConstLoadStride)
871 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
874 <<
ore::NV(
"Inst",
"memcpy") <<
" in "
876 <<
" function will not be hoisted: "
877 <<
ore::NV(
"Reason",
"memcpy size is not equal to stride");
882 int64_t StoreStrideInt = StoreStrideValue.
getSExtValue();
885 if (StoreStrideInt != LoadStrideInt)
888 return processLoopStoreOfLoopLoad(
895bool LoopIdiomRecognize::processLoopMemSet(
MemSetInst *MSI,
896 const SCEV *BECount) {
911 if (!Ev || Ev->
getLoop() != CurLoop)
920 if (!PointerStrideSCEV || !MemsetSizeSCEV)
923 bool IsNegStride =
false;
924 const bool IsConstantSize = isa<ConstantInt>(MSI->
getLength());
926 if (IsConstantSize) {
937 if (SizeInBytes != Stride && SizeInBytes != -Stride)
940 IsNegStride = SizeInBytes == -Stride;
948 if (
Pointer->getType()->getPointerAddressSpace() != 0) {
961 const SCEV *PositiveStrideSCEV =
964 LLVM_DEBUG(
dbgs() <<
" MemsetSizeSCEV: " << *MemsetSizeSCEV <<
"\n"
965 <<
" PositiveStrideSCEV: " << *PositiveStrideSCEV
968 if (PositiveStrideSCEV != MemsetSizeSCEV) {
971 const SCEV *FoldedPositiveStride =
973 const SCEV *FoldedMemsetSize =
977 <<
" FoldedMemsetSize: " << *FoldedMemsetSize <<
"\n"
978 <<
" FoldedPositiveStride: " << *FoldedPositiveStride
981 if (FoldedPositiveStride != FoldedMemsetSize) {
998 BECount, IsNegStride,
true);
1006 const SCEV *BECount,
const SCEV *StoreSizeSCEV,
1016 const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
1017 const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1018 if (BECst && ConstSize)
1040 Type *IntPtr,
const SCEV *StoreSizeSCEV,
1043 if (!StoreSizeSCEV->
isOne()) {
1057 const SCEV *TripCountS =
nullptr;
1064 if (
DL->getTypeSizeInBits(BECount->
getType()) <
1065 DL->getTypeSizeInBits(IntPtr) &&
1085 const SCEV *StoreSizeSCEV,
Loop *CurLoop,
1096bool LoopIdiomRecognize::processLoopStridedStore(
1100 const SCEV *BECount,
bool IsNegStride,
bool IsLoopMemset) {
1108 assert((SplatValue || PatternValue) &&
1109 "Expected either splat value or pattern value.");
1120 Type *DestInt8PtrTy =
Builder.getInt8PtrTy(DestAS);
1123 bool Changed =
false;
1131 if (!Expander.isSafeToExpand(Start))
1140 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->
getTerminator());
1152 StoreSizeSCEV, *AA, Stores))
1155 if (avoidLIRForMultiBlockLoop(
true, IsLoopMemset))
1160 const SCEV *NumBytesS =
1161 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1165 if (!Expander.isSafeToExpand(NumBytesS))
1169 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1175 AATags = AATags.
merge(
Store->getAAMetadata());
1176 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1177 AATags = AATags.
extendTo(CI->getZExtValue());
1181 NewCall =
Builder.CreateMemSet(
1182 BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment),
1186 Type *Int8PtrTy = DestInt8PtrTy;
1188 StringRef FuncName =
"memset_pattern16";
1190 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1197 PatternValue,
".memset_pattern");
1201 NewCall =
Builder.CreateCall(MSP, {
BasePtr, PatternPtr, NumBytes});
1208 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1210 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1214 <<
" from store to: " << *Ev <<
" at: " << *TheStore
1220 R <<
"Transformed loop-strided store in "
1222 <<
" function into a call to "
1225 if (!Stores.empty())
1227 for (
auto *
I : Stores) {
1228 R <<
ore::NV(
"FromBlock",
I->getParent()->getName())
1236 for (
auto *
I : Stores) {
1238 MSSAU->removeMemoryAccess(
I,
true);
1242 MSSAU->getMemorySSA()->verifyMemorySSA();
1244 ExpCleaner.markResultUsed();
1251bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI,
1252 const SCEV *BECount) {
1253 assert(
SI->isUnordered() &&
"Expected only non-volatile non-ordered stores.");
1255 Value *StorePtr =
SI->getPointerOperand();
1257 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1260 LoadInst *LI = cast<LoadInst>(
SI->getValueOperand());
1270 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1272 StoreEv, LoadEv, BECount);
1276class MemmoveVerifier {
1278 explicit MemmoveVerifier(
const Value &LoadBasePtr,
const Value &StoreBasePtr,
1281 LoadBasePtr.stripPointerCasts(), LoadOff,
DL)),
1283 StoreBasePtr.stripPointerCasts(), StoreOff,
DL)),
1284 IsSameObject(BP1 == BP2) {}
1286 bool loadAndStoreMayFormMemmove(
unsigned StoreSize,
bool IsNegStride,
1288 bool IsMemCpy)
const {
1292 if ((!IsNegStride && LoadOff <= StoreOff) ||
1293 (IsNegStride && LoadOff >= StoreOff))
1299 DL.getTypeSizeInBits(TheLoad.
getType()).getFixedValue() / 8;
1300 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1302 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1303 (IsNegStride && LoadOff + LoadSize > StoreOff))
1311 int64_t LoadOff = 0;
1312 int64_t StoreOff = 0;
1317 const bool IsSameObject;
1321bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1330 if (isa<MemCpyInlineInst>(TheStore))
1342 bool Changed =
false;
1345 Type *IntIdxTy =
Builder.getIntNTy(
DL->getIndexSizeInBits(StrAS));
1348 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1351 assert(ConstStoreSize &&
"store size is expected to be a constant");
1354 bool IsNegStride = StoreSize == -Stride;
1367 Value *StoreBasePtr = Expander.expandCodeFor(
1380 IgnoredInsts.
insert(TheStore);
1382 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1383 const StringRef InstRemark = IsMemCpy ?
"memcpy" :
"load and store";
1385 bool LoopAccessStore =
1387 StoreSizeSCEV, *AA, IgnoredInsts);
1388 if (LoopAccessStore) {
1394 IgnoredInsts.
insert(TheLoad);
1396 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1400 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1402 <<
" function will not be hoisted: "
1403 <<
ore::NV(
"Reason",
"The loop may access store location");
1407 IgnoredInsts.
erase(TheLoad);
1420 Value *LoadBasePtr = Expander.expandCodeFor(
1425 MemmoveVerifier
Verifier(*LoadBasePtr, *StoreBasePtr, *
DL);
1426 if (IsMemCpy && !
Verifier.IsSameObject)
1427 IgnoredInsts.
erase(TheStore);
1429 StoreSizeSCEV, *AA, IgnoredInsts)) {
1432 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1434 <<
" function will not be hoisted: "
1435 <<
ore::NV(
"Reason",
"The loop may access load location");
1440 bool UseMemMove = IsMemCpy ?
Verifier.IsSameObject : LoopAccessStore;
1442 if (!
Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1446 if (avoidLIRForMultiBlockLoop())
1451 const SCEV *NumBytesS =
1452 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1455 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1459 AATags = AATags.
merge(StoreAATags);
1460 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1461 AATags = AATags.
extendTo(CI->getZExtValue());
1471 NewCall =
Builder.CreateMemMove(
1472 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1476 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1477 NumBytes,
false, AATags.
TBAA,
1485 assert((StoreAlign && LoadAlign) &&
1486 "Expect unordered load/store to have align.");
1487 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1500 NewCall =
Builder.CreateElementUnorderedAtomicMemCpy(
1501 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1507 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1509 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1513 <<
" from load ptr=" << *LoadEv <<
" at: " << *TheLoad
1515 <<
" from store ptr=" << *StoreEv <<
" at: " << *TheStore
1521 <<
"Formed a call to "
1523 <<
"() intrinsic from " <<
ore::NV(
"Inst", InstRemark)
1534 MSSAU->removeMemoryAccess(TheStore,
true);
1537 MSSAU->getMemorySSA()->verifyMemorySSA();
1542 ExpCleaner.markResultUsed();
1549bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(
bool IsMemset,
1550 bool IsLoopMemset) {
1551 if (ApplyCodeSizeHeuristics && CurLoop->
getNumBlocks() > 1) {
1552 if (CurLoop->
isOutermost() && (!IsMemset || !IsLoopMemset)) {
1554 <<
" : LIR " << (IsMemset ?
"Memset" :
"Memcpy")
1555 <<
" avoided: multi-block top-level loop\n");
1563bool LoopIdiomRecognize::runOnNoncountableLoop() {
1566 <<
"] Noncountable Loop %"
1569 return recognizePopcount() || recognizeAndInsertFFS() ||
1570 recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
1580 bool JmpOnZero =
false) {
1589 if (!CmpZero || !CmpZero->
isZero())
1600 return Cond->getOperand(0);
1609 auto *PhiX = dyn_cast<PHINode>(VarX);
1610 if (PhiX && PhiX->getParent() == LoopEntry &&
1611 (PhiX->getOperand(0) == DefX || PhiX->
getOperand(1) == DefX))
1647 Value *VarX1, *VarX0;
1650 DefX2 = CountInst =
nullptr;
1651 VarX1 = VarX0 =
nullptr;
1652 PhiX = CountPhi =
nullptr;
1658 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1659 DefX2 = dyn_cast<Instruction>(
T);
1666 if (!DefX2 || DefX2->
getOpcode() != Instruction::And)
1671 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(0))))
1675 SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(1));
1677 if (!SubOneOp || SubOneOp->
getOperand(0) != VarX1)
1683 (SubOneOp->
getOpcode() == Instruction::Add &&
1696 CountInst =
nullptr;
1699 if (Inst.
getOpcode() != Instruction::Add)
1703 if (!Inc || !Inc->
isOne())
1711 bool LiveOutLoop =
false;
1713 if ((cast<Instruction>(U))->
getParent() != LoopEntry) {
1733 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->
getTerminator());
1738 CntInst = CountInst;
1778 Value *VarX =
nullptr;
1787 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1788 DefX = dyn_cast<Instruction>(
T);
1793 if (!DefX || !DefX->
isShift())
1795 IntrinID = DefX->
getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1798 if (!Shft || !Shft->
isOne())
1823 if (Inst.
getOpcode() != Instruction::Add)
1847bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1859 size_t IdiomCanonicalSize = 6;
1862 CntInst, CntPhi, DefX))
1865 bool IsCntPhiUsedOutsideLoop =
false;
1867 if (!CurLoop->
contains(cast<Instruction>(U))) {
1868 IsCntPhiUsedOutsideLoop =
true;
1871 bool IsCntInstUsedOutsideLoop =
false;
1873 if (!CurLoop->
contains(cast<Instruction>(U))) {
1874 IsCntInstUsedOutsideLoop =
true;
1879 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1885 bool ZeroCheck =
false;
1894 if (!IsCntPhiUsedOutsideLoop) {
1898 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1923 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1928 if (HeaderSize != IdiomCanonicalSize &&
1932 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1934 IsCntPhiUsedOutsideLoop);
1942bool LoopIdiomRecognize::recognizePopcount() {
1956 if (LoopBody->
size() >= 20) {
1966 if (!EntryBI || EntryBI->isConditional())
1974 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1975 if (!PreCondBI || PreCondBI->isUnconditional())
1984 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1990 Value *Ops[] = {Val};
2046void LoopIdiomRecognize::transformLoopToCountable(
2049 bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop) {
2063 if (IsCntPhiUsedOutsideLoop) {
2064 if (DefX->
getOpcode() == Instruction::AShr)
2065 InitXNext =
Builder.CreateAShr(InitX, 1);
2066 else if (DefX->
getOpcode() == Instruction::LShr)
2067 InitXNext =
Builder.CreateLShr(InitX, 1);
2068 else if (DefX->
getOpcode() == Instruction::Shl)
2069 InitXNext =
Builder.CreateShl(InitX, 1);
2079 Value *NewCount = Count;
2080 if (IsCntPhiUsedOutsideLoop)
2083 NewCount =
Builder.CreateZExtOrTrunc(NewCount, CntInst->
getType());
2086 if (cast<ConstantInt>(CntInst->
getOperand(1))->isOne()) {
2089 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2090 if (!InitConst || !InitConst->
isZero())
2091 NewCount =
Builder.CreateAdd(NewCount, CntInitVal);
2095 NewCount =
Builder.CreateSub(CntInitVal, NewCount);
2108 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2112 Builder.SetInsertPoint(LbCond);
2127 if (IsCntPhiUsedOutsideLoop)
2137void LoopIdiomRecognize::transformLoopToPopcount(
BasicBlock *PreCondBB,
2141 auto *PreCondBr = cast<BranchInst>(PreCondBB->
getTerminator());
2150 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2153 NewCount = PopCntZext =
2154 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->
getType()));
2156 if (NewCount != PopCnt)
2157 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2164 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2165 if (!InitConst || !InitConst->
isZero()) {
2166 NewCount =
Builder.CreateAdd(NewCount, CntInitVal);
2167 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2176 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2178 Value *Opnd0 = PopCntZext;
2183 ICmpInst *NewPreCond = cast<ICmpInst>(
2185 PreCondBr->setCondition(NewPreCond);
2213 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2218 Builder.SetInsertPoint(LbCond);
2221 "tcdec",
false,
true));
2249 : SubPattern(SP), L(L) {}
2251 template <
typename ITy>
bool match(ITy *V) {
2257template <
typename Ty>
2288 " Performing shift-until-bittest idiom detection.\n");
2298 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2300 using namespace PatternMatch;
2305 Value *CmpLHS, *CmpRHS;
2316 auto MatchVariableBitMask = [&]() {
2325 auto MatchConstantBitMask = [&]() {
2331 auto MatchDecomposableConstantBitMask = [&]() {
2339 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2340 !MatchDecomposableConstantBitMask()) {
2346 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2347 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2352 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2354 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2357 "Expected BaseX to be avaliable in the preheader!");
2368 "Should only get equality predicates here.");
2378 if (TrueBB != LoopHeaderBB) {
2437bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2438 bool MadeChange =
false;
2440 Value *
X, *BitMask, *BitPos, *XCurr;
2445 " shift-until-bittest idiom detection failed.\n");
2455 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2458 assert(SuccessorBB &&
"There is only a single successor.");
2464 Type *Ty =
X->getType();
2478 " Intrinsic is too costly, not beneficial\n");
2493 BitPos->
getName() +
".lowbitmask");
2495 Builder.CreateOr(LowBitMask, BitMask, BitPos->
getName() +
".mask");
2496 Value *XMasked =
Builder.CreateAnd(
X, Mask,
X->getName() +
".masked");
2498 IntrID, Ty, {XMasked,
Builder.getTrue()},
2499 nullptr, XMasked->
getName() +
".numleadingzeros");
2502 XMasked->
getName() +
".numactivebits",
true,
2504 Value *XMaskedLeadingOnePos =
2506 XMasked->
getName() +
".leadingonepos",
false,
2510 BitPos, XMaskedLeadingOnePos, CurLoop->
getName() +
".backedgetakencount",
2514 Value *LoopTripCount =
2516 CurLoop->
getName() +
".tripcount",
true,
2525 if (
auto *
I = dyn_cast<Instruction>(NewX))
2526 I->copyIRFlags(XNext,
true);
2538 NewXNext =
Builder.CreateShl(
X, LoopTripCount);
2547 if (
auto *
I = dyn_cast<Instruction>(NewXNext))
2548 I->copyIRFlags(XNext,
true);
2567 true, Bitwidth != 2);
2570 auto *IVCheck =
Builder.CreateICmpEQ(IVNext, LoopTripCount,
2571 CurLoop->
getName() +
".ivcheck");
2572 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2577 IV->addIncoming(IVNext, LoopHeaderBB);
2588 ++NumShiftUntilBitTest;
2624 const SCEV *&ExtraOffsetExpr,
2625 bool &InvertedCond) {
2627 " Performing shift-until-zero idiom detection.\n");
2640 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2642 using namespace PatternMatch;
2651 !
match(ValShiftedIsZero,
2665 IntrinID = ValShifted->
getOpcode() == Instruction::Shl ? Intrinsic::cttz
2674 else if (
match(NBits,
2678 ExtraOffsetExpr = SE->
getSCEV(ExtraOffset);
2685 auto *IVPN = dyn_cast<PHINode>(
IV);
2686 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2691 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2692 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2702 "Should only get equality predicates here.");
2713 if (FalseBB != LoopHeaderBB) {
2724 if (ValShifted->
getOpcode() == Instruction::AShr &&
2788bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2789 bool MadeChange =
false;
2795 const SCEV *ExtraOffsetExpr;
2798 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2800 " shift-until-zero idiom detection failed.\n");
2810 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2813 assert(SuccessorBB &&
"There is only a single successor.");
2816 Builder.SetCurrentDebugLocation(
IV->getDebugLoc());
2832 " Intrinsic is too costly, not beneficial\n");
2839 bool OffsetIsZero =
false;
2840 if (
auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2841 OffsetIsZero = ExtraOffsetExprC->isZero();
2846 IntrID, Ty, {Val,
Builder.getFalse()},
2847 nullptr, Val->
getName() +
".numleadingzeros");
2850 Val->
getName() +
".numactivebits",
true,
2854 Expander.setInsertPoint(&*
Builder.GetInsertPoint());
2855 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
2858 ValNumActiveBits, ExtraOffset, ValNumActiveBits->
getName() +
".offset",
2859 OffsetIsZero,
true);
2860 Value *IVFinal =
Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
2861 {ValNumActiveBitsOffset, Start},
2862 nullptr,
"iv.final");
2864 auto *LoopBackedgeTakenCount = cast<Instruction>(
Builder.CreateSub(
2865 IVFinal, Start, CurLoop->
getName() +
".backedgetakencount",
2866 OffsetIsZero,
true));
2870 Value *LoopTripCount =
2872 CurLoop->
getName() +
".tripcount",
true,
2878 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
2884 auto *CIV =
Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
2890 true, Bitwidth != 2);
2893 auto *CIVCheck =
Builder.CreateICmpEQ(CIVNext, LoopTripCount,
2894 CurLoop->
getName() +
".ivcheck");
2895 auto *NewIVCheck = CIVCheck;
2897 NewIVCheck =
Builder.CreateNot(CIVCheck);
2898 NewIVCheck->takeName(ValShiftedIsZero);
2902 auto *IVDePHId =
Builder.CreateAdd(CIV, Start,
"",
false,
2904 IVDePHId->takeName(
IV);
2908 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
2913 CIV->addIncoming(CIVNext, LoopHeaderBB);
2921 IV->replaceAllUsesWith(IVDePHId);
2922 IV->eraseFromParent();
2931 ++NumShiftUntilZero;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static const Function * getParent(const Value *V)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
hexagon loop Recognize Hexagon specific loop idioms
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/def graph.
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if a loop is entered.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static const uint32_t IV[8]
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGT
unsigned greater than
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fixed width vector obtained from logBase2 of C.
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single entity.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
void setUnnamedAddr(UnnamedAddr Val)
Module * getParent()
Get the module that this global value is contained inside of...
@ PrivateLinkage
Like Internal, but omit from symbol table.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
bool skipLoop(const Loop *L) const
Optional passes call this function to check whether the pass should be skipped.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
This class implements a map that also provides access to all stored values in a deterministic order.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it, giving the original input.
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect ScalarEvolution's ability to compute a trip count.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its queries in order to avoid complicated analysis.
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is computed contains an instruction that may throw or otherwise exit abnormally.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Pass * createLoopIdiomPass()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a Function with the right type.
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
bool isModOrRefSet(const ModRefInfo MRI)
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it is represented with.
bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThroughTrunc=true)
Decompose an icmp into the form ((X & Mask) pred 0) if possible.
constexpr std::nullopt_t None
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
This struct is a compact representation of a valid (non-zero power of two) alignment.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)