96#define DEBUG_TYPE "loop-idiom"
98STATISTIC(NumMemSet,
"Number of memset's formed from loop stores");
99STATISTIC(NumMemCpy,
"Number of memcpy's formed from loop load+stores");
100STATISTIC(NumMemMove,
"Number of memmove's formed from loop load+stores");
102 NumShiftUntilBitTest,
103 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
105 "Number of uncountable loops recognized as 'shift until zero' idiom");
110 cl::desc(
"Options to disable Loop Idiom Recognize Pass."),
117 cl::desc(
"Proceed with loop idiom recognize pass, but do "
118 "not convert loop(s) to memset."),
125 cl::desc(
"Proceed with loop idiom recognize pass, but do "
126 "not convert loop(s) to memcpy."),
131 "use-lir-code-size-heurs",
132 cl::desc(
"Use loop idiom recognition code size heuristics when compiling"
138class LoopIdiomRecognize {
139 Loop *CurLoop =
nullptr;
148 bool ApplyCodeSizeHeuristics;
149 std::unique_ptr<MemorySSAUpdater> MSSAU;
158 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI),
TTI(
TTI),
DL(
DL), ORE(ORE) {
160 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
163 bool runOnLoop(
Loop *L);
169 StoreListMap StoreRefsForMemset;
170 StoreListMap StoreRefsForMemsetPattern;
171 StoreList StoreRefsForMemcpy;
173 bool HasMemsetPattern;
177 enum LegalStoreKind {
182 UnorderedAtomicMemcpy,
190 bool runOnCountableLoop();
195 LegalStoreKind isLegalStore(
StoreInst *SI);
196 enum class ForMemset {
No,
Yes };
200 template <
typename MemInst>
201 bool processLoopMemIntrinsic(
203 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
204 const SCEV *BECount);
208 bool processLoopStridedStore(
Value *DestPtr,
const SCEV *StoreSizeSCEV,
213 bool IsNegStride,
bool IsLoopMemset =
false);
214 bool processLoopStoreOfLoopLoad(
StoreInst *SI,
const SCEV *BECount);
215 bool processLoopStoreOfLoopLoad(
Value *DestPtr,
Value *SourcePtr,
221 const SCEV *BECount);
222 bool avoidLIRForMultiBlockLoop(
bool IsMemset =
false,
223 bool IsLoopMemset =
false);
229 bool runOnNoncountableLoop();
231 bool recognizePopcount();
234 bool recognizeAndInsertFFS();
239 bool IsCntPhiUsedOutsideLoop);
241 bool recognizeShiftUntilBitTest();
242 bool recognizeShiftUntilZero();
254 const auto *
DL = &L.getHeader()->getDataLayout();
261 LoopIdiomRecognize LIR(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
SE, &AR.
TLI, &AR.
TTI,
263 if (!LIR.runOnLoop(&L))
274 I->eraseFromParent();
283bool LoopIdiomRecognize::runOnLoop(
Loop *L) {
287 if (!
L->getLoopPreheader())
292 if (
Name ==
"memset" ||
Name ==
"memcpy")
296 ApplyCodeSizeHeuristics =
299 HasMemset = TLI->
has(LibFunc_memset);
300 HasMemsetPattern = TLI->
has(LibFunc_memset_pattern16);
301 HasMemcpy = TLI->
has(LibFunc_memcpy);
303 if (HasMemset || HasMemsetPattern || HasMemcpy)
305 return runOnCountableLoop();
307 return runOnNoncountableLoop();
310bool LoopIdiomRecognize::runOnCountableLoop() {
312 assert(!isa<SCEVCouldNotCompute>(BECount) &&
313 "runOnCountableLoop() called on a loop without a predictable"
314 "backedge-taken count");
318 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
319 if (BECst->getAPInt() == 0)
337 bool MadeChange =
false;
345 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
369 if (!
C || isa<ConstantExpr>(
C))
378 if (
DL->isBigEndian())
394 unsigned ArraySize = 16 /
Size;
399LoopIdiomRecognize::LegalStoreKind
400LoopIdiomRecognize::isLegalStore(
StoreInst *SI) {
402 if (
SI->isVolatile())
403 return LegalStoreKind::None;
405 if (!
SI->isUnordered())
406 return LegalStoreKind::None;
409 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
410 return LegalStoreKind::None;
412 Value *StoredVal =
SI->getValueOperand();
413 Value *StorePtr =
SI->getPointerOperand();
418 return LegalStoreKind::None;
426 return LegalStoreKind::None;
432 dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(StorePtr));
434 return LegalStoreKind::None;
437 if (!isa<SCEVConstant>(StoreEv->
getOperand(1)))
438 return LegalStoreKind::None;
449 bool UnorderedAtomic =
SI->isUnordered() && !
SI->isSimple();
458 return LegalStoreKind::Memset;
465 return LegalStoreKind::MemsetPattern;
473 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
474 if (StoreSize != Stride && StoreSize != -Stride)
475 return LegalStoreKind::None;
478 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
482 return LegalStoreKind::None;
485 return LegalStoreKind::None;
493 return LegalStoreKind::None;
497 return LegalStoreKind::None;
500 UnorderedAtomic = UnorderedAtomic || LI->
isAtomic();
501 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
502 : LegalStoreKind::Memcpy;
505 return LegalStoreKind::None;
508void LoopIdiomRecognize::collectStores(
BasicBlock *BB) {
509 StoreRefsForMemset.clear();
510 StoreRefsForMemsetPattern.clear();
511 StoreRefsForMemcpy.clear();
518 switch (isLegalStore(SI)) {
519 case LegalStoreKind::None:
522 case LegalStoreKind::Memset: {
525 StoreRefsForMemset[
Ptr].push_back(SI);
527 case LegalStoreKind::MemsetPattern: {
530 StoreRefsForMemsetPattern[
Ptr].push_back(SI);
532 case LegalStoreKind::Memcpy:
533 case LegalStoreKind::UnorderedAtomicMemcpy:
534 StoreRefsForMemcpy.push_back(SI);
537 assert(
false &&
"unhandled return value");
546bool LoopIdiomRecognize::runOnLoopBlock(
556 bool MadeChange =
false;
563 for (
auto &SL : StoreRefsForMemset)
564 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
566 for (
auto &SL : StoreRefsForMemsetPattern)
567 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
570 for (
auto &SI : StoreRefsForMemcpy)
571 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
573 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
574 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
575 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
576 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
583 const SCEV *BECount, ForMemset For) {
591 for (
unsigned i = 0, e = SL.
size(); i < e; ++i) {
592 assert(SL[i]->
isSimple() &&
"Expected only non-volatile stores.");
594 Value *FirstStoredVal = SL[i]->getValueOperand();
595 Value *FirstStorePtr = SL[i]->getPointerOperand();
597 cast<SCEVAddRecExpr>(SE->
getSCEV(FirstStorePtr));
599 unsigned FirstStoreSize =
DL->getTypeStoreSize(SL[i]->getValueOperand()->
getType());
602 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
607 Value *FirstSplatValue =
nullptr;
608 Constant *FirstPatternValue =
nullptr;
610 if (For == ForMemset::Yes)
615 assert((FirstSplatValue || FirstPatternValue) &&
616 "Expected either splat value or pattern value.");
624 for (j = i + 1;
j <
e; ++
j)
626 for (j = i;
j > 0; --
j)
629 for (
auto &k : IndexQueue) {
630 assert(SL[k]->
isSimple() &&
"Expected only non-volatile stores.");
631 Value *SecondStorePtr = SL[
k]->getPointerOperand();
633 cast<SCEVAddRecExpr>(SE->
getSCEV(SecondStorePtr));
636 if (FirstStride != SecondStride)
639 Value *SecondStoredVal = SL[
k]->getValueOperand();
640 Value *SecondSplatValue =
nullptr;
641 Constant *SecondPatternValue =
nullptr;
643 if (For == ForMemset::Yes)
648 assert((SecondSplatValue || SecondPatternValue) &&
649 "Expected either splat value or pattern value.");
652 if (For == ForMemset::Yes) {
653 if (isa<UndefValue>(FirstSplatValue))
654 FirstSplatValue = SecondSplatValue;
655 if (FirstSplatValue != SecondSplatValue)
658 if (isa<UndefValue>(FirstPatternValue))
659 FirstPatternValue = SecondPatternValue;
660 if (FirstPatternValue != SecondPatternValue)
665 ConsecutiveChain[SL[i]] = SL[
k];
674 bool Changed =
false;
685 unsigned StoreSize = 0;
688 while (Tails.
count(
I) || Heads.count(
I)) {
689 if (TransformedStores.
count(
I))
693 StoreSize +=
DL->getTypeStoreSize(
I->getValueOperand()->getType());
695 I = ConsecutiveChain[
I];
705 if (StoreSize != Stride && StoreSize != -Stride)
708 bool IsNegStride = StoreSize == -Stride;
712 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
714 HeadStore, AdjacentStores, StoreEv, BECount,
716 TransformedStores.
insert(AdjacentStores.
begin(), AdjacentStores.
end());
726template <
typename MemInst>
727bool LoopIdiomRecognize::processLoopMemIntrinsic(
729 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
730 const SCEV *BECount) {
731 bool MadeChange =
false;
735 if (MemInst *
MI = dyn_cast<MemInst>(Inst)) {
737 if (!(this->*Processor)(
MI, BECount))
751bool LoopIdiomRecognize::processLoopMemCpy(
MemCpyInst *MCI,
752 const SCEV *BECount) {
763 if (!Dest || !Source)
778 if ((SizeInBytes >> 32) != 0)
784 dyn_cast<SCEVConstant>(StoreEv->
getOperand(1));
786 dyn_cast<SCEVConstant>(LoadEv->
getOperand(1));
787 if (!ConstStoreStride || !ConstLoadStride)
796 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
799 <<
ore::NV(
"Inst",
"memcpy") <<
" in "
801 <<
" function will not be hoisted: "
802 <<
ore::NV(
"Reason",
"memcpy size is not equal to stride");
807 int64_t StoreStrideInt = StoreStrideValue.
getSExtValue();
810 if (StoreStrideInt != LoadStrideInt)
813 return processLoopStoreOfLoopLoad(
820bool LoopIdiomRecognize::processLoopMemSet(
MemSetInst *MSI,
821 const SCEV *BECount) {
836 if (!Ev || Ev->
getLoop() != CurLoop)
845 if (!PointerStrideSCEV || !MemsetSizeSCEV)
848 bool IsNegStride =
false;
849 const bool IsConstantSize = isa<ConstantInt>(MSI->
getLength());
851 if (IsConstantSize) {
862 if (SizeInBytes != Stride && SizeInBytes != -Stride)
865 IsNegStride = SizeInBytes == -Stride;
873 if (
Pointer->getType()->getPointerAddressSpace() != 0) {
886 const SCEV *PositiveStrideSCEV =
889 LLVM_DEBUG(
dbgs() <<
" MemsetSizeSCEV: " << *MemsetSizeSCEV <<
"\n"
890 <<
" PositiveStrideSCEV: " << *PositiveStrideSCEV
893 if (PositiveStrideSCEV != MemsetSizeSCEV) {
896 const SCEV *FoldedPositiveStride =
898 const SCEV *FoldedMemsetSize =
902 <<
" FoldedMemsetSize: " << *FoldedMemsetSize <<
"\n"
903 <<
" FoldedPositiveStride: " << *FoldedPositiveStride
906 if (FoldedPositiveStride != FoldedMemsetSize) {
923 BECount, IsNegStride,
true);
931 const SCEV *BECount,
const SCEV *StoreSizeSCEV,
941 const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
942 const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
943 if (BECst && ConstSize) {
947 if (BEInt && SizeInt)
969 Type *IntPtr,
const SCEV *StoreSizeSCEV,
972 if (!StoreSizeSCEV->
isOne()) {
987 const SCEV *StoreSizeSCEV,
Loop *CurLoop,
989 const SCEV *TripCountSCEV =
998bool LoopIdiomRecognize::processLoopStridedStore(
1002 const SCEV *BECount,
bool IsNegStride,
bool IsLoopMemset) {
1010 assert((SplatValue || PatternValue) &&
1011 "Expected either splat value or pattern value.");
1022 Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
1025 bool Changed =
false;
1033 if (!Expander.isSafeToExpand(Start))
1042 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->
getTerminator());
1054 StoreSizeSCEV, *AA, Stores))
1057 if (avoidLIRForMultiBlockLoop(
true, IsLoopMemset))
1062 const SCEV *NumBytesS =
1063 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1067 if (!Expander.isSafeToExpand(NumBytesS))
1071 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1078 AATags = AATags.
merge(
Store->getAAMetadata());
1079 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1080 AATags = AATags.
extendTo(CI->getZExtValue());
1086 NewCall = Builder.CreateMemSet(
1087 BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment),
1092 Type *Int8PtrTy = DestInt8PtrTy;
1094 StringRef FuncName =
"memset_pattern16";
1096 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1103 PatternValue,
".memset_pattern");
1106 Value *PatternPtr = GV;
1107 NewCall = Builder.CreateCall(MSP, {
BasePtr, PatternPtr, NumBytes});
1123 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1125 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1129 <<
" from store to: " << *Ev <<
" at: " << *TheStore
1135 R <<
"Transformed loop-strided store in "
1137 <<
" function into a call to "
1140 if (!Stores.empty())
1142 for (
auto *
I : Stores) {
1143 R <<
ore::NV(
"FromBlock",
I->getParent()->getName())
1151 for (
auto *
I : Stores) {
1153 MSSAU->removeMemoryAccess(
I,
true);
1157 MSSAU->getMemorySSA()->verifyMemorySSA();
1159 ExpCleaner.markResultUsed();
1166bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI,
1167 const SCEV *BECount) {
1168 assert(
SI->isUnordered() &&
"Expected only non-volatile non-ordered stores.");
1170 Value *StorePtr =
SI->getPointerOperand();
1172 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1175 LoadInst *LI = cast<LoadInst>(
SI->getValueOperand());
1185 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1187 StoreEv, LoadEv, BECount);
1191class MemmoveVerifier {
1193 explicit MemmoveVerifier(
const Value &LoadBasePtr,
const Value &StoreBasePtr,
1196 LoadBasePtr.stripPointerCasts(), LoadOff,
DL)),
1198 StoreBasePtr.stripPointerCasts(), StoreOff,
DL)),
1199 IsSameObject(BP1 == BP2) {}
1201 bool loadAndStoreMayFormMemmove(
unsigned StoreSize,
bool IsNegStride,
1203 bool IsMemCpy)
const {
1207 if ((!IsNegStride && LoadOff <= StoreOff) ||
1208 (IsNegStride && LoadOff >= StoreOff))
1214 DL.getTypeSizeInBits(TheLoad.
getType()).getFixedValue() / 8;
1215 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1217 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1218 (IsNegStride && LoadOff + LoadSize > StoreOff))
1226 int64_t LoadOff = 0;
1227 int64_t StoreOff = 0;
1232 const bool IsSameObject;
1236bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1245 if (isa<MemCpyInlineInst>(TheStore))
1257 bool Changed =
false;
1263 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1266 assert(ConstStoreSize &&
"store size is expected to be a constant");
1269 bool IsNegStride = StoreSize == -Stride;
1282 Value *StoreBasePtr = Expander.expandCodeFor(
1283 StrStart, Builder.getPtrTy(StrAS), Preheader->
getTerminator());
1295 IgnoredInsts.
insert(TheStore);
1297 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1298 const StringRef InstRemark = IsMemCpy ?
"memcpy" :
"load and store";
1300 bool LoopAccessStore =
1302 StoreSizeSCEV, *AA, IgnoredInsts);
1303 if (LoopAccessStore) {
1309 IgnoredInsts.
insert(TheLoad);
1311 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1315 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1317 <<
" function will not be hoisted: "
1318 <<
ore::NV(
"Reason",
"The loop may access store location");
1322 IgnoredInsts.
erase(TheLoad);
1335 Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
1340 MemmoveVerifier
Verifier(*LoadBasePtr, *StoreBasePtr, *
DL);
1341 if (IsMemCpy && !
Verifier.IsSameObject)
1342 IgnoredInsts.
erase(TheStore);
1344 StoreSizeSCEV, *AA, IgnoredInsts)) {
1347 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1349 <<
" function will not be hoisted: "
1350 <<
ore::NV(
"Reason",
"The loop may access load location");
1355 bool UseMemMove = IsMemCpy ?
Verifier.IsSameObject : LoopAccessStore;
1357 if (!
Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1361 if (avoidLIRForMultiBlockLoop())
1366 const SCEV *NumBytesS =
1367 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1370 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1374 AATags = AATags.
merge(StoreAATags);
1375 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1376 AATags = AATags.
extendTo(CI->getZExtValue());
1386 NewCall = Builder.CreateMemMove(
1387 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1391 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1392 NumBytes,
false, AATags.
TBAA,
1400 assert((StoreAlign && LoadAlign) &&
1401 "Expect unordered load/store to have align.");
1402 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1415 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1416 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1422 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1424 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1428 <<
" from load ptr=" << *LoadEv <<
" at: " << *TheLoad
1430 <<
" from store ptr=" << *StoreEv <<
" at: " << *TheStore
1436 <<
"Formed a call to "
1438 <<
"() intrinsic from " <<
ore::NV(
"Inst", InstRemark)
1449 MSSAU->removeMemoryAccess(TheStore,
true);
1452 MSSAU->getMemorySSA()->verifyMemorySSA();
1457 ExpCleaner.markResultUsed();
1464bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(
bool IsMemset,
1465 bool IsLoopMemset) {
1466 if (ApplyCodeSizeHeuristics && CurLoop->
getNumBlocks() > 1) {
1467 if (CurLoop->
isOutermost() && (!IsMemset || !IsLoopMemset)) {
1469 <<
" : LIR " << (IsMemset ?
"Memset" :
"Memcpy")
1470 <<
" avoided: multi-block top-level loop\n");
1478bool LoopIdiomRecognize::runOnNoncountableLoop() {
1481 <<
"] Noncountable Loop %"
1484 return recognizePopcount() || recognizeAndInsertFFS() ||
1485 recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
1495 bool JmpOnZero =
false) {
1504 if (!CmpZero || !CmpZero->
isZero())
1515 return Cond->getOperand(0);
1524 auto *PhiX = dyn_cast<PHINode>(VarX);
1525 if (PhiX && PhiX->getParent() == LoopEntry &&
1526 (PhiX->getOperand(0) == DefX || PhiX->
getOperand(1) == DefX))
1562 Value *VarX1, *VarX0;
1565 DefX2 = CountInst =
nullptr;
1566 VarX1 = VarX0 =
nullptr;
1567 PhiX = CountPhi =
nullptr;
1573 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1574 DefX2 = dyn_cast<Instruction>(
T);
1581 if (!DefX2 || DefX2->
getOpcode() != Instruction::And)
1586 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(0))))
1590 SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(1));
1592 if (!SubOneOp || SubOneOp->
getOperand(0) != VarX1)
1598 (SubOneOp->
getOpcode() == Instruction::Add &&
1611 CountInst =
nullptr;
1614 if (Inst.
getOpcode() != Instruction::Add)
1618 if (!Inc || !Inc->
isOne())
1626 bool LiveOutLoop =
false;
1628 if ((cast<Instruction>(U))->
getParent() != LoopEntry) {
1648 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->
getTerminator());
1653 CntInst = CountInst;
1693 Value *VarX =
nullptr;
1702 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1703 DefX = dyn_cast<Instruction>(
T);
1708 if (!DefX || !DefX->
isShift())
1710 IntrinID = DefX->
getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1713 if (!Shft || !Shft->
isOne())
1738 if (Inst.
getOpcode() != Instruction::Add)
1762bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1774 size_t IdiomCanonicalSize = 6;
1777 CntInst, CntPhi, DefX))
1780 bool IsCntPhiUsedOutsideLoop =
false;
1782 if (!CurLoop->
contains(cast<Instruction>(U))) {
1783 IsCntPhiUsedOutsideLoop =
true;
1786 bool IsCntInstUsedOutsideLoop =
false;
1788 if (!CurLoop->
contains(cast<Instruction>(U))) {
1789 IsCntInstUsedOutsideLoop =
true;
1794 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1800 bool ZeroCheck =
false;
1809 if (!IsCntPhiUsedOutsideLoop) {
1813 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1838 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1843 if (HeaderSize != IdiomCanonicalSize &&
1847 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1849 IsCntPhiUsedOutsideLoop);
1857bool LoopIdiomRecognize::recognizePopcount() {
1871 if (LoopBody->
size() >= 20) {
1881 if (!EntryBI || EntryBI->isConditional())
1889 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1890 if (!PreCondBI || PreCondBI->isUnconditional())
1899 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1905 Value *Ops[] = {Val};
1961void LoopIdiomRecognize::transformLoopToCountable(
1964 bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop) {
1969 Builder.SetCurrentDebugLocation(
DL);
1978 if (IsCntPhiUsedOutsideLoop) {
1979 if (DefX->
getOpcode() == Instruction::AShr)
1980 InitXNext = Builder.CreateAShr(InitX, 1);
1981 else if (DefX->
getOpcode() == Instruction::LShr)
1982 InitXNext = Builder.CreateLShr(InitX, 1);
1983 else if (DefX->
getOpcode() == Instruction::Shl)
1984 InitXNext = Builder.CreateShl(InitX, 1);
1992 Count = Builder.CreateSub(
1994 Value *NewCount = Count;
1995 if (IsCntPhiUsedOutsideLoop)
1996 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
1998 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->
getType());
2001 if (cast<ConstantInt>(CntInst->
getOperand(1))->isOne()) {
2004 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2005 if (!InitConst || !InitConst->
isZero())
2006 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2010 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2023 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2028 Builder.SetInsertPoint(LbCond);
2029 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2030 TcPhi, ConstantInt::get(CountTy, 1),
"tcdec",
false,
true));
2039 LbCond->
setOperand(1, ConstantInt::get(CountTy, 0));
2043 if (IsCntPhiUsedOutsideLoop)
2053void LoopIdiomRecognize::transformLoopToPopcount(
BasicBlock *PreCondBB,
2057 auto *PreCondBr = cast<BranchInst>(PreCondBB->
getTerminator());
2066 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2069 NewCount = PopCntZext =
2070 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->
getType()));
2072 if (NewCount != PopCnt)
2073 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2080 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2081 if (!InitConst || !InitConst->
isZero()) {
2082 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2083 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2092 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2094 Value *Opnd0 = PopCntZext;
2095 Value *Opnd1 = ConstantInt::get(PopCntZext->
getType(), 0);
2099 ICmpInst *NewPreCond = cast<ICmpInst>(
2100 Builder.CreateICmp(PreCond->
getPredicate(), Opnd0, Opnd1));
2101 PreCondBr->setCondition(NewPreCond);
2129 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2135 Builder.SetInsertPoint(LbCond);
2137 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2138 "tcdec",
false,
true));
2147 LbCond->
setOperand(1, ConstantInt::get(Ty, 0));
2166 : SubPattern(SP), L(L) {}
2168 template <
typename ITy>
bool match(ITy *V) {
2169 return L->isLoopInvariant(V) && SubPattern.match(V);
2174template <
typename Ty>
2205 " Performing shift-until-bittest idiom detection.\n");
2215 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2217 using namespace PatternMatch;
2222 Value *CmpLHS, *CmpRHS;
2233 auto MatchVariableBitMask = [&]() {
2242 auto MatchConstantBitMask = [&]() {
2248 auto MatchDecomposableConstantBitMask = [&]() {
2252 (BitMask = ConstantInt::get(CurrX->
getType(), Mask)) &&
2253 (BitPos = ConstantInt::get(CurrX->
getType(), Mask.logBase2()));
2256 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2257 !MatchDecomposableConstantBitMask()) {
2263 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2264 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2269 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2271 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2274 "Expected BaseX to be avaliable in the preheader!");
2285 "Should only get equality predicates here.");
2295 if (TrueBB != LoopHeaderBB) {
2354bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2355 bool MadeChange =
false;
2357 Value *
X, *BitMask, *BitPos, *XCurr;
2362 " shift-until-bittest idiom detection failed.\n");
2372 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2375 assert(SuccessorBB &&
"There is only a single successor.");
2378 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->
getDebugLoc());
2381 Type *Ty =
X->getType();
2395 " Intrinsic is too costly, not beneficial\n");
2410 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
2411 if (
auto *BitPosI = dyn_cast<Instruction>(BitPos))
2412 InsertPt = BitPosI->getInsertionPointAfterDef();
2420 return U.getUser() != BitPosFrozen;
2422 BitPos = BitPosFrozen;
2428 BitPos->
getName() +
".lowbitmask");
2430 Builder.CreateOr(LowBitMask, BitMask, BitPos->
getName() +
".mask");
2431 Value *XMasked = Builder.CreateAnd(
X, Mask,
X->getName() +
".masked");
2432 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2433 IntrID, Ty, {XMasked, Builder.getTrue()},
2434 nullptr, XMasked->
getName() +
".numleadingzeros");
2435 Value *XMaskedNumActiveBits = Builder.CreateSub(
2437 XMasked->
getName() +
".numactivebits",
true,
2439 Value *XMaskedLeadingOnePos =
2441 XMasked->
getName() +
".leadingonepos",
false,
2444 Value *LoopBackedgeTakenCount = Builder.CreateSub(
2445 BitPos, XMaskedLeadingOnePos, CurLoop->
getName() +
".backedgetakencount",
2449 Value *LoopTripCount =
2450 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2451 CurLoop->
getName() +
".tripcount",
true,
2458 Value *NewX = Builder.CreateShl(
X, LoopBackedgeTakenCount);
2460 if (
auto *
I = dyn_cast<Instruction>(NewX))
2461 I->copyIRFlags(XNext,
true);
2473 NewXNext = Builder.CreateShl(
X, LoopTripCount);
2478 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2482 if (
auto *
I = dyn_cast<Instruction>(NewXNext))
2483 I->copyIRFlags(XNext,
true);
2494 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
2495 auto *
IV = Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
2501 Builder.CreateAdd(
IV, ConstantInt::get(Ty, 1),
IV->getName() +
".next",
2502 true, Bitwidth != 2);
2505 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2506 CurLoop->
getName() +
".ivcheck");
2507 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2511 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2512 IV->addIncoming(IVNext, LoopHeaderBB);
2523 ++NumShiftUntilBitTest;
2559 const SCEV *&ExtraOffsetExpr,
2560 bool &InvertedCond) {
2562 " Performing shift-until-zero idiom detection.\n");
2575 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2577 using namespace PatternMatch;
2586 !
match(ValShiftedIsZero,
2600 IntrinID = ValShifted->
getOpcode() == Instruction::Shl ? Intrinsic::cttz
2609 else if (
match(NBits,
2613 ExtraOffsetExpr = SE->
getSCEV(ExtraOffset);
2620 auto *IVPN = dyn_cast<PHINode>(
IV);
2621 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2626 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2627 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2637 "Should only get equality predicates here.");
2648 if (FalseBB != LoopHeaderBB) {
2659 if (ValShifted->
getOpcode() == Instruction::AShr &&
2723bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2724 bool MadeChange =
false;
2730 const SCEV *ExtraOffsetExpr;
2733 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2735 " shift-until-zero idiom detection failed.\n");
2745 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2748 assert(SuccessorBB &&
"There is only a single successor.");
2751 Builder.SetCurrentDebugLocation(
IV->getDebugLoc());
2767 " Intrinsic is too costly, not beneficial\n");
2774 bool OffsetIsZero =
false;
2775 if (
auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2776 OffsetIsZero = ExtraOffsetExprC->isZero();
2780 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
2781 IntrID, Ty, {Val, Builder.getFalse()},
2782 nullptr, Val->
getName() +
".numleadingzeros");
2783 Value *ValNumActiveBits = Builder.CreateSub(
2785 Val->
getName() +
".numactivebits",
true,
2789 Expander.setInsertPoint(&*Builder.GetInsertPoint());
2790 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
2792 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
2793 ValNumActiveBits, ExtraOffset, ValNumActiveBits->
getName() +
".offset",
2794 OffsetIsZero,
true);
2795 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
2796 {ValNumActiveBitsOffset, Start},
2797 nullptr,
"iv.final");
2799 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
2800 IVFinal, Start, CurLoop->
getName() +
".backedgetakencount",
2801 OffsetIsZero,
true));
2805 Value *LoopTripCount =
2806 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2807 CurLoop->
getName() +
".tripcount",
true,
2813 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
2818 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
2819 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
2824 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() +
".next",
2825 true, Bitwidth != 2);
2828 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
2829 CurLoop->
getName() +
".ivcheck");
2830 auto *NewIVCheck = CIVCheck;
2832 NewIVCheck = Builder.CreateNot(CIVCheck);
2833 NewIVCheck->takeName(ValShiftedIsZero);
2837 auto *IVDePHId = Builder.CreateAdd(CIV, Start,
"",
false,
2839 IVDePHId->takeName(
IV);
2843 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
2847 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2848 CIV->addIncoming(CIVNext, LoopHeaderBB);
2856 IV->replaceAllUsesWith(IVDePHId);
2857 IV->eraseFromParent();
2866 ++NumShiftUntilZero;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static const uint32_t IV[8]
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGT
unsigned greater than
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
void setUnnamedAddr(UnnamedAddr Val)
Module * getParent()
Get the module that this global value is contained inside of...
@ PrivateLinkage
Like Internal, but omit from symbol table.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
This class implements a map that also provides access to all stored values in a deterministic order.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is computed contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThroughTrunc=true)
Decompose an icmp into the form ((X & Mask) pred 0) if possible.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
This struct is a compact representation of a valid (non-zero power of two) alignment.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)