95#define DEBUG_TYPE "loop-idiom"
97STATISTIC(NumMemSet,
"Number of memset's formed from loop stores");
98STATISTIC(NumMemCpy,
"Number of memcpy's formed from loop load+stores");
99STATISTIC(NumMemMove,
"Number of memmove's formed from loop load+stores");
101 NumShiftUntilBitTest,
102 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
104 "Number of uncountable loops recognized as 'shift until zero' idiom");
109 cl::desc(
"Options to disable Loop Idiom Recognize Pass."),
116 cl::desc(
"Proceed with loop idiom recognize pass, but do "
117 "not convert loop(s) to memset."),
124 cl::desc(
"Proceed with loop idiom recognize pass, but do "
125 "not convert loop(s) to memcpy."),
130 "use-lir-code-size-heurs",
131 cl::desc(
"Use loop idiom recognition code size heuristics when compiling"
137class LoopIdiomRecognize {
138 Loop *CurLoop =
nullptr;
147 bool ApplyCodeSizeHeuristics;
148 std::unique_ptr<MemorySSAUpdater> MSSAU;
157 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI),
TTI(
TTI),
DL(
DL), ORE(ORE) {
159 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
162 bool runOnLoop(
Loop *L);
168 StoreListMap StoreRefsForMemset;
169 StoreListMap StoreRefsForMemsetPattern;
170 StoreList StoreRefsForMemcpy;
172 bool HasMemsetPattern;
176 enum LegalStoreKind {
181 UnorderedAtomicMemcpy,
189 bool runOnCountableLoop();
194 LegalStoreKind isLegalStore(
StoreInst *SI);
195 enum class ForMemset {
No,
Yes };
199 template <
typename MemInst>
200 bool processLoopMemIntrinsic(
202 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
203 const SCEV *BECount);
207 bool processLoopStridedStore(
Value *DestPtr,
const SCEV *StoreSizeSCEV,
212 bool IsNegStride,
bool IsLoopMemset =
false);
213 bool processLoopStoreOfLoopLoad(
StoreInst *SI,
const SCEV *BECount);
214 bool processLoopStoreOfLoopLoad(
Value *DestPtr,
Value *SourcePtr,
220 const SCEV *BECount);
221 bool avoidLIRForMultiBlockLoop(
bool IsMemset =
false,
222 bool IsLoopMemset =
false);
228 bool runOnNoncountableLoop();
230 bool recognizePopcount();
234 bool ZeroCheck,
size_t CanonicalSize);
238 bool recognizeAndInsertFFS();
239 bool recognizeShiftUntilLessThan();
244 bool IsCntPhiUsedOutsideLoop,
245 bool InsertSub =
false);
247 bool recognizeShiftUntilBitTest();
248 bool recognizeShiftUntilZero();
260 const auto *
DL = &L.getHeader()->getDataLayout();
267 LoopIdiomRecognize LIR(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
SE, &AR.
TLI, &AR.
TTI,
269 if (!LIR.runOnLoop(&L))
280 I->eraseFromParent();
289bool LoopIdiomRecognize::runOnLoop(
Loop *L) {
293 if (!
L->getLoopPreheader())
298 if (
Name ==
"memset" ||
Name ==
"memcpy")
302 ApplyCodeSizeHeuristics =
305 HasMemset = TLI->
has(LibFunc_memset);
306 HasMemsetPattern = TLI->
has(LibFunc_memset_pattern16);
307 HasMemcpy = TLI->
has(LibFunc_memcpy);
309 if (HasMemset || HasMemsetPattern || HasMemcpy)
311 return runOnCountableLoop();
313 return runOnNoncountableLoop();
316bool LoopIdiomRecognize::runOnCountableLoop() {
318 assert(!isa<SCEVCouldNotCompute>(BECount) &&
319 "runOnCountableLoop() called on a loop without a predictable"
320 "backedge-taken count");
324 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
325 if (BECst->getAPInt() == 0)
343 bool MadeChange =
false;
351 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
375 if (!
C || isa<ConstantExpr>(
C))
384 if (
DL->isBigEndian())
400 unsigned ArraySize = 16 /
Size;
405LoopIdiomRecognize::LegalStoreKind
406LoopIdiomRecognize::isLegalStore(
StoreInst *SI) {
408 if (
SI->isVolatile())
409 return LegalStoreKind::None;
411 if (!
SI->isUnordered())
412 return LegalStoreKind::None;
415 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
416 return LegalStoreKind::None;
418 Value *StoredVal =
SI->getValueOperand();
419 Value *StorePtr =
SI->getPointerOperand();
424 return LegalStoreKind::None;
432 return LegalStoreKind::None;
438 dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(StorePtr));
440 return LegalStoreKind::None;
443 if (!isa<SCEVConstant>(StoreEv->
getOperand(1)))
444 return LegalStoreKind::None;
455 bool UnorderedAtomic =
SI->isUnordered() && !
SI->isSimple();
464 return LegalStoreKind::Memset;
471 return LegalStoreKind::MemsetPattern;
479 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
480 if (StoreSize != Stride && StoreSize != -Stride)
481 return LegalStoreKind::None;
484 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
488 return LegalStoreKind::None;
491 return LegalStoreKind::None;
499 return LegalStoreKind::None;
503 return LegalStoreKind::None;
506 UnorderedAtomic = UnorderedAtomic || LI->
isAtomic();
507 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
508 : LegalStoreKind::Memcpy;
511 return LegalStoreKind::None;
514void LoopIdiomRecognize::collectStores(
BasicBlock *BB) {
515 StoreRefsForMemset.clear();
516 StoreRefsForMemsetPattern.clear();
517 StoreRefsForMemcpy.clear();
524 switch (isLegalStore(SI)) {
525 case LegalStoreKind::None:
528 case LegalStoreKind::Memset: {
531 StoreRefsForMemset[
Ptr].push_back(SI);
533 case LegalStoreKind::MemsetPattern: {
536 StoreRefsForMemsetPattern[
Ptr].push_back(SI);
538 case LegalStoreKind::Memcpy:
539 case LegalStoreKind::UnorderedAtomicMemcpy:
540 StoreRefsForMemcpy.push_back(SI);
543 assert(
false &&
"unhandled return value");
552bool LoopIdiomRecognize::runOnLoopBlock(
562 bool MadeChange =
false;
569 for (
auto &SL : StoreRefsForMemset)
570 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
572 for (
auto &SL : StoreRefsForMemsetPattern)
573 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
576 for (
auto &SI : StoreRefsForMemcpy)
577 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
579 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
580 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
581 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
582 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
589 const SCEV *BECount, ForMemset For) {
597 for (
unsigned i = 0, e = SL.
size(); i < e; ++i) {
598 assert(SL[i]->
isSimple() &&
"Expected only non-volatile stores.");
600 Value *FirstStoredVal = SL[i]->getValueOperand();
601 Value *FirstStorePtr = SL[i]->getPointerOperand();
603 cast<SCEVAddRecExpr>(SE->
getSCEV(FirstStorePtr));
605 unsigned FirstStoreSize =
DL->getTypeStoreSize(SL[i]->getValueOperand()->
getType());
608 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
613 Value *FirstSplatValue =
nullptr;
614 Constant *FirstPatternValue =
nullptr;
616 if (For == ForMemset::Yes)
621 assert((FirstSplatValue || FirstPatternValue) &&
622 "Expected either splat value or pattern value.");
630 for (j = i + 1;
j <
e; ++
j)
632 for (j = i;
j > 0; --
j)
635 for (
auto &k : IndexQueue) {
636 assert(SL[k]->
isSimple() &&
"Expected only non-volatile stores.");
637 Value *SecondStorePtr = SL[
k]->getPointerOperand();
639 cast<SCEVAddRecExpr>(SE->
getSCEV(SecondStorePtr));
642 if (FirstStride != SecondStride)
645 Value *SecondStoredVal = SL[
k]->getValueOperand();
646 Value *SecondSplatValue =
nullptr;
647 Constant *SecondPatternValue =
nullptr;
649 if (For == ForMemset::Yes)
654 assert((SecondSplatValue || SecondPatternValue) &&
655 "Expected either splat value or pattern value.");
658 if (For == ForMemset::Yes) {
659 if (isa<UndefValue>(FirstSplatValue))
660 FirstSplatValue = SecondSplatValue;
661 if (FirstSplatValue != SecondSplatValue)
664 if (isa<UndefValue>(FirstPatternValue))
665 FirstPatternValue = SecondPatternValue;
666 if (FirstPatternValue != SecondPatternValue)
671 ConsecutiveChain[SL[i]] = SL[
k];
680 bool Changed =
false;
691 unsigned StoreSize = 0;
694 while (Tails.
count(
I) || Heads.count(
I)) {
695 if (TransformedStores.
count(
I))
699 StoreSize +=
DL->getTypeStoreSize(
I->getValueOperand()->getType());
701 I = ConsecutiveChain[
I];
711 if (StoreSize != Stride && StoreSize != -Stride)
714 bool IsNegStride = StoreSize == -Stride;
718 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
720 HeadStore, AdjacentStores, StoreEv, BECount,
722 TransformedStores.
insert(AdjacentStores.
begin(), AdjacentStores.
end());
732template <
typename MemInst>
733bool LoopIdiomRecognize::processLoopMemIntrinsic(
735 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
736 const SCEV *BECount) {
737 bool MadeChange =
false;
741 if (MemInst *
MI = dyn_cast<MemInst>(Inst)) {
743 if (!(this->*Processor)(
MI, BECount))
757bool LoopIdiomRecognize::processLoopMemCpy(
MemCpyInst *MCI,
758 const SCEV *BECount) {
769 if (!Dest || !Source)
784 if ((SizeInBytes >> 32) != 0)
790 dyn_cast<SCEVConstant>(StoreEv->
getOperand(1));
792 dyn_cast<SCEVConstant>(LoadEv->
getOperand(1));
793 if (!ConstStoreStride || !ConstLoadStride)
802 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
805 <<
ore::NV(
"Inst",
"memcpy") <<
" in "
807 <<
" function will not be hoisted: "
808 <<
ore::NV(
"Reason",
"memcpy size is not equal to stride");
813 int64_t StoreStrideInt = StoreStrideValue.
getSExtValue();
816 if (StoreStrideInt != LoadStrideInt)
819 return processLoopStoreOfLoopLoad(
826bool LoopIdiomRecognize::processLoopMemSet(
MemSetInst *MSI,
827 const SCEV *BECount) {
842 if (!Ev || Ev->
getLoop() != CurLoop)
851 if (!PointerStrideSCEV || !MemsetSizeSCEV)
854 bool IsNegStride =
false;
855 const bool IsConstantSize = isa<ConstantInt>(MSI->
getLength());
857 if (IsConstantSize) {
868 if (SizeInBytes != Stride && SizeInBytes != -Stride)
871 IsNegStride = SizeInBytes == -Stride;
879 if (
Pointer->getType()->getPointerAddressSpace() != 0) {
892 const SCEV *PositiveStrideSCEV =
895 LLVM_DEBUG(
dbgs() <<
" MemsetSizeSCEV: " << *MemsetSizeSCEV <<
"\n"
896 <<
" PositiveStrideSCEV: " << *PositiveStrideSCEV
899 if (PositiveStrideSCEV != MemsetSizeSCEV) {
902 const SCEV *FoldedPositiveStride =
904 const SCEV *FoldedMemsetSize =
908 <<
" FoldedMemsetSize: " << *FoldedMemsetSize <<
"\n"
909 <<
" FoldedPositiveStride: " << *FoldedPositiveStride
912 if (FoldedPositiveStride != FoldedMemsetSize) {
929 BECount, IsNegStride,
true);
937 const SCEV *BECount,
const SCEV *StoreSizeSCEV,
947 const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
948 const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
949 if (BECst && ConstSize) {
953 if (BEInt && SizeInt)
975 Type *IntPtr,
const SCEV *StoreSizeSCEV,
978 if (!StoreSizeSCEV->
isOne()) {
993 const SCEV *StoreSizeSCEV,
Loop *CurLoop,
995 const SCEV *TripCountSCEV =
1004bool LoopIdiomRecognize::processLoopStridedStore(
1008 const SCEV *BECount,
bool IsNegStride,
bool IsLoopMemset) {
1016 assert((SplatValue || PatternValue) &&
1017 "Expected either splat value or pattern value.");
1028 Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
1031 bool Changed =
false;
1039 if (!Expander.isSafeToExpand(Start))
1048 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->
getTerminator());
1060 StoreSizeSCEV, *AA, Stores))
1063 if (avoidLIRForMultiBlockLoop(
true, IsLoopMemset))
1068 const SCEV *NumBytesS =
1069 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1073 if (!Expander.isSafeToExpand(NumBytesS))
1077 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1084 AATags = AATags.
merge(
Store->getAAMetadata());
1085 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1086 AATags = AATags.
extendTo(CI->getZExtValue());
1092 NewCall = Builder.CreateMemSet(
1093 BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment),
1098 Type *Int8PtrTy = DestInt8PtrTy;
1100 StringRef FuncName =
"memset_pattern16";
1102 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1109 PatternValue,
".memset_pattern");
1112 Value *PatternPtr = GV;
1113 NewCall = Builder.CreateCall(MSP, {
BasePtr, PatternPtr, NumBytes});
1129 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1131 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1135 <<
" from store to: " << *Ev <<
" at: " << *TheStore
1141 R <<
"Transformed loop-strided store in "
1143 <<
" function into a call to "
1146 if (!Stores.empty())
1148 for (
auto *
I : Stores) {
1149 R <<
ore::NV(
"FromBlock",
I->getParent()->getName())
1157 for (
auto *
I : Stores) {
1159 MSSAU->removeMemoryAccess(
I,
true);
1163 MSSAU->getMemorySSA()->verifyMemorySSA();
1165 ExpCleaner.markResultUsed();
1172bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI,
1173 const SCEV *BECount) {
1174 assert(
SI->isUnordered() &&
"Expected only non-volatile non-ordered stores.");
1176 Value *StorePtr =
SI->getPointerOperand();
1178 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1181 LoadInst *LI = cast<LoadInst>(
SI->getValueOperand());
1191 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1193 StoreEv, LoadEv, BECount);
1197class MemmoveVerifier {
1199 explicit MemmoveVerifier(
const Value &LoadBasePtr,
const Value &StoreBasePtr,
1202 LoadBasePtr.stripPointerCasts(), LoadOff,
DL)),
1204 StoreBasePtr.stripPointerCasts(), StoreOff,
DL)),
1205 IsSameObject(BP1 == BP2) {}
1207 bool loadAndStoreMayFormMemmove(
unsigned StoreSize,
bool IsNegStride,
1209 bool IsMemCpy)
const {
1213 if ((!IsNegStride && LoadOff <= StoreOff) ||
1214 (IsNegStride && LoadOff >= StoreOff))
1220 DL.getTypeSizeInBits(TheLoad.
getType()).getFixedValue() / 8;
1221 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1223 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1224 (IsNegStride && LoadOff + LoadSize > StoreOff))
1232 int64_t LoadOff = 0;
1233 int64_t StoreOff = 0;
1238 const bool IsSameObject;
1242bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1251 if (isa<MemCpyInlineInst>(TheStore))
1263 bool Changed =
false;
1269 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1272 assert(ConstStoreSize &&
"store size is expected to be a constant");
1275 bool IsNegStride = StoreSize == -Stride;
1288 Value *StoreBasePtr = Expander.expandCodeFor(
1289 StrStart, Builder.getPtrTy(StrAS), Preheader->
getTerminator());
1301 IgnoredInsts.
insert(TheStore);
1303 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1304 const StringRef InstRemark = IsMemCpy ?
"memcpy" :
"load and store";
1306 bool LoopAccessStore =
1308 StoreSizeSCEV, *AA, IgnoredInsts);
1309 if (LoopAccessStore) {
1315 IgnoredInsts.
insert(TheLoad);
1317 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1321 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1323 <<
" function will not be hoisted: "
1324 <<
ore::NV(
"Reason",
"The loop may access store location");
1328 IgnoredInsts.
erase(TheLoad);
1341 Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
1346 MemmoveVerifier
Verifier(*LoadBasePtr, *StoreBasePtr, *
DL);
1347 if (IsMemCpy && !
Verifier.IsSameObject)
1348 IgnoredInsts.
erase(TheStore);
1350 StoreSizeSCEV, *AA, IgnoredInsts)) {
1353 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1355 <<
" function will not be hoisted: "
1356 <<
ore::NV(
"Reason",
"The loop may access load location");
1361 bool UseMemMove = IsMemCpy ?
Verifier.IsSameObject : LoopAccessStore;
1363 if (!
Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1367 if (avoidLIRForMultiBlockLoop())
1372 const SCEV *NumBytesS =
1373 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1376 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1380 AATags = AATags.
merge(StoreAATags);
1381 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1382 AATags = AATags.
extendTo(CI->getZExtValue());
1392 NewCall = Builder.CreateMemMove(
1393 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1397 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1398 NumBytes,
false, AATags.
TBAA,
1406 assert((StoreAlign && LoadAlign) &&
1407 "Expect unordered load/store to have align.");
1408 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1421 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1422 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1428 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1430 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1434 <<
" from load ptr=" << *LoadEv <<
" at: " << *TheLoad
1436 <<
" from store ptr=" << *StoreEv <<
" at: " << *TheStore
1442 <<
"Formed a call to "
1444 <<
"() intrinsic from " <<
ore::NV(
"Inst", InstRemark)
1455 MSSAU->removeMemoryAccess(TheStore,
true);
1458 MSSAU->getMemorySSA()->verifyMemorySSA();
1463 ExpCleaner.markResultUsed();
1470bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(
bool IsMemset,
1471 bool IsLoopMemset) {
1472 if (ApplyCodeSizeHeuristics && CurLoop->
getNumBlocks() > 1) {
1473 if (CurLoop->
isOutermost() && (!IsMemset || !IsLoopMemset)) {
1475 <<
" : LIR " << (IsMemset ?
"Memset" :
"Memcpy")
1476 <<
" avoided: multi-block top-level loop\n");
1484bool LoopIdiomRecognize::runOnNoncountableLoop() {
1487 <<
"] Noncountable Loop %"
1490 return recognizePopcount() || recognizeAndInsertFFS() ||
1491 recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1492 recognizeShiftUntilLessThan();
1502 bool JmpOnZero =
false) {
1511 if (!CmpZero || !CmpZero->
isZero())
1522 return Cond->getOperand(0);
1549 return Cond->getOperand(0);
1559 auto *PhiX = dyn_cast<PHINode>(VarX);
1560 if (PhiX && PhiX->getParent() == LoopEntry &&
1561 (PhiX->getOperand(0) == DefX || PhiX->
getOperand(1) == DefX))
1607 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry,
1609 DefX = dyn_cast<Instruction>(
T);
1614 if (!DefX || !isa<PHINode>(DefX))
1617 PHINode *VarPhi = cast<PHINode>(DefX);
1627 if (DefX->
getOpcode() != Instruction::LShr)
1630 IntrinID = Intrinsic::ctlz;
1632 if (!Shft || !Shft->
isOne())
1646 if (Inst.
getOpcode() != Instruction::Add)
1698 Value *VarX1, *VarX0;
1701 DefX2 = CountInst =
nullptr;
1702 VarX1 = VarX0 =
nullptr;
1703 PhiX = CountPhi =
nullptr;
1709 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1710 DefX2 = dyn_cast<Instruction>(
T);
1717 if (!DefX2 || DefX2->
getOpcode() != Instruction::And)
1722 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(0))))
1726 SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(1));
1728 if (!SubOneOp || SubOneOp->
getOperand(0) != VarX1)
1734 (SubOneOp->
getOpcode() == Instruction::Add &&
1747 CountInst =
nullptr;
1750 if (Inst.
getOpcode() != Instruction::Add)
1754 if (!Inc || !Inc->
isOne())
1762 bool LiveOutLoop =
false;
1764 if ((cast<Instruction>(U))->
getParent() != LoopEntry) {
1784 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->
getTerminator());
1789 CntInst = CountInst;
1829 Value *VarX =
nullptr;
1838 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1839 DefX = dyn_cast<Instruction>(
T);
1844 if (!DefX || !DefX->
isShift())
1846 IntrinID = DefX->
getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1849 if (!Shft || !Shft->
isOne())
1874 if (Inst.
getOpcode() != Instruction::Add)
1897bool LoopIdiomRecognize::isProfitableToInsertFFS(
Intrinsic::ID IntrinID,
1898 Value *InitX,
bool ZeroCheck,
1899 size_t CanonicalSize) {
1906 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1920bool LoopIdiomRecognize::insertFFSIfProfitable(
Intrinsic::ID IntrinID,
1924 bool IsCntPhiUsedOutsideLoop =
false;
1926 if (!CurLoop->
contains(cast<Instruction>(U))) {
1927 IsCntPhiUsedOutsideLoop =
true;
1930 bool IsCntInstUsedOutsideLoop =
false;
1932 if (!CurLoop->
contains(cast<Instruction>(U))) {
1933 IsCntInstUsedOutsideLoop =
true;
1938 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1944 bool ZeroCheck =
false;
1953 if (!IsCntPhiUsedOutsideLoop) {
1957 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1972 size_t IdiomCanonicalSize = 6;
1973 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1976 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978 IsCntPhiUsedOutsideLoop);
1985bool LoopIdiomRecognize::recognizeAndInsertFFS() {
2000 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2003bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2014 APInt LoopThreshold;
2016 CntPhi, DefX, LoopThreshold))
2019 if (LoopThreshold == 2) {
2021 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2025 if (LoopThreshold != 4)
2030 if (!CurLoop->
contains(cast<Instruction>(U)))
2039 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2043 APInt PreLoopThreshold;
2045 PreLoopThreshold != 2)
2048 bool ZeroCheck =
true;
2057 size_t IdiomCanonicalSize = 6;
2058 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2062 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2073bool LoopIdiomRecognize::recognizePopcount() {
2087 if (LoopBody->
size() >= 20) {
2097 if (!EntryBI || EntryBI->isConditional())
2105 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2106 if (!PreCondBI || PreCondBI->isUnconditional())
2115 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2121 Value *Ops[] = {Val};
2173void LoopIdiomRecognize::transformLoopToCountable(
2176 bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop,
bool InsertSub) {
2181 Builder.SetCurrentDebugLocation(
DL);
2190 if (IsCntPhiUsedOutsideLoop) {
2191 if (DefX->
getOpcode() == Instruction::AShr)
2192 InitXNext = Builder.CreateAShr(InitX, 1);
2193 else if (DefX->
getOpcode() == Instruction::LShr)
2194 InitXNext = Builder.CreateLShr(InitX, 1);
2195 else if (DefX->
getOpcode() == Instruction::Shl)
2196 InitXNext = Builder.CreateShl(InitX, 1);
2204 Count = Builder.CreateSub(
2207 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2208 Value *NewCount = Count;
2209 if (IsCntPhiUsedOutsideLoop)
2210 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2212 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->
getType());
2215 if (cast<ConstantInt>(CntInst->
getOperand(1))->isOne()) {
2218 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2219 if (!InitConst || !InitConst->
isZero())
2220 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2224 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2237 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2242 Builder.SetInsertPoint(LbCond);
2243 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2244 TcPhi, ConstantInt::get(CountTy, 1),
"tcdec",
false,
true));
2253 LbCond->
setOperand(1, ConstantInt::get(CountTy, 0));
2257 if (IsCntPhiUsedOutsideLoop)
2267void LoopIdiomRecognize::transformLoopToPopcount(
BasicBlock *PreCondBB,
2271 auto *PreCondBr = cast<BranchInst>(PreCondBB->
getTerminator());
2280 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2283 NewCount = PopCntZext =
2284 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->
getType()));
2286 if (NewCount != PopCnt)
2287 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2294 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2295 if (!InitConst || !InitConst->
isZero()) {
2296 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2297 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2306 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2308 Value *Opnd0 = PopCntZext;
2309 Value *Opnd1 = ConstantInt::get(PopCntZext->
getType(), 0);
2313 ICmpInst *NewPreCond = cast<ICmpInst>(
2314 Builder.CreateICmp(PreCond->
getPredicate(), Opnd0, Opnd1));
2315 PreCondBr->setCondition(NewPreCond);
2343 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2349 Builder.SetInsertPoint(LbCond);
2351 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2352 "tcdec",
false,
true));
2361 LbCond->
setOperand(1, ConstantInt::get(Ty, 0));
2380 : SubPattern(SP), L(L) {}
2382 template <
typename ITy>
bool match(ITy *V) {
2383 return L->isLoopInvariant(V) && SubPattern.match(V);
2388template <
typename Ty>
2419 " Performing shift-until-bittest idiom detection.\n");
2429 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2431 using namespace PatternMatch;
2436 Value *CmpLHS, *CmpRHS;
2447 auto MatchVariableBitMask = [&]() {
2456 auto MatchConstantBitMask = [&]() {
2462 auto MatchDecomposableConstantBitMask = [&]() {
2464 if (Res && Res->Mask.isPowerOf2()) {
2468 BitMask = ConstantInt::get(CurrX->
getType(), Res->Mask);
2469 BitPos = ConstantInt::get(CurrX->
getType(), Res->Mask.logBase2());
2475 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2476 !MatchDecomposableConstantBitMask()) {
2482 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2483 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2488 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2490 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2493 "Expected BaseX to be available in the preheader!");
2504 "Should only get equality predicates here.");
2514 if (TrueBB != LoopHeaderBB) {
2573bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2574 bool MadeChange =
false;
2576 Value *
X, *BitMask, *BitPos, *XCurr;
2581 " shift-until-bittest idiom detection failed.\n");
2591 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2594 assert(SuccessorBB &&
"There is only a single successor.");
2597 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->
getDebugLoc());
2600 Type *Ty =
X->getType();
2614 " Intrinsic is too costly, not beneficial\n");
2629 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
2630 if (
auto *BitPosI = dyn_cast<Instruction>(BitPos))
2631 InsertPt = BitPosI->getInsertionPointAfterDef();
2639 return U.getUser() != BitPosFrozen;
2641 BitPos = BitPosFrozen;
2647 BitPos->
getName() +
".lowbitmask");
2649 Builder.CreateOr(LowBitMask, BitMask, BitPos->
getName() +
".mask");
2650 Value *XMasked = Builder.CreateAnd(
X, Mask,
X->getName() +
".masked");
2651 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2652 IntrID, Ty, {XMasked, Builder.getTrue()},
2653 nullptr, XMasked->
getName() +
".numleadingzeros");
2654 Value *XMaskedNumActiveBits = Builder.CreateSub(
2656 XMasked->
getName() +
".numactivebits",
true,
2658 Value *XMaskedLeadingOnePos =
2660 XMasked->
getName() +
".leadingonepos",
false,
2663 Value *LoopBackedgeTakenCount = Builder.CreateSub(
2664 BitPos, XMaskedLeadingOnePos, CurLoop->
getName() +
".backedgetakencount",
2668 Value *LoopTripCount =
2669 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2670 CurLoop->
getName() +
".tripcount",
true,
2677 Value *NewX = Builder.CreateShl(
X, LoopBackedgeTakenCount);
2679 if (
auto *
I = dyn_cast<Instruction>(NewX))
2680 I->copyIRFlags(XNext,
true);
2692 NewXNext = Builder.CreateShl(
X, LoopTripCount);
2697 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2701 if (
auto *
I = dyn_cast<Instruction>(NewXNext))
2702 I->copyIRFlags(XNext,
true);
2713 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
2714 auto *
IV = Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
2720 Builder.CreateAdd(
IV, ConstantInt::get(Ty, 1),
IV->getName() +
".next",
2721 true, Bitwidth != 2);
2724 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2725 CurLoop->
getName() +
".ivcheck");
2726 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2730 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2731 IV->addIncoming(IVNext, LoopHeaderBB);
2742 ++NumShiftUntilBitTest;
2778 const SCEV *&ExtraOffsetExpr,
2779 bool &InvertedCond) {
2781 " Performing shift-until-zero idiom detection.\n");
2794 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2796 using namespace PatternMatch;
2805 !
match(ValShiftedIsZero,
2819 IntrinID = ValShifted->
getOpcode() == Instruction::Shl ? Intrinsic::cttz
2828 else if (
match(NBits,
2832 ExtraOffsetExpr = SE->
getSCEV(ExtraOffset);
2839 auto *IVPN = dyn_cast<PHINode>(
IV);
2840 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2845 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2846 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2856 "Should only get equality predicates here.");
2867 if (FalseBB != LoopHeaderBB) {
2878 if (ValShifted->
getOpcode() == Instruction::AShr &&
2942bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2943 bool MadeChange =
false;
2949 const SCEV *ExtraOffsetExpr;
2952 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2954 " shift-until-zero idiom detection failed.\n");
2964 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2967 assert(SuccessorBB &&
"There is only a single successor.");
2970 Builder.SetCurrentDebugLocation(
IV->getDebugLoc());
2986 " Intrinsic is too costly, not beneficial\n");
2993 bool OffsetIsZero =
false;
2994 if (
auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2995 OffsetIsZero = ExtraOffsetExprC->isZero();
2999 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
3000 IntrID, Ty, {Val, Builder.getFalse()},
3001 nullptr, Val->
getName() +
".numleadingzeros");
3002 Value *ValNumActiveBits = Builder.CreateSub(
3004 Val->
getName() +
".numactivebits",
true,
3008 Expander.setInsertPoint(&*Builder.GetInsertPoint());
3009 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3011 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3012 ValNumActiveBits, ExtraOffset, ValNumActiveBits->
getName() +
".offset",
3013 OffsetIsZero,
true);
3014 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3015 {ValNumActiveBitsOffset, Start},
3016 nullptr,
"iv.final");
3018 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
3019 IVFinal, Start, CurLoop->
getName() +
".backedgetakencount",
3020 OffsetIsZero,
true));
3024 Value *LoopTripCount =
3025 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3026 CurLoop->
getName() +
".tripcount",
true,
3032 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3037 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
3038 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
3043 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() +
".next",
3044 true, Bitwidth != 2);
3047 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3048 CurLoop->
getName() +
".ivcheck");
3049 auto *NewIVCheck = CIVCheck;
3051 NewIVCheck = Builder.CreateNot(CIVCheck);
3052 NewIVCheck->takeName(ValShiftedIsZero);
3056 auto *IVDePHId = Builder.CreateAdd(CIV, Start,
"",
false,
3058 IVDePHId->takeName(
IV);
3062 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3066 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3067 CIV->addIncoming(CIVNext, LoopHeaderBB);
3075 IV->replaceAllUsesWith(IVDePHId);
3076 IV->eraseFromParent();
3085 ++NumShiftUntilZero;
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX, APInt &Threshold)
Return true if the idiom is detected in the loop.
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
static Value * matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry, APInt &Threshold)
Check if the given conditional branch is based on an unsigned less-than comparison between a variable...
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static const uint32_t IV[8]
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_ULT
unsigned less than
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
void setUnnamedAddr(UnnamedAddr Val)
@ PrivateLinkage
Like Internal, but omit from symbol table.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
This class implements a map that also provides access to all stored values in a deterministic order.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false)
Decompose an icmp into the form ((X & Mask) pred C) if possible.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
This struct is a compact representation of a valid (non-zero power of two) alignment.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)