95 #include "llvm/IR/IntrinsicsPowerPC.h"
114 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
116 using namespace llvm;
120 cl::desc(
"Potential common base number threshold per function "
121 "for PPC loop prep"));
125 cl::desc(
"prefer update form when ds form is also a update form"));
129 cl::desc(
"prepare update form when the load/store increment is a loop "
130 "invariant non-const value."));
134 cl::desc(
"Enable chain commoning in PPC loop prepare pass."));
141 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of update "
146 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of DS form"));
150 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of DQ form"));
161 cl::desc(
"Bucket number per loop for PPC loop chain common"));
169 cl::desc(
"Minimal common base load/store instructions triggering DS/DQ form "
174 cl::desc(
"Minimal common base load/store instructions triggering chain "
175 "commoning preparation. Must be not smaller than 4"));
177 STATISTIC(PHINodeAlreadyExistsUpdate,
"PHI node already in pre-increment form");
178 STATISTIC(PHINodeAlreadyExistsDS,
"PHI node already in DS form");
179 STATISTIC(PHINodeAlreadyExistsDQ,
"PHI node already in DQ form");
180 STATISTIC(DSFormChainRewritten,
"Num of DS form chain rewritten");
181 STATISTIC(DQFormChainRewritten,
"Num of DQ form chain rewritten");
182 STATISTIC(UpdFormChainRewritten,
"Num of update form chain rewritten");
183 STATISTIC(ChainCommoningRewritten,
"Num of commoning chains");
186 struct BucketElement {
196 : BaseSCEV(
B), Elements(1, BucketElement(
I)) {
201 const SCEV *BaseSCEV;
219 enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
249 bool HasCandidateForPrepare;
255 unsigned SuccPrepCount;
257 bool runOnLoop(
Loop *L);
261 const SCEV *BasePtrStartSCEV,
262 const SCEV *BasePtrIncSCEV, PrepForm
Form);
266 const SCEV *BasePtrIncSCEV);
272 bool prepareBasesForCommoningChains(Bucket &BucketChain);
276 rewriteLoadStoresForCommoningChains(
Loop *L, Bucket &Bucket,
286 unsigned MaxCandidateNum);
293 unsigned MaxCandidateNum);
308 bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm
Form);
315 bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
319 bool rewriteLoadStores(
Loop *L, Bucket &BucketChain,
324 std::pair<Instruction *, Instruction *>
332 rewriteForBucketElement(std::pair<Instruction *, Instruction *>
Base,
333 const BucketElement &Element,
Value *OffToBase,
340 static const char *
name =
"Prepare loop for ppc preferred instruction forms";
352 return new PPCLoopInstrFormPrep(
TM);
356 Value *StrippedBasePtr = BasePtr;
357 while (
BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
358 StrippedBasePtr = BC->getOperand(0);
360 return GEP->isInBounds();
366 assert(
I &&
"Invalid paramater!");
368 return (
I->getName() + Suffix).str();
374 Type **PtrElementType =
nullptr) {
376 Value *PtrValue =
nullptr;
377 Type *PointerElementType =
nullptr;
379 if (
LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
380 PtrValue = LMemI->getPointerOperand();
381 PointerElementType = LMemI->getType();
382 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
383 PtrValue = SMemI->getPointerOperand();
384 PointerElementType = SMemI->getValueOperand()->getType();
385 }
else if (
IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
388 IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
389 PtrValue = IMemI->getArgOperand(0);
390 }
else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
391 PtrValue = IMemI->getArgOperand(1);
396 *PtrElementType = PointerElementType;
405 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
406 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
407 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
408 DT = DTWP ? &DTWP->getDomTree() :
nullptr;
409 PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
410 ST =
TM ?
TM->getSubtargetImpl(
F) :
nullptr;
413 bool MadeChange =
false;
417 MadeChange |= runOnLoop(L);
428 bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
445 "Thredhold can not be smaller than 4!\n");
451 const SCEV *FirstOffset = CBucket.Elements[1].Offset;
456 unsigned FirstOffsetReusedCount = 1;
460 unsigned FirstOffsetReusedCountInFirstChain = 1;
462 unsigned EleNum = CBucket.Elements.size();
463 bool SawChainSeparater =
false;
464 for (
unsigned j = 2;
j != EleNum; ++
j) {
466 CBucket.Elements[
j - 1].Offset) == FirstOffset) {
467 if (!SawChainSeparater)
468 FirstOffsetReusedCountInFirstChain++;
469 FirstOffsetReusedCount++;
487 SawChainSeparater =
true;
491 if (FirstOffsetReusedCount == 1)
495 FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
499 if (!SawChainSeparater)
500 ChainNum = (unsigned)sqrt((
double)EleNum);
502 CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
506 if (CBucket.ChainSize * ChainNum != EleNum)
509 if (SawChainSeparater) {
511 for (
unsigned i = 1;
i < CBucket.ChainSize;
i++)
512 for (
unsigned j = 1;
j < ChainNum;
j++)
513 if (CBucket.Elements[
i].Offset !=
514 SE->
getMinusSCEV(CBucket.Elements[
i +
j * CBucket.ChainSize].Offset,
515 CBucket.Elements[
j * CBucket.ChainSize].Offset))
519 for (
unsigned i = 0;
i < ChainNum;
i++)
520 CBucket.ChainBases.push_back(CBucket.Elements[
i * CBucket.ChainSize]);
527 bool PPCLoopInstrFormPrep::chainCommoning(
Loop *L,
529 bool MadeChange =
false;
536 for (
auto &Bucket : Buckets) {
537 if (prepareBasesForCommoningChains(Bucket))
538 MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
542 for (
auto *
BB : BBChanged)
547 bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
549 bool MadeChange =
false;
551 assert(Bucket.Elements.size() ==
552 Bucket.ChainBases.size() * Bucket.ChainSize &&
553 "invalid bucket for chain commoning!\n");
560 "loopprepare-chaincommon");
562 for (
unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
563 unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
564 const SCEV *BaseSCEV =
566 Bucket.Elements[BaseElemIdx].Offset)
568 const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
575 "Invalid SCEV type for the base ptr for a candidate chain!\n");
577 std::pair<Instruction *, Instruction *>
Base = rewriteForBase(
578 L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
579 false , ChainCommoning, SCEVE, DeletedPtrs);
589 for (
unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
591 BucketElement &
I = Bucket.Elements[Idx];
593 assert(Ptr &&
"No pointer operand");
594 if (NewPtrs.
count(Ptr))
597 const SCEV *OffsetSCEV =
598 BaseElemIdx ? SE->
getMinusSCEV(Bucket.Elements[Idx].Offset,
599 Bucket.Elements[BaseElemIdx].Offset)
600 : Bucket.Elements[Idx].Offset;
608 Value *OffsetValue = SCEVE.expandCodeFor(
611 Instruction *NewPtr = rewriteForBucketElement(
Base, Bucket.Elements[Idx],
612 OffsetValue, DeletedPtrs);
614 assert(NewPtr &&
"Wrong rewrite!\n");
618 ++ChainCommoningRewritten;
625 for (
auto *Ptr : DeletedPtrs) {
626 if (
Instruction *IDel = dyn_cast<Instruction>(Ptr))
627 BBChanged.
insert(IDel->getParent());
647 std::pair<Instruction *, Instruction *>
653 LLVM_DEBUG(
dbgs() <<
"PIP: Transforming: " << *BasePtrSCEV <<
"\n");
655 assert(BasePtrSCEV->
getLoop() == L &&
"AddRec for the wrong loop?");
658 assert(BasePtr &&
"No pointer operand");
663 BasePtr->getType()->getPointerAddressSpace());
665 bool IsConstantInc =
false;
667 Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
670 dyn_cast<SCEVConstant>(BasePtrIncSCEV);
671 if (BasePtrIncConstantSCEV)
672 IsConstantInc =
true;
676 LLVM_DEBUG(
dbgs() <<
"Loop Increasement can not be represented!\n");
677 return std::make_pair(
nullptr,
nullptr);
683 <<
"Update form prepare for non-const increment is not enabled!\n");
684 return std::make_pair(
nullptr,
nullptr);
687 const SCEV *BasePtrStartSCEV =
nullptr;
690 "Increment is not loop invariant!\n");
692 IsConstantInc ? BasePtrIncConstantSCEV
695 BasePtrStartSCEV = BasePtrSCEV->
getStart();
697 if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV,
Form)) {
699 return std::make_pair(
nullptr,
nullptr);
702 LLVM_DEBUG(
dbgs() <<
"PIP: New start is: " << *BasePtrStartSCEV <<
"\n");
705 unsigned HeaderLoopPredCount =
pred_size(Header);
718 if (PI != LoopPredecessor)
721 NewPHI->
addIncoming(BasePtrStart, LoopPredecessor);
731 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(
IsPtrInBounds(BasePtr));
733 if (PI == LoopPredecessor)
748 if (PI == LoopPredecessor)
758 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(
IsPtrInBounds(BasePtr));
771 BasePtr->replaceAllUsesWith(NewBasePtr);
773 DeletedPtrs.
insert(BasePtr);
775 return std::make_pair(NewBasePtr, PtrInc);
778 Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
779 std::pair<Instruction *, Instruction *>
Base,
const BucketElement &Element,
783 assert((NewBasePtr && PtrInc) &&
"base does not exist!\n");
788 assert(Ptr &&
"No pointer operand");
791 if (!Element.Offset ||
792 (isa<SCEVConstant>(Element.Offset) &&
793 cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
794 RealNewPtr = NewBasePtr;
797 if (PtrIP && isa<Instruction>(NewBasePtr) &&
800 else if (PtrIP && isa<PHINode>(PtrIP))
803 PtrIP = Element.Instr;
805 assert(OffToBase &&
"There should be an offset for non base element!\n");
807 I8Ty, PtrInc, OffToBase,
821 ReplNewPtr = RealNewPtr;
829 void PPCLoopInstrFormPrep::addOneCandidate(
833 "Candidate should be a memory instruction.");
834 assert(LSCEV &&
"Invalid SCEV for Ptr value.");
836 bool FoundBucket =
false;
837 for (
auto &
B : Buckets) {
838 if (cast<SCEVAddRecExpr>(
B.BaseSCEV)->getStepRecurrence(*SE) !=
839 cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
842 if (isValidDiff(Diff)) {
843 B.Elements.push_back(BucketElement(Diff, MemI));
850 if (Buckets.size() == MaxCandidateNum) {
851 LLVM_DEBUG(
dbgs() <<
"Can not prepare more chains, reach maximum limit "
852 << MaxCandidateNum <<
"\n");
855 Buckets.push_back(Bucket(LSCEV, MemI));
867 for (
auto &J : *
BB) {
868 Value *PtrValue =
nullptr;
869 Type *PointerElementType =
nullptr;
883 if (!LARSCEV || LARSCEV->
getLoop() != L)
887 HasCandidateForPrepare =
true;
889 if (isValidCandidate(&J, PtrValue, PointerElementType))
890 addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
895 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
905 for (
unsigned j = 0, je = BucketChain.Elements.size();
j != je; ++
j) {
906 if (!BucketChain.Elements[
j].Offset)
907 RemainderOffsetInfo[0] = std::make_pair(0, 1);
909 unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[
j].Offset)
912 if (RemainderOffsetInfo.
find(Remainder) == RemainderOffsetInfo.
end())
913 RemainderOffsetInfo[Remainder] = std::make_pair(
j, 1);
915 RemainderOffsetInfo[Remainder].second++;
933 unsigned MaxCountRemainder = 0;
934 for (
unsigned j = 0;
j < (unsigned)
Form;
j++)
935 if ((RemainderOffsetInfo.
find(
j) != RemainderOffsetInfo.
end()) &&
936 RemainderOffsetInfo[
j].second >
937 RemainderOffsetInfo[MaxCountRemainder].second)
938 MaxCountRemainder =
j;
946 if (MaxCountRemainder == 0)
951 BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
952 BucketChain.BaseSCEV = SE->
getAddExpr(BucketChain.BaseSCEV, Offset);
953 for (
auto &
E : BucketChain.Elements) {
955 E.Offset = cast<SCEVConstant>(SE->
getMinusSCEV(
E.Offset, Offset));
960 std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
961 BucketChain.Elements[0]);
971 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
981 for (
int j = 0, je = BucketChain.Elements.size();
j != je; ++
j) {
982 if (
auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[
j].Instr))
992 if (!BucketChain.Elements[
j].Offset ||
993 cast<SCEVConstant>(BucketChain.Elements[
j].Offset)->isZero())
996 const SCEV *
Offset = BucketChain.Elements[
j].Offset;
997 BucketChain.BaseSCEV = SE->
getAddExpr(BucketChain.BaseSCEV, Offset);
998 for (
auto &
E : BucketChain.Elements) {
1000 E.Offset = cast<SCEVConstant>(SE->
getMinusSCEV(
E.Offset, Offset));
1005 std::swap(BucketChain.Elements[
j], BucketChain.Elements[0]);
1011 bool PPCLoopInstrFormPrep::rewriteLoadStores(
1014 bool MadeChange =
false;
1017 cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
1028 "loopprepare-formrewrite");
1033 bool CanPreInc = (
Form == UpdateForm ||
1034 ((
Form == DSForm) &&
1041 std::pair<Instruction *, Instruction *>
Base =
1042 rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
1043 CanPreInc,
Form, SCEVE, DeletedPtrs);
1053 for (
auto I = std::next(BucketChain.Elements.begin()),
1054 IE = BucketChain.Elements.end();
I !=
IE; ++
I) {
1056 assert(Ptr &&
"No pointer operand");
1057 if (NewPtrs.
count(Ptr))
1062 I->Offset ? cast<SCEVConstant>(
I->Offset)->getValue() :
nullptr,
1064 assert(NewPtr &&
"wrong rewrite!\n");
1072 for (
auto *Ptr : DeletedPtrs) {
1073 if (
Instruction *IDel = dyn_cast<Instruction>(Ptr))
1074 BBChanged.
insert(IDel->getParent());
1082 if (
Form == DSForm && !CanPreInc)
1083 DSFormChainRewritten++;
1084 else if (
Form == DQForm)
1085 DQFormChainRewritten++;
1086 else if (
Form == UpdateForm || (
Form == DSForm && CanPreInc))
1087 UpdFormChainRewritten++;
1092 bool PPCLoopInstrFormPrep::updateFormPrep(
Loop *L,
1094 bool MadeChange =
false;
1095 if (Buckets.empty())
1098 for (
auto &Bucket : Buckets)
1101 if (prepareBaseForUpdateFormChain(Bucket))
1102 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
1105 for (
auto *
BB : BBChanged)
1110 bool PPCLoopInstrFormPrep::dispFormPrep(
Loop *L,
1113 bool MadeChange =
false;
1115 if (Buckets.empty())
1119 for (
auto &Bucket : Buckets) {
1122 if (prepareBaseForDispFormChain(Bucket,
Form))
1123 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged,
Form);
1127 for (
auto *
BB : BBChanged)
1141 const SCEV *BasePtrIncSCEV) {
1144 if (isa<SCEVConstant>(BasePtrIncSCEV))
1145 return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
1162 for (
auto &CurrentPHI : PHIIter) {
1163 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1164 if (!CurrentPHINode)
1172 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1173 if (!PHIBasePtrSCEV)
1178 if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
1185 Value *StrippedBaseI =
I;
1186 while (
BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
1187 StrippedBaseI = BC->getOperand(0);
1189 Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
1196 (StrippedI->
getOpcode() == Instruction::GetElementPtr &&
1212 bool PPCLoopInstrFormPrep::alreadyPrepared(
Loop *L,
Instruction *MemI,
1213 const SCEV *BasePtrStartSCEV,
1214 const SCEV *BasePtrIncSCEV,
1223 if (!PredBB || !LatchBB)
1228 for (
auto & CurrentPHI : PHIIter) {
1229 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1230 if (!CurrentPHINode)
1238 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1239 if (!PHIBasePtrSCEV)
1244 if (!PHIBasePtrIncSCEV)
1252 if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
1255 if ((
Form == UpdateForm ||
Form == ChainCommoning ) &&
1256 PHIBasePtrSCEV->
getStart() == BasePtrStartSCEV) {
1257 ++PHINodeAlreadyExistsUpdate;
1260 if (
Form == DSForm ||
Form == DQForm) {
1265 ++PHINodeAlreadyExistsDS;
1267 ++PHINodeAlreadyExistsDQ;
1278 bool PPCLoopInstrFormPrep::runOnLoop(
Loop *L) {
1279 bool MadeChange =
false;
1295 if (!LoopPredecessor ||
1298 if (LoopPredecessor)
1301 if (!LoopPredecessor) {
1302 LLVM_DEBUG(
dbgs() <<
"PIP fails since no predecessor for current loop.\n");
1308 const Type *PointerElementType) {
1309 assert((PtrValue &&
I) &&
"Invalid parameter!");
1314 auto *II = dyn_cast<IntrinsicInst>(
I);
1315 if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
1316 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
1326 if (!LARSCEV || LARSCEV->
getLoop() != L)
1330 const APInt &ConstInt = StepConst->getValue()->getValue();
1340 const Type *PointerElementType) {
1341 assert((PtrValue &&
I) &&
"Invalid parameter!");
1342 if (isa<IntrinsicInst>(
I))
1349 [](
const User *U) { return isa<SExtInst>(U); }));
1354 const Type *PointerElementType) {
1355 assert((PtrValue &&
I) &&
"Invalid parameter!");
1357 auto *II = dyn_cast<IntrinsicInst>(
I);
1359 return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
1360 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
1362 return ST &&
ST->hasP9Vector() && (PointerElementType->
isVectorTy());
1369 const Type *PointerElementType) {
1381 if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
1384 const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
1392 bool SawPointer =
false;
1394 if (
Op->getType()->isPointerTy()) {
1398 }
else if (!
Op->getType()->isIntegerTy())
1407 auto isValidConstantDiff = [](
const SCEV *Diff) {
1408 return dyn_cast<SCEVConstant>(Diff) !=
nullptr;
1413 auto isValidChainCommoningDiff = [](
const SCEV *Diff) {
1414 assert(Diff &&
"Invalid Diff!\n");
1417 if (isa<SCEVConstant>(Diff))
1424 const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
1429 if (!
Op->getType()->isIntegerTy())
1435 HasCandidateForPrepare =
false;
1444 if (!UpdateFormBuckets.empty())
1445 MadeChange |= updateFormPrep(L, UpdateFormBuckets);
1446 else if (!HasCandidateForPrepare) {
1449 <<
"No prepare candidates found, stop praparation for current loop!\n");
1461 if (!DSFormBuckets.empty())
1462 MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
1471 if (!DQFormBuckets.empty())
1472 MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
1484 collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
1488 if (!Buckets.empty())
1489 MadeChange |= chainCommoning(L, Buckets);