17 #define BBV_NAME "bb-vectorize"
53 #define DEBUG_TYPE BBV_NAME
61 cl::desc(
"The required chain depth for vectorization"));
66 " target information"));
70 cl::desc(
"The maximum search distance for instruction pairs"));
74 cl::desc(
"Replicating one element to a pair breaks the chain"));
78 cl::desc(
"The size of the native vector registers"));
82 cl::desc(
"The maximum number of pairing iterations"));
86 cl::desc(
"Don't try to form non-2^n-length vectors"));
90 cl::desc(
"The maximum number of pairable instructions per group"));
94 cl::desc(
"The maximum number of candidate instruction pairs per group"));
99 " a full cycle check"));
103 cl::desc(
"Don't try to vectorize boolean (i1) values"));
107 cl::desc(
"Don't try to vectorize integer values"));
111 cl::desc(
"Don't try to vectorize floating-point values"));
116 cl::desc(
"Don't try to vectorize pointer values"));
120 cl::desc(
"Don't try to vectorize casting (conversion) operations"));
124 cl::desc(
"Don't try to vectorize floating-point math intrinsics"));
128 cl::desc(
"Don't try to vectorize BitManipulation intrinsics"));
132 cl::desc(
"Don't try to vectorize the fused-multiply-add intrinsic"));
136 cl::desc(
"Don't try to vectorize select instructions"));
140 cl::desc(
"Don't try to vectorize comparison instructions"));
144 cl::desc(
"Don't try to vectorize getelementptr instructions"));
148 cl::desc(
"Don't try to vectorize loads and stores"));
152 cl::desc(
"Only generate aligned loads and stores"));
157 cl::desc(
"Don't boost the chain-depth contribution of loads and stores"));
161 cl::desc(
"Use a fast instruction dependency analysis"));
167 cl::desc(
"When debugging is enabled, output information on the"
168 " instruction-examination process"));
172 cl::desc(
"When debugging is enabled, output information on the"
173 " candidate-selection process"));
177 cl::desc(
"When debugging is enabled, output information on the"
178 " pair-selection process"));
182 cl::desc(
"When debugging is enabled, output information on the"
183 " cycle-checking process"));
188 cl::desc(
"When debugging is enabled, dump the basic block after"
189 " every pair is fused"));
192 STATISTIC(NumFusedOps,
"Number of operations fused by bb-vectorize");
215 typedef std::pair<Value *, Value *> ValuePair;
216 typedef std::pair<ValuePair, int> ValuePairWithCost;
217 typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
218 typedef std::pair<ValuePair, ValuePair> VPPair;
219 typedef std::pair<VPPair, unsigned> VPPairWithType;
228 bool vectorizePairs(
BasicBlock &BB,
bool NonPow2Len =
false);
235 std::vector<Value *> &PairableInsts,
bool NonPow2Len);
240 enum PairConnectionType {
241 PairConnectionDirect,
246 void computeConnectedPairs(
249 std::vector<Value *> &PairableInsts,
250 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
255 std::vector<Value *> &PairableInsts,
258 void choosePairs(
DenseMap<
Value *, std::vector<Value *> > &CandidatePairs,
261 std::vector<Value *> &PairableInsts,
264 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
265 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
270 std::vector<Value *> &PairableInsts,
274 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
275 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
278 bool isInstVectorizable(
Instruction *
I,
bool &IsSimpleLoadStore);
281 bool IsSimpleLoadStore,
bool NonPow2Len,
282 int &CostSavings,
int &FixedOrder);
289 void computePairsConnectedTo(
292 std::vector<Value *> &PairableInsts,
293 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
297 bool pairsConflict(ValuePair
P, ValuePair Q,
299 DenseMap<ValuePair, std::vector<ValuePair> >
300 *PairableInstUserMap =
nullptr,
303 bool pairWillFormCycle(ValuePair
P,
304 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
309 std::vector<Value *> &PairableInsts,
310 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
312 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
319 void buildInitialDAGFor(
322 std::vector<Value *> &PairableInsts,
323 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
332 std::vector<Value *> &PairableInsts,
335 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
336 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
338 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
342 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
349 unsigned MaskOffset,
unsigned NumInElem,
350 unsigned NumInElem1,
unsigned IdxOffset,
351 std::vector<Constant*> &Mask);
357 unsigned o,
Value *&LOp,
unsigned numElemL,
358 Type *ArgTypeL,
Type *ArgTypeR,
bool IBeforeJ,
359 unsigned IdxOff = 0);
380 std::vector<Value *> &PairableInsts,
395 if (skipOptnoneFunction(BB))
397 if (!DT->isReachableFromEntry(&BB)) {
403 DEBUG(
if (TTI)
dbgs() <<
"BBV: using target information\n");
405 bool changed =
false;
411 (TTI || v <= Config.VectorBits) &&
412 (!Config.MaxIter || n <= Config.MaxIter);
414 DEBUG(
dbgs() <<
"BBV: fusing loop #" << n <<
415 " for " << BB.
getName() <<
" in " <<
417 if (vectorizePairs(BB))
425 for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
426 DEBUG(
dbgs() <<
"BBV: fusing for non-2^n-length vectors loop #: " <<
427 n <<
" for " << BB.
getName() <<
" in " <<
429 if (!vectorizePairs(BB,
true))
break;
437 bool runOnBasicBlock(
BasicBlock &BB)
override {
440 AA = &getAnalysis<AliasAnalysis>();
441 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
442 SE = &getAnalysis<ScalarEvolution>();
445 : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
448 return vectorizeBB(BB);
465 "Cannot form vector from incompatible scalar types");
469 if (
VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
470 numElem = VTy->getNumElements();
475 if (
VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
476 numElem += VTy->getNumElements();
490 Value *IVal =
SI->getValueOperand();
496 if (
CastInst *CI = dyn_cast<CastInst>(I))
502 T2 =
SI->getCondition()->getType();
504 T2 =
SI->getOperand(0)->getType();
505 }
else if (
CmpInst *CI = dyn_cast<CmpInst>(I)) {
506 T2 = CI->getOperand(0)->getType();
519 inline size_t getDepthFactor(
Value *V) {
528 if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
533 if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
534 return Config.ReqChainDepth/2;
541 unsigned getInstrCost(
unsigned Opcode,
Type *T1,
Type *T2,
548 case Instruction::GetElementPtr:
553 case Instruction::Br:
554 return TTI->getCFInstrCost(Opcode);
557 case Instruction::Add:
558 case Instruction::FAdd:
559 case Instruction::Sub:
560 case Instruction::FSub:
561 case Instruction::Mul:
562 case Instruction::FMul:
563 case Instruction::UDiv:
564 case Instruction::SDiv:
565 case Instruction::FDiv:
566 case Instruction::URem:
567 case Instruction::SRem:
568 case Instruction::FRem:
569 case Instruction::Shl:
570 case Instruction::LShr:
571 case Instruction::AShr:
575 return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
577 case Instruction::ICmp:
578 case Instruction::FCmp:
579 return TTI->getCmpSelInstrCost(Opcode, T1, T2);
580 case Instruction::ZExt:
581 case Instruction::SExt:
582 case Instruction::FPToUI:
583 case Instruction::FPToSI:
584 case Instruction::FPExt:
585 case Instruction::PtrToInt:
586 case Instruction::IntToPtr:
587 case Instruction::SIToFP:
588 case Instruction::UIToFP:
589 case Instruction::Trunc:
590 case Instruction::FPTrunc:
591 case Instruction::BitCast:
592 case Instruction::ShuffleVector:
593 return TTI->getCastInstrCost(Opcode, T1, T2);
605 Value *&IPtr,
Value *&JPtr,
unsigned &IAlignment,
unsigned &JAlignment,
606 unsigned &IAddressSpace,
unsigned &JAddressSpace,
607 int64_t &OffsetInElmts,
bool ComputeOffset =
true) {
609 if (
LoadInst *LI = dyn_cast<LoadInst>(I)) {
611 IPtr = LI->getPointerOperand();
613 IAlignment = LI->getAlignment();
615 IAddressSpace = LI->getPointerAddressSpace();
618 StoreInst *
SI = cast<StoreInst>(
I), *SJ = cast<StoreInst>(J);
620 JPtr = SJ->getPointerOperand();
622 JAlignment = SJ->getAlignment();
624 JAddressSpace = SJ->getPointerAddressSpace();
630 const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
631 const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
636 const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
638 dyn_cast<SCEVConstant>(OffsetSCEV)) {
646 if (VTy != VTy2 && Offset < 0) {
648 OffsetInElmts = Offset/VTy2TSS;
649 return (
std::abs(Offset) % VTy2TSS) == 0;
652 OffsetInElmts = Offset/VTyTSS;
653 return (
std::abs(Offset) % VTyTSS) == 0;
661 bool isVectorizableIntrinsic(
CallInst* I) {
663 if (!F)
return false;
666 if (!IID)
return false;
671 case Intrinsic::sqrt:
672 case Intrinsic::powi:
676 case Intrinsic::log2:
677 case Intrinsic::log10:
679 case Intrinsic::exp2:
681 case Intrinsic::round:
682 case Intrinsic::copysign:
683 case Intrinsic::ceil:
684 case Intrinsic::nearbyint:
685 case Intrinsic::rint:
686 case Intrinsic::trunc:
687 case Intrinsic::floor:
688 case Intrinsic::fabs:
691 return Config.VectorizeMath;
692 case Intrinsic::bswap:
693 case Intrinsic::ctpop:
694 case Intrinsic::ctlz:
695 case Intrinsic::cttz:
696 return Config.VectorizeBitManipulations;
698 case Intrinsic::fmuladd:
699 return Config.VectorizeFMA;
706 if (!isa<UndefValue>(IENext->
getOperand(0)) &&
707 !isa<InsertElementInst>(IENext->
getOperand(0))) {
711 dyn_cast<InsertElementInst>(IENext->
getOperand(0))));
719 bool BBVectorize::vectorizePairs(
BasicBlock &BB,
bool NonPow2Len) {
723 std::vector<Value *> AllPairableInsts;
728 AllConnectedPairDeps;
731 std::vector<Value *> PairableInsts;
735 ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
737 CandidatePairCostSavings,
738 PairableInsts, NonPow2Len);
739 if (PairableInsts.empty())
continue;
744 CandidatePairs.
begin(), E = CandidatePairs.
end(); I != E; ++
I)
745 for (std::vector<Value *>::iterator J = I->second.begin(),
746 JE = I->second.end(); J != JE; ++J)
747 CandidatePairsSet.
insert(ValuePair(I->first, *J));
761 computeConnectedPairs(CandidatePairs, CandidatePairsSet,
762 PairableInsts, ConnectedPairs, PairConnectionTypes);
763 if (ConnectedPairs.
empty())
continue;
765 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
766 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
768 for (std::vector<ValuePair>::iterator J = I->second.begin(),
769 JE = I->second.end(); J != JE; ++J)
770 ConnectedPairDeps[*J].push_back(I->first);
774 buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
783 choosePairs(CandidatePairs, CandidatePairsSet,
784 CandidatePairCostSavings,
785 PairableInsts, FixedOrderPairs, PairConnectionTypes,
786 ConnectedPairs, ConnectedPairDeps,
787 PairableInstUsers, ChosenPairs);
789 if (ChosenPairs.
empty())
continue;
790 AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
791 PairableInsts.end());
798 IE = ChosenPairs.
end(); I !=
IE; ++
I) {
799 if (FixedOrderPairs.
count(*I))
800 AllFixedOrderPairs.
insert(*I);
801 else if (FixedOrderPairs.
count(ValuePair(I->second, I->first)))
802 AllFixedOrderPairs.
insert(ValuePair(I->second, I->first));
807 PairConnectionTypes.
find(VPPair(*I, *J));
808 if (K != PairConnectionTypes.
end()) {
809 AllPairConnectionTypes.
insert(*K);
811 K = PairConnectionTypes.
find(VPPair(*J, *I));
812 if (K != PairConnectionTypes.
end())
813 AllPairConnectionTypes.
insert(*K);
818 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
819 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
821 for (std::vector<ValuePair>::iterator J = I->second.begin(),
822 JE = I->second.end(); J != JE; ++J)
823 if (AllPairConnectionTypes.
count(VPPair(I->first, *J))) {
824 AllConnectedPairs[I->first].push_back(*J);
825 AllConnectedPairDeps[*J].push_back(I->first);
827 }
while (ShouldContinue);
829 if (AllChosenPairs.
empty())
return false;
830 NumFusedOps += AllChosenPairs.
size();
839 fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
840 AllPairConnectionTypes,
841 AllConnectedPairs, AllConnectedPairDeps);
852 bool BBVectorize::isInstVectorizable(
Instruction *I,
853 bool &IsSimpleLoadStore) {
854 IsSimpleLoadStore =
false;
856 if (
CallInst *
C = dyn_cast<CallInst>(I)) {
857 if (!isVectorizableIntrinsic(
C))
859 }
else if (
LoadInst *L = dyn_cast<LoadInst>(I)) {
861 IsSimpleLoadStore = L->isSimple();
862 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
864 }
else if (
StoreInst *S = dyn_cast<StoreInst>(I)) {
866 IsSimpleLoadStore = S->isSimple();
867 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
869 }
else if (
CastInst *
C = dyn_cast<CastInst>(I)) {
871 if (!Config.VectorizeCasts)
874 Type *SrcTy =
C->getSrcTy();
878 Type *DestTy =
C->getDestTy();
881 }
else if (isa<SelectInst>(I)) {
882 if (!Config.VectorizeSelect)
884 }
else if (isa<CmpInst>(I)) {
885 if (!Config.VectorizeCmp)
888 if (!Config.VectorizeGEP)
892 if (
G->getNumIndices() != 1)
894 }
else if (!(I->
isBinaryOp() || isa<ShuffleVectorInst>(
I) ||
895 isa<ExtractElementInst>(I) || isa<InsertElementInst>(
I))) {
900 getInstructionTypes(I, T1, T2);
908 if (!Config.VectorizeBools)
916 if (!Config.VectorizeBools)
923 if (!Config.VectorizeFloats
949 bool IsSimpleLoadStore,
bool NonPow2Len,
950 int &CostSavings,
int &FixedOrder) {
952 " <-> " << *J <<
"\n");
963 Type *IT1, *IT2, *JT1, *JT2;
964 getInstructionTypes(I, IT1, IT2);
965 getInstructionTypes(J, JT1, JT2);
966 unsigned MaxTypeBits = std::max(
969 if (!TTI && MaxTypeBits > Config.VectorBits)
974 if (IsSimpleLoadStore) {
976 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
977 int64_t OffsetInElmts = 0;
978 if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
979 IAddressSpace, JAddressSpace, OffsetInElmts) &&
981 FixedOrder = (
int) OffsetInElmts;
982 unsigned BottomAlignment = IAlignment;
983 if (OffsetInElmts < 0) BottomAlignment = JAlignment;
985 Type *aTypeI = isa<StoreInst>(
I) ?
986 cast<StoreInst>(I)->getValueOperand()->getType() : I->
getType();
987 Type *aTypeJ = isa<StoreInst>(J) ?
988 cast<StoreInst>(J)->getValueOperand()->getType() : J->
getType();
989 Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
991 if (Config.AlignedOnly) {
997 if (BottomAlignment < VecAlignment)
1002 unsigned ICost = TTI->getMemoryOpCost(I->
getOpcode(), aTypeI,
1003 IAlignment, IAddressSpace);
1004 unsigned JCost = TTI->getMemoryOpCost(J->
getOpcode(), aTypeJ,
1005 JAlignment, JAddressSpace);
1006 unsigned VCost = TTI->getMemoryOpCost(I->
getOpcode(), VType,
1010 ICost += TTI->getAddressComputationCost(aTypeI);
1011 JCost += TTI->getAddressComputationCost(aTypeJ);
1012 VCost += TTI->getAddressComputationCost(VType);
1014 if (VCost > ICost + JCost)
1020 unsigned VParts = TTI->getNumberOfParts(VType);
1023 else if (!VParts && VCost == ICost + JCost)
1026 CostSavings = ICost + JCost - VCost;
1032 unsigned ICost = getInstrCost(I->
getOpcode(), IT1, IT2);
1033 unsigned JCost = getInstrCost(J->
getOpcode(), JT1, JT2);
1034 Type *VT1 = getVecTypeForPair(IT1, JT1),
1035 *VT2 = getVecTypeForPair(IT2, JT2);
1046 case Instruction::Shl:
1047 case Instruction::LShr:
1048 case Instruction::AShr:
1062 if ((isa<ConstantVector>(IOp) || isa<ConstantDataVector>(IOp)) &&
1063 (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
1065 Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
1066 if (SplatValue !=
nullptr &&
1067 SplatValue == cast<Constant>(JOp)->getSplatValue())
1078 unsigned VCost = getInstrCost(I->
getOpcode(), VT1, VT2, Op1VK, Op2VK);
1080 if (VCost > ICost + JCost)
1086 unsigned VParts1 = TTI->getNumberOfParts(VT1),
1087 VParts2 = TTI->getNumberOfParts(VT2);
1088 if (VParts1 > 1 || VParts2 > 1)
1090 else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
1093 CostSavings = ICost + JCost - VCost;
1102 if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
1103 IID == Intrinsic::cttz) {
1105 *A1J = cast<CallInst>(J)->getArgOperand(1);
1106 const SCEV *A1ISCEV = SE->getSCEV(A1I),
1107 *A1JSCEV = SE->getSCEV(A1J);
1108 return (A1ISCEV == A1JSCEV);
1115 unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
1121 unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
1125 "Intrinsic argument counts differ");
1127 if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
1128 IID == Intrinsic::cttz) && i == 1)
1135 Type *RetTy = getVecTypeForPair(IT1, JT1);
1136 unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
1138 if (VCost > ICost + JCost)
1144 unsigned RetParts = TTI->getNumberOfParts(RetTy);
1147 else if (!RetParts && VCost == ICost + JCost)
1151 if (!Tys[i]->isVectorTy())
1154 unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
1157 else if (!NumParts && VCost == ICost + JCost)
1161 CostSavings = ICost + JCost - VCost;
1198 if (I == V || Users.
count(V)) {
1204 if (LoadMoveSetPairs) {
1205 UsesI = LoadMoveSetPairs->
count(ValuePair(J, I));
1208 WE = WriteSet.
end(); W != WE; ++W) {
1209 if (W->aliasesUnknownInst(J, *AA)) {
1217 if (UsesI && UpdateUsers) {
1227 bool BBVectorize::getCandidatePairs(
BasicBlock &BB,
1232 std::vector<Value *> &PairableInsts,
bool NonPow2Len) {
1233 size_t TotalPairs = 0;
1235 if (Start == E)
return false;
1237 bool ShouldContinue =
false, IAfterStart =
false;
1239 if (I == Start) IAfterStart =
true;
1241 bool IsSimpleLoadStore;
1242 if (!isInstVectorizable(I, IsSimpleLoadStore))
continue;
1249 bool JAfterStart = IAfterStart;
1251 for (
unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
1252 if (J == Start) JAfterStart =
true;
1255 bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
1256 if (Config.FastDep) {
1266 if (UsesI)
continue;
1271 int CostSavings, FixedOrder;
1272 if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
1273 CostSavings, FixedOrder))
continue;
1276 if (PairableInsts.empty() ||
1277 PairableInsts[PairableInsts.size()-1] !=
I) {
1278 PairableInsts.push_back(I);
1281 CandidatePairs[
I].push_back(J);
1284 CandidatePairCostSavings.
insert(ValuePairWithCost(ValuePair(I, J),
1287 if (FixedOrder == 1)
1288 FixedOrderPairs.
insert(ValuePair(I, J));
1289 else if (FixedOrder == -1)
1290 FixedOrderPairs.
insert(ValuePair(J, I));
1295 Start = std::next(J);
1296 IAfterStart = JAfterStart =
false;
1300 << *I <<
" <-> " << *J <<
" (cost savings: " <<
1301 CostSavings <<
")\n");
1306 if (PairableInsts.size() >= Config.MaxInsts ||
1307 TotalPairs >= Config.MaxPairs) {
1308 ShouldContinue =
true;
1317 DEBUG(
dbgs() <<
"BBV: found " << PairableInsts.size()
1318 <<
" instructions with candidate pairs\n");
1320 return ShouldContinue;
1326 void BBVectorize::computePairsConnectedTo(
1329 std::vector<Value *> &PairableInsts,
1330 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1338 E = P.first->user_end();
1341 if (isa<LoadInst>(UI)) {
1345 }
else if ((SI = dyn_cast<StoreInst>(UI)) &&
1355 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1360 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1361 VPPair VP(P, ValuePair(UI, UJ));
1362 ConnectedPairs[VP.first].push_back(VP.second);
1363 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionDirect));
1367 if (CandidatePairsSet.
count(ValuePair(UJ, UI))) {
1368 VPPair VP(P, ValuePair(UJ, UI));
1369 ConnectedPairs[VP.first].push_back(VP.second);
1370 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSwap));
1374 if (Config.SplatBreaksChain)
continue;
1379 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1383 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1384 VPPair VP(P, ValuePair(UI, UJ));
1385 ConnectedPairs[VP.first].push_back(VP.second);
1386 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1391 if (Config.SplatBreaksChain)
return;
1395 E = P.second->user_end();
1398 if (isa<LoadInst>(UI))
1400 else if ((SI = dyn_cast<StoreInst>(UI)) &&
1406 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1410 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1411 VPPair VP(P, ValuePair(UI, UJ));
1412 ConnectedPairs[VP.first].push_back(VP.second);
1413 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1422 void BBVectorize::computeConnectedPairs(
1425 std::vector<Value *> &PairableInsts,
1426 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1428 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
1429 PE = PairableInsts.end(); PI != PE; ++PI) {
1431 CandidatePairs.
find(*PI);
1432 if (PP == CandidatePairs.
end())
1435 for (std::vector<Value *>::iterator P = PP->second.
begin(),
1436 E = PP->second.
end(); P != E; ++
P)
1437 computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
1438 PairableInsts, ConnectedPairs,
1439 PairConnectionTypes, ValuePair(*PI, *P));
1442 DEBUG(
size_t TotalPairs = 0;
1443 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
1444 ConnectedPairs.
begin(), IE = ConnectedPairs.
end(); I !=
IE; ++
I)
1445 TotalPairs += I->second.size();
1446 dbgs() <<
"BBV: found " << TotalPairs
1447 <<
" pair connections.\n");
1453 void BBVectorize::buildDepMap(
1456 std::vector<Value *> &PairableInsts,
1460 CandidatePairs.
begin(), E = CandidatePairs.
end();
C != E; ++
C) {
1462 IsInPair.
insert(
C->second.begin(),
C->second.end());
1471 if (IsInPair.
find(I) == IsInPair.
end())
continue;
1478 (void) trackUsesOfI(Users, WriteSet, I, J);
1486 if (IsInPair.
find(*U) == IsInPair.
end())
continue;
1487 PairableInstUsers.
insert(ValuePair(I, *U));
1498 bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
1500 DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
1503 bool QUsesP = PairableInstUsers.
count(ValuePair(P.first, Q.first)) ||
1504 PairableInstUsers.
count(ValuePair(P.first, Q.second)) ||
1505 PairableInstUsers.
count(ValuePair(P.second, Q.first)) ||
1506 PairableInstUsers.
count(ValuePair(P.second, Q.second));
1507 bool PUsesQ = PairableInstUsers.
count(ValuePair(Q.first, P.first)) ||
1508 PairableInstUsers.
count(ValuePair(Q.first, P.second)) ||
1509 PairableInstUsers.
count(ValuePair(Q.second, P.first)) ||
1510 PairableInstUsers.
count(ValuePair(Q.second, P.second));
1511 if (PairableInstUserMap) {
1516 if (PairableInstUserPairSet->
insert(VPPair(Q, P)).second)
1517 (*PairableInstUserMap)[Q].push_back(P);
1520 if (PairableInstUserPairSet->
insert(VPPair(P, Q)).second)
1521 (*PairableInstUserMap)[
P].push_back(Q);
1525 return (QUsesP && PUsesQ);
1530 bool BBVectorize::pairWillFormCycle(ValuePair P,
1531 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1534 dbgs() <<
"BBV: starting cycle check for : " << *P.first <<
" <-> "
1535 << *P.second <<
"\n");
1547 dbgs() <<
"BBV: cycle check visiting: " << *QTop.first <<
" <-> "
1548 << *QTop.second <<
"\n");
1550 PairableInstUserMap.
find(QTop);
1551 if (QQ == PairableInstUserMap.
end())
1554 for (std::vector<ValuePair>::iterator
C = QQ->second.
begin(),
1558 <<
"BBV: rejected to prevent non-trivial cycle formation: "
1559 << QTop.first <<
" <-> " <<
C->second <<
"\n");
1566 }
while (!Q.
empty());
1573 void BBVectorize::buildInitialDAGFor(
1576 std::vector<Value *> &PairableInsts,
1577 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1586 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1588 ValuePairWithDepth QTop = Q.
back();
1591 bool MoreChildren =
false;
1592 size_t MaxChildDepth = QTop.second;
1594 ConnectedPairs.
find(QTop.first);
1595 if (QQ != ConnectedPairs.
end())
1596 for (std::vector<ValuePair>::iterator k = QQ->second.
begin(),
1597 ke = QQ->second.
end(); k != ke; ++k) {
1599 if (CandidatePairsSet.
count(*k)) {
1601 if (C == DAG.
end()) {
1602 size_t d = getDepthFactor(k->first);
1603 Q.
push_back(ValuePairWithDepth(*k, QTop.second+d));
1604 MoreChildren =
true;
1606 MaxChildDepth = std::max(MaxChildDepth, C->second);
1611 if (!MoreChildren) {
1613 DAG.
insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
1616 }
while (!Q.
empty());
1621 void BBVectorize::pruneDAGFor(
1623 std::vector<Value *> &PairableInsts,
1624 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1626 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1631 bool UseCycleCheck) {
1634 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1637 PrunedDAG.
insert(QTop.first);
1642 ConnectedPairs.
find(QTop.first);
1643 if (QQ == ConnectedPairs.
end())
1646 for (std::vector<ValuePair>::iterator K = QQ->second.
begin(),
1647 KE = QQ->second.
end(); K != KE; ++K) {
1649 if (C == DAG.
end())
continue;
1677 = BestChildren.
begin(), E2 = BestChildren.
end();
1679 if (C2->first.first == C->first.first ||
1680 C2->first.first == C->first.second ||
1681 C2->first.second == C->first.first ||
1682 C2->first.second == C->first.second ||
1683 pairsConflict(C2->first, C->first, PairableInstUsers,
1684 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1685 UseCycleCheck ? &PairableInstUserPairSet
1687 if (C2->second >= C->second) {
1692 CurrentPairs.
insert(C2->first);
1695 if (!CanAdd)
continue;
1700 E2 = PrunedDAG.
end();
T != E2; ++
T) {
1701 if (
T->first == C->first.first ||
1702 T->first == C->first.second ||
1703 T->second == C->first.first ||
1704 T->second == C->first.second ||
1705 pairsConflict(*
T, C->first, PairableInstUsers,
1706 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1707 UseCycleCheck ? &PairableInstUserPairSet
1715 if (!CanAdd)
continue;
1719 E2 = Q.
end(); C2 != E2; ++C2) {
1720 if (C2->first.first == C->first.first ||
1721 C2->first.first == C->first.second ||
1722 C2->first.second == C->first.first ||
1723 C2->first.second == C->first.second ||
1724 pairsConflict(C2->first, C->first, PairableInstUsers,
1725 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1726 UseCycleCheck ? &PairableInstUserPairSet
1732 CurrentPairs.
insert(C2->first);
1734 if (!CanAdd)
continue;
1739 ChosenPairs.
begin(), E2 = ChosenPairs.
end();
1741 if (pairsConflict(*C2, C->first, PairableInstUsers,
1742 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1743 UseCycleCheck ? &PairableInstUserPairSet
1749 CurrentPairs.
insert(*C2);
1751 if (!CanAdd)
continue;
1761 if (UseCycleCheck &&
1762 pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
1770 = BestChildren.
begin(); C2 != BestChildren.
end();) {
1771 if (C2->first.first == C->first.first ||
1772 C2->first.first == C->first.second ||
1773 C2->first.second == C->first.first ||
1774 C2->first.second == C->first.second ||
1775 pairsConflict(C2->first, C->first, PairableInstUsers))
1776 C2 = BestChildren.
erase(C2);
1781 BestChildren.
push_back(ValuePairWithDepth(C->first, C->second));
1785 = BestChildren.
begin(), E2 = BestChildren.
end();
1787 size_t DepthF = getDepthFactor(C->first.first);
1788 Q.
push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
1790 }
while (!Q.
empty());
1795 void BBVectorize::findBestDAGFor(
1799 std::vector<Value *> &PairableInsts,
1802 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1803 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
1805 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1809 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
1810 bool UseCycleCheck) {
1811 for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
1813 ValuePair IJ(II, *J);
1814 if (!CandidatePairsSet.
count(IJ))
1821 bool DoesConflict =
false;
1823 E = ChosenPairs.
end(); C != E; ++
C) {
1824 if (pairsConflict(*C, IJ, PairableInstUsers,
1825 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1826 UseCycleCheck ? &PairableInstUserPairSet :
nullptr)) {
1827 DoesConflict =
true;
1831 ChosenPairSet.
insert(*C);
1833 if (DoesConflict)
continue;
1835 if (UseCycleCheck &&
1836 pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
1840 buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
1841 PairableInsts, ConnectedPairs,
1842 PairableInstUsers, ChosenPairs, DAG, IJ);
1849 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
1850 MaxDepth <<
" and size " << DAG.
size() <<
"\n");
1860 pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
1861 PairableInstUsers, PairableInstUserMap,
1862 PairableInstUserPairSet,
1863 ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
1869 E = PrunedDAG.end(); S != E; ++S) {
1870 PrunedDAGInstrs.
insert(S->first);
1871 PrunedDAGInstrs.
insert(S->second);
1880 bool HasNontrivialInsts =
false;
1885 E = PrunedDAG.end(); S != E; ++S) {
1886 if (!isa<ShuffleVectorInst>(S->first) &&
1887 !isa<InsertElementInst>(S->first) &&
1888 !isa<ExtractElementInst>(S->first))
1889 HasNontrivialInsts =
true;
1891 bool FlipOrder =
false;
1893 if (getDepthFactor(S->first)) {
1894 int ESContrib = CandidatePairCostSavings.
find(*S)->second;
1896 << *S->first <<
" <-> " << *S->second <<
"} = " <<
1898 EffSize += ESContrib;
1904 ConnectedPairDeps.
find(*S);
1905 if (SS != ConnectedPairDeps.
end()) {
1906 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
1907 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1908 TE = SS->second.
end();
T != TE; ++
T) {
1910 if (!PrunedDAG.count(Q.second))
1913 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1914 assert(R != PairConnectionTypes.
end() &&
1915 "Cannot find pair connection type");
1916 if (R->second == PairConnectionDirect)
1918 else if (R->second == PairConnectionSwap)
1925 FlipOrder = !FixedOrderPairs.
count(*S) &&
1926 ((NumDepsSwap > NumDepsDirect) ||
1927 FixedOrderPairs.
count(ValuePair(S->second, S->first)));
1929 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1930 TE = SS->second.
end();
T != TE; ++
T) {
1932 if (!PrunedDAG.count(Q.second))
1935 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1936 assert(R != PairConnectionTypes.
end() &&
1937 "Cannot find pair connection type");
1938 Type *Ty1 = Q.second.first->getType(),
1939 *Ty2 = Q.second.second->getType();
1940 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1941 if ((R->second == PairConnectionDirect && FlipOrder) ||
1942 (R->second == PairConnectionSwap && !FlipOrder) ||
1943 R->second == PairConnectionSplat) {
1944 int ESContrib = (
int) getInstrCost(Instruction::ShuffleVector,
1948 if (R->second == PairConnectionSplat)
1949 ESContrib =
std::min(ESContrib, (
int) TTI->getShuffleCost(
1952 ESContrib =
std::min(ESContrib, (
int) TTI->getShuffleCost(
1957 *Q.second.first <<
" <-> " << *Q.second.second <<
1959 *S->first <<
" <-> " << *S->second <<
"} = " <<
1961 EffSize -= ESContrib;
1969 if (!S->first->getType()->isVoidTy()) {
1970 Type *Ty1 = S->first->getType(),
1971 *Ty2 = S->second->getType();
1972 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1974 bool NeedsExtraction =
false;
1981 if (isa<ExtractElementInst>(U))
1983 if (PrunedDAGInstrs.
count(U))
1985 NeedsExtraction =
true;
1989 if (NeedsExtraction) {
1992 ESContrib = (
int) getInstrCost(Instruction::ShuffleVector,
1994 ESContrib =
std::min(ESContrib, (
int) TTI->getShuffleCost(
1997 ESContrib = (
int) TTI->getVectorInstrCost(
2001 *S->first <<
"} = " << ESContrib <<
"\n");
2002 EffSize -= ESContrib;
2005 NeedsExtraction =
false;
2012 if (isa<ExtractElementInst>(U))
2014 if (PrunedDAGInstrs.
count(U))
2016 NeedsExtraction =
true;
2020 if (NeedsExtraction) {
2022 if (Ty2->isVectorTy()) {
2023 ESContrib = (
int) getInstrCost(Instruction::ShuffleVector,
2025 ESContrib =
std::min(ESContrib, (
int) TTI->getShuffleCost(
2029 ESContrib = (
int) TTI->getVectorInstrCost(
2032 *S->second <<
"} = " << ESContrib <<
"\n");
2033 EffSize -= ESContrib;
2038 if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
2040 *S2 = cast<Instruction>(S->second);
2046 if (isa<Constant>(O1) && isa<Constant>(O2))
2052 ValuePair VP = ValuePair(O1, O2);
2053 ValuePair VPR = ValuePair(O2, O1);
2056 if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
2060 *Ty2 = O2->getType();
2061 Type *VTy = getVecTypeForPair(Ty1, Ty2);
2068 *IEO2 = dyn_cast<InsertElementInst>(O2);
2069 if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
2074 *EIO2 = dyn_cast<ExtractElementInst>(O2);
2077 EIO2->getOperand(0)->getType())
2083 *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
2086 SIO2->getOperand(0)->getType()) {
2090 SIOps.
insert(SIO2->getOperand(0));
2091 SIOps.
insert(SIO2->getOperand(1));
2092 if (SIOps.
size() <= 2)
2099 if (IncomingPairs.
count(VP)) {
2101 }
else if (IncomingPairs.
count(VPR)) {
2102 ESContrib = (
int) getInstrCost(Instruction::ShuffleVector,
2106 ESContrib =
std::min(ESContrib, (
int) TTI->getShuffleCost(
2108 }
else if (!Ty1->
isVectorTy() && !Ty2->isVectorTy()) {
2109 ESContrib = (
int) TTI->getVectorInstrCost(
2110 Instruction::InsertElement, VTy, 0);
2111 ESContrib += (
int) TTI->getVectorInstrCost(
2112 Instruction::InsertElement, VTy, 1);
2116 ESContrib = (
int) TTI->getVectorInstrCost(
2117 Instruction::InsertElement, Ty2, 0);
2118 ESContrib += (
int) getInstrCost(Instruction::ShuffleVector,
2120 }
else if (!Ty2->isVectorTy()) {
2123 ESContrib = (
int) TTI->getVectorInstrCost(
2124 Instruction::InsertElement, Ty1, 0);
2125 ESContrib += (
int) getInstrCost(Instruction::ShuffleVector,
2128 Type *TyBig = Ty1, *TySmall = Ty2;
2132 ESContrib = (
int) getInstrCost(Instruction::ShuffleVector,
2134 if (TyBig != TySmall)
2135 ESContrib += (
int) getInstrCost(Instruction::ShuffleVector,
2140 << *O1 <<
" <-> " << *O2 <<
"} = " <<
2142 EffSize -= ESContrib;
2143 IncomingPairs.
insert(VP);
2148 if (!HasNontrivialInsts) {
2150 "\tNo non-trivial instructions in DAG;"
2151 " override to zero effective size\n");
2156 E = PrunedDAG.end(); S != E; ++S)
2157 EffSize += (
int) getDepthFactor(S->first);
2161 dbgs() <<
"BBV: found pruned DAG for pair {"
2162 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
2163 MaxDepth <<
" and size " << PrunedDAG.size() <<
2164 " (effective size: " << EffSize <<
")\n");
2166 MaxDepth >= Config.ReqChainDepth) &&
2167 EffSize > 0 && EffSize > BestEffSize) {
2169 BestEffSize = EffSize;
2170 BestDAG = PrunedDAG;
2177 void BBVectorize::choosePairs(
2181 std::vector<Value *> &PairableInsts,
2184 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
2185 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
2188 bool UseCycleCheck =
2189 CandidatePairsSet.
size() <= Config.MaxCandPairsForCycleCheck;
2193 E = CandidatePairsSet.
end(); I != E; ++
I) {
2194 std::vector<Value *> &JJ = CandidatePairs2[I->second];
2195 if (JJ.empty()) JJ.reserve(32);
2196 JJ.push_back(I->first);
2201 for (std::vector<Value *>::iterator I = PairableInsts.begin(),
2202 E = PairableInsts.end(); I != E; ++
I) {
2204 size_t NumChoices = CandidatePairs.
lookup(*I).size();
2205 if (!NumChoices)
continue;
2207 std::vector<Value *> &JJ = CandidatePairs[*
I];
2210 size_t BestMaxDepth = 0;
2211 int BestEffSize = 0;
2213 findBestDAGFor(CandidatePairs, CandidatePairsSet,
2214 CandidatePairCostSavings,
2215 PairableInsts, FixedOrderPairs, PairConnectionTypes,
2216 ConnectedPairs, ConnectedPairDeps,
2217 PairableInstUsers, PairableInstUserMap,
2218 PairableInstUserPairSet, ChosenPairs,
2219 BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
2222 if (BestDAG.
empty())
2229 DEBUG(
dbgs() <<
"BBV: selected pairs in the best DAG for: "
2230 << *cast<Instruction>(*I) <<
"\n");
2233 SE2 = BestDAG.
end(); S != SE2; ++S) {
2235 ChosenPairs.
insert(ValuePair(S->first, S->second));
2236 DEBUG(
dbgs() <<
"BBV: selected pair: " << *S->first <<
" <-> " <<
2237 *S->second <<
"\n");
2240 std::vector<Value *> &KK = CandidatePairs[S->first];
2241 for (std::vector<Value *>::iterator K = KK.
begin(), KE = KK.end();
2243 if (*K == S->second)
2246 CandidatePairsSet.
erase(ValuePair(S->first, *K));
2249 std::vector<Value *> &LL = CandidatePairs2[S->second];
2250 for (std::vector<Value *>::iterator L = LL.begin(),
LE = LL.end();
2255 CandidatePairsSet.
erase(ValuePair(*L, S->second));
2258 std::vector<Value *> &MM = CandidatePairs[S->second];
2259 for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
2261 assert(*M != S->first &&
"Flipped pair in candidate list?");
2262 CandidatePairsSet.
erase(ValuePair(S->second, *M));
2265 std::vector<Value *> &NN = CandidatePairs2[S->first];
2266 for (std::vector<Value *>::iterator
N = NN.begin(),
NE = NN.end();
2268 assert(*
N != S->second &&
"Flipped pair in candidate list?");
2269 CandidatePairsSet.
erase(ValuePair(*
N, S->first));
2274 DEBUG(
dbgs() <<
"BBV: selected " << ChosenPairs.
size() <<
" pairs.\n");
2277 std::string getReplacementName(
Instruction *I,
bool IsInput,
unsigned o,
2282 return (I->
getName() + (IsInput ?
".v.i" :
".v.r") +
utostr(o) +
2283 (n > 0 ?
"." +
utostr(n) :
"")).str();
2291 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
2292 int64_t OffsetInElmts;
2296 (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
2297 IAddressSpace, JAddressSpace,
2298 OffsetInElmts,
false);
2305 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2309 return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I,
true, o),
2314 unsigned MaskOffset,
unsigned NumInElem,
2315 unsigned NumInElem1,
unsigned IdxOffset,
2316 std::vector<Constant*> &Mask) {
2318 for (
unsigned v = 0; v < NumElem1; ++v) {
2319 int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
2323 unsigned mm = m + (
int) IdxOffset;
2324 if (m >= (
int) NumInElem1)
2325 mm += (
int) NumInElem;
2327 Mask[v+MaskOffset] =
2342 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2350 std::vector<Constant*> Mask(NumElem);
2366 fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
2370 fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
2380 bool IBeforeJ,
unsigned IdxOff) {
2381 bool ExpandedIEChain =
false;
2385 if (isPureIEChain(LIE)) {
2391 cast<ConstantInt>(LIENext->
getOperand(2))->getSExtValue();
2394 dyn_cast<InsertElementInst>(LIENext->
getOperand(0))));
2398 for (
unsigned i = 0; i < numElemL; ++i) {
2399 if (isa<UndefValue>(VectElemts[i]))
continue;
2403 getReplacementName(IBeforeJ ? I : J,
2410 ExpandedIEChain =
true;
2414 return ExpandedIEChain;
2417 static unsigned getNumScalarElements(
Type *Ty) {
2418 if (
VectorType *VecTy = dyn_cast<VectorType>(Ty))
2419 return VecTy->getNumElements();
2433 VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2436 Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
2438 unsigned numElemL = getNumScalarElements(ArgTypeL);
2439 unsigned numElemH = getNumScalarElements(ArgTypeH);
2457 bool IsSizeChangeShuffle =
2458 isa<ShuffleVectorInst>(L) &&
2461 if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
2463 bool CanUseInputs =
true;
2464 Value *I1, *I2 =
nullptr;
2468 I1 = LSV->getOperand(0);
2469 I2 = LSV->getOperand(1);
2470 if (I2 == I1 || isa<UndefValue>(I2))
2475 Value *I3 = HEE->getOperand(0);
2476 if (!I2 && I3 != I1)
2478 else if (I3 != I1 && I3 != I2)
2479 CanUseInputs =
false;
2481 Value *I3 = HSV->getOperand(0);
2482 if (!I2 && I3 != I1)
2484 else if (I3 != I1 && I3 != I2)
2485 CanUseInputs =
false;
2488 Value *I4 = HSV->getOperand(1);
2489 if (!isa<UndefValue>(I4)) {
2490 if (!I2 && I4 != I1)
2492 else if (I4 != I1 && I4 != I2)
2493 CanUseInputs =
false;
2500 cast<Instruction>(LOp)->getOperand(0)->getType()
2501 ->getVectorNumElements();
2504 cast<Instruction>(
HOp)->getOperand(0)->getType()
2505 ->getVectorNumElements();
2510 for (
unsigned i = 0; i < numElemL; ++i) {
2514 cast<ConstantInt>(LEE->
getOperand(1))->getSExtValue();
2517 Idx = LSV->getMaskValue(i);
2518 if (Idx < (
int) LOpElem) {
2519 INum = LSV->getOperand(0) == I1 ? 0 : 1;
2522 INum = LSV->getOperand(1) == I1 ? 0 : 1;
2526 II[i] = std::pair<int, int>(Idx, INum);
2528 for (
unsigned i = 0; i < numElemH; ++i) {
2532 cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
2533 INum = HEE->getOperand(0) == I1 ? 0 : 1;
2535 Idx = HSV->getMaskValue(i);
2536 if (Idx < (
int) HOpElem) {
2537 INum = HSV->getOperand(0) == I1 ? 0 : 1;
2540 INum = HSV->getOperand(1) == I1 ? 0 : 1;
2544 II[i + numElemL] = std::pair<int, int>(Idx, INum);
2555 if (I1Elem == numElem) {
2556 bool ElemInOrder =
true;
2557 for (
unsigned i = 0; i < numElem; ++i) {
2558 if (II[i].first != (
int) i && II[i].first != -1) {
2559 ElemInOrder =
false;
2569 std::vector<Constant *> Mask(numElem);
2570 for (
unsigned i = 0; i < numElem; ++i) {
2571 int Idx = II[i].first;
2581 getReplacementName(IBeforeJ ? I : J,
2593 if (I1Elem < I2Elem) {
2594 std::vector<Constant *> Mask(I2Elem);
2596 for (; v < I1Elem; ++v)
2598 for (; v < I2Elem; ++v)
2604 getReplacementName(IBeforeJ ? I : J,
2609 }
else if (I1Elem > I2Elem) {
2610 std::vector<Constant *> Mask(I1Elem);
2612 for (; v < I2Elem; ++v)
2614 for (; v < I1Elem; ++v)
2620 getReplacementName(IBeforeJ ? I : J,
2628 std::vector<Constant *> Mask(numElem);
2629 for (
unsigned v = 0; v < numElem; ++v) {
2630 if (II[v].first == -1) {
2633 int Idx = II[v].first + II[v].second * I1Elem;
2640 getReplacementName(IBeforeJ ? I : J,
true, o));
2646 Type *ArgType = ArgTypeL;
2647 if (numElemL < numElemH) {
2648 if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
2649 ArgTypeL, VArgType, IBeforeJ, 1)) {
2655 getReplacementName(IBeforeJ ? I : J,
true, o));
2658 }
else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
2659 ArgTypeH, IBeforeJ)) {
2665 std::vector<Constant *> Mask(numElemH);
2667 for (; v < numElemL; ++v)
2669 for (; v < numElemH; ++v)
2674 getReplacementName(IBeforeJ ? I : J,
2678 getReplacementName(IBeforeJ ? I : J,
2687 }
else if (numElemL > numElemH) {
2688 if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
2689 ArgTypeH, VArgType, IBeforeJ)) {
2694 getReplacementName(IBeforeJ ? I : J,
2698 }
else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
2699 ArgTypeL, IBeforeJ)) {
2702 std::vector<Constant *> Mask(numElemL);
2704 for (; v < numElemH; ++v)
2706 for (; v < numElemL; ++v)
2711 getReplacementName(IBeforeJ ? I : J,
2715 getReplacementName(IBeforeJ ? I : J,
2726 std::vector<Constant*> Mask(numElem);
2727 for (
unsigned v = 0; v < numElem; ++v) {
2731 if (v >= numElemL && numElemH > numElemL)
2732 Idx += (numElemH - numElemL);
2738 getReplacementName(IBeforeJ ? I : J,
true, o));
2745 getReplacementName(IBeforeJ ? I : J,
2749 getReplacementName(IBeforeJ ? I : J,
2757 void BBVectorize::getReplacementInputsForPair(
LLVMContext& Context,
2763 for (
unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
2767 if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
2769 ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
2771 }
else if (isa<CallInst>(I)) {
2774 if (o == NumOperands-1) {
2780 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2784 }
else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
2785 IID == Intrinsic::cttz) && o == 1) {
2792 }
else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
2793 ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
2797 ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
2809 if (isa<StoreInst>(I)) {
2810 AA->replaceWithNewValue(I, K);
2811 AA->replaceWithNewValue(J, K);
2816 VectorType *VType = getVecTypeForPair(IType, JType);
2819 unsigned numElemI = getNumScalarElements(IType);
2820 unsigned numElemJ = getNumScalarElements(JType);
2823 std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
2824 for (
unsigned v = 0; v < numElemI; ++v) {
2831 getReplacementName(K,
false, 1));
2835 getReplacementName(K,
false, 1));
2839 std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
2840 for (
unsigned v = 0; v < numElemJ; ++v) {
2847 getReplacementName(K,
false, 2));
2851 getReplacementName(K,
false, 2));
2861 bool BBVectorize::canMoveUsesOfIAfterJ(
BasicBlock &BB,
2871 for (; cast<Instruction>(L) != J; ++L)
2872 (
void) trackUsesOfI(Users, WriteSet, I, L,
true, &LoadMoveSetPairs);
2874 assert(cast<Instruction>(L) == J &&
2875 "Tracking has not proceeded far enough to check for dependencies");
2878 return !trackUsesOfI(Users, WriteSet, I, J,
true, &LoadMoveSetPairs);
2882 void BBVectorize::moveUsesOfIAfterJ(
BasicBlock &BB,
2893 for (; cast<Instruction>(L) != J;) {
2894 if (trackUsesOfI(Users, WriteSet, I, L,
true, &LoadMoveSetPairs)) {
2898 DEBUG(
dbgs() <<
"BBV: moving: " << *InstToMove <<
2899 " to after " << *InsertionPt <<
"\n");
2902 InsertionPt = InstToMove;
2912 void BBVectorize::collectPairLoadMoveSet(
BasicBlock &BB,
2928 if (trackUsesOfI(Users, WriteSet, I, L)) {
2929 if (L->mayReadFromMemory()) {
2930 LoadMoveSet[L].push_back(I);
2931 LoadMoveSetPairs.
insert(ValuePair(L, I));
2944 void BBVectorize::collectLoadMoveSet(
BasicBlock &BB,
2945 std::vector<Value *> &PairableInsts,
2949 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
2950 PIE = PairableInsts.end(); PI != PIE; ++PI) {
2952 if (P == ChosenPairs.
end())
continue;
2955 collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
2956 LoadMoveSetPairs, I);
2966 void BBVectorize::fuseChosenPairs(
BasicBlock &BB,
2967 std::vector<Value *> &PairableInsts,
2971 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
2972 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
2980 E = ChosenPairs.
end(); P != E; ++
P)
2981 FlippedPairs.
insert(ValuePair(P->second, P->first));
2983 E = FlippedPairs.
end(); P != E; ++
P)
2988 collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
2989 LoadMoveSet, LoadMoveSetPairs);
2991 DEBUG(
dbgs() <<
"BBV: initial: \n" << BB <<
"\n");
2995 if (P == ChosenPairs.
end()) {
3000 if (getDepthFactor(P->first) == 0) {
3010 *J = cast<Instruction>(P->second);
3012 DEBUG(
dbgs() <<
"BBV: fusing: " << *I <<
3013 " <-> " << *J <<
"\n");
3017 assert(FP != ChosenPairs.
end() &&
"Flipped pair not found in list");
3018 ChosenPairs.
erase(FP);
3019 ChosenPairs.
erase(P);
3021 if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
3022 DEBUG(
dbgs() <<
"BBV: fusion of: " << *I <<
3024 " aborted because of non-trivial dependency cycle\n");
3031 bool FlipPairOrder = FixedOrderPairs.
count(ValuePair(J, I));
3032 if (!FlipPairOrder && !FixedOrderPairs.
count(ValuePair(I, J))) {
3037 bool OrigOrder =
true;
3039 ConnectedPairDeps.
find(ValuePair(I, J));
3040 if (IJ == ConnectedPairDeps.
end()) {
3041 IJ = ConnectedPairDeps.
find(ValuePair(J, I));
3045 if (IJ != ConnectedPairDeps.
end()) {
3046 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
3047 for (std::vector<ValuePair>::iterator
T = IJ->second.
begin(),
3048 TE = IJ->second.
end();
T != TE; ++
T) {
3049 VPPair Q(IJ->first, *
T);
3051 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
3052 assert(R != PairConnectionTypes.
end() &&
3053 "Cannot find pair connection type");
3054 if (R->second == PairConnectionDirect)
3056 else if (R->second == PairConnectionSwap)
3063 if (NumDepsSwap > NumDepsDirect) {
3064 FlipPairOrder =
true;
3065 DEBUG(
dbgs() <<
"BBV: reordering pair: " << *I <<
3066 " <-> " << *J <<
"\n");
3078 ConnectedPairs.
find(ValuePair(H, L));
3079 if (HL != ConnectedPairs.
end())
3080 for (std::vector<ValuePair>::iterator
T = HL->second.
begin(),
3081 TE = HL->second.
end();
T != TE; ++
T) {
3082 VPPair Q(HL->first, *
T);
3084 assert(R != PairConnectionTypes.
end() &&
3085 "Cannot find pair connection type");
3086 if (R->second == PairConnectionDirect)
3087 R->second = PairConnectionSwap;
3088 else if (R->second == PairConnectionSwap)
3089 R->second = PairConnectionDirect;
3092 bool LBeforeH = !FlipPairOrder;
3095 getReplacementInputsForPair(Context, L, H, ReplacedOperands,
3103 else if (H->hasName())
3110 assert(NumOld <= ReplacedOperands.
size());
3111 for (
unsigned i = 0; i != NumOld; ++i)
3112 Tys.
push_back(ReplacedOperands[i]->getType());
3113 CS.mutateFunctionType(
3116 }
else if (!isa<StoreInst>(K))
3119 unsigned KnownIDs[] = {
3128 for (
unsigned o = 0; o < NumOperands; ++o)
3136 replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
3143 moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
3145 if (!isa<StoreInst>(I)) {
3147 H->replaceAllUsesWith(K2);
3148 AA->replaceWithNewValue(L, K1);
3149 AA->replaceWithNewValue(H, K2);
3159 std::vector<ValuePair> NewSetMembers;
3161 LoadMoveSet.
find(I);
3162 if (II != LoadMoveSet.
end())
3163 for (std::vector<Value *>::iterator
N = II->second.
begin(),
3165 NewSetMembers.push_back(ValuePair(K, *
N));
3167 LoadMoveSet.
find(J);
3168 if (JJ != LoadMoveSet.
end())
3169 for (std::vector<Value *>::iterator
N = JJ->second.
begin(),
3171 NewSetMembers.push_back(ValuePair(K, *
N));
3172 for (std::vector<ValuePair>::iterator
A = NewSetMembers.begin(),
3173 AE = NewSetMembers.end();
A != AE; ++
A) {
3174 LoadMoveSet[
A->first].push_back(
A->second);
3181 if (cast<Instruction>(PI) == J)
3193 DEBUG(
dbgs() <<
"BBV: final: \n" << BB <<
"\n");
3207 return new BBVectorize(C);
3212 BBVectorize BBVectorizer(P, *BB.
getParent(), C);
3213 return BBVectorizer.vectorizeBB(BB);
Pass interface - Implemented by all 'passes'.
bool VectorizeFMA
Vectorize the fused-multiply-add intrinsic.
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
void push_back(const T &Elt)
A parsed version of the target data layout string in and methods for querying it. ...
static cl::opt< unsigned > MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, cl::desc("The maximum number of candidate instruction pairs per group"))
This class is the base class for the comparison instructions.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
A Module instance is used to store all the information related to an LLVM module. ...
unsigned getNumParams() const
getNumParams - Return the number of fixed parameters this function type requires. ...
static cl::opt< bool > DebugPairSelection("bb-vectorize-debug-pair-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" pair-selection process"))
DenseSet - This implements a dense probed hash-table based set.
unsigned getNumOperands() const
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
ScalarEvolution - This class is the main scalar evolution driver.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
CallInst - This class represents a function call, abstracting a target machine's calling convention...
void initializeBBVectorizePass(PassRegistry &)
static cl::opt< bool > NoMemOpBoost("bb-vectorize-no-mem-op-boost", cl::init(false), cl::Hidden, cl::desc("Don't boost the chain-depth contribution of loads and stores"))
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
static cl::opt< bool > IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), cl::Hidden, cl::desc("Ignore target information"))
ShuffleVectorInst - This instruction constructs a fixed permutation of two input vectors.
bool VectorizeMath
Vectorize floating-point math intrinsics.
const Function * getParent() const
Return the enclosing method, or null if none.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
SimplifyInstructionsInBlock - Scan the specified basic block and try to simplify any instructions in ...
LoadInst - an instruction for reading from memory.
iv Induction Variable Users
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo)
add methods - These methods are used to add different types of instructions to the alias sets...
static cl::opt< HelpPrinterWrapper, true, parser< bool > > HOp("help", cl::desc("Display available options (-help-hidden for more)"), cl::location(WrappedNormalPrinter), cl::ValueDisallowed, cl::cat(GenericCategory))
static cl::opt< unsigned > VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, cl::desc("The size of the native vector registers"))
Type * getPointerElementType() const
StringRef getName() const
Return a constant reference to the value's name.
bool isSingleValueType() const
isSingleValueType - Return true if the type is a valid type for a register in codegen.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
AnalysisUsage & addRequired()
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
#define INITIALIZE_PASS_DEPENDENCY(depName)
static cl::opt< bool > AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, cl::desc("Only generate aligned loads and stores"))
SelectInst - This class represents the LLVM 'select' instruction.
unsigned MaxCandPairsForCycleCheck
The maximum number of candidate pairs with which to use a full cycle check.
bool erase(const ValueT &V)
This is the base class for all instructions that perform data casts.
const APInt & getValue() const
Return the constant as an APInt value reference.
const_iterator end() const
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
A Use represents the edge between a Value definition and its users.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
unsigned getNumArgOperands() const
getNumArgOperands - Return the number of call arguments.
static Constant * get(ArrayRef< Constant * > V)
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
Windows NT (Windows on ARM)
Check for equivalence ignoring load/store alignment.
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
Instruction * clone() const
clone() - Create a copy of 'this' instruction that is identical in all ways except the following: ...
static cl::opt< unsigned > MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group"))
static const char bb_vectorize_name[]
bool Pow2LenOnly
Don't try to form odd-length vectors.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
const_iterator begin() const
user_iterator_impl< User > user_iterator
FunctionType - Class to represent function types.
Check for equivalence treating a type and a vector of that type as equivalent.
static cl::opt< bool > NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point math intrinsics"))
bool mayReadFromMemory() const
mayReadFromMemory - Return true if this instruction may read memory.
static cl::opt< bool > UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), cl::Hidden, cl::desc("Use the chain depth requirement with"" target information"))
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
static cl::opt< bool > NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"))
static bool isValidElementType(Type *ElemTy)
isValidElementType - Return true if the specified type is valid as a element type.
This class represents a no-op cast from one type to another.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
FunctionType::get - This static method is the primary way of constructing a FunctionType.
static std::string utostr(uint64_t X, bool isNeg=false)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
StoreInst - an instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
unsigned getNumElements() const
Return the number of elements in the Vector type.
void takeName(Value *V)
Transfer the name from V to this value.
bool VectorizePointers
Vectorize pointer values.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
bool isPPC_FP128Ty() const
isPPC_FP128Ty - Return true if this is powerpc long double.
bool VectorizeCmp
Vectorize comparison instructions.
unsigned MaxIter
The maximum number of pairing iterations.
bool VectorizeMemOps
Vectorize loads and stores.
static cl::opt< bool > FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, cl::desc("Use a fast instruction dependency analysis"))
static cl::opt< bool > NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize comparison instructions"))
GetElementPtrInst - an instruction for type-safe pointer arithmetic to access elements of arrays and ...
bool isX86_MMXTy() const
isX86_MMXTy - Return true if this is X86 MMX.
bool isIntOrIntVectorTy() const
isIntOrIntVectorTy - Return true if this is an integer type or a vector of integer types...
void intersectOptionalDataWith(const Value *V)
Clear any optional flags not set in the given Value.
initializer< Ty > init(const Ty &Val)
bool erase(const KeyT &Val)
InsertElementInst - This instruction inserts a single (scalar) element into a VectorType value...
unsigned getAlignment() const
getAlignment - Return the alignment of the access that is being performed
iterator find(const ValueT &V)
bool AlignedOnly
Only generate aligned loads and stores.
static cl::opt< bool > NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values"))
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
LLVM Basic Block Representation.
static cl::opt< unsigned > SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, cl::desc("The maximum search distance for instruction pairs"))
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
This is an important base class in LLVM.
bool SplatBreaksChain
Replicating one element to a pair breaks the chain.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< bool > DebugInstructionExamination("bb-vectorize-debug-instruction-examination", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" instruction-examination process"))
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
static cl::opt< bool > NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize BitManipulation intrinsics"))
Represent the analysis usage information of a pass.
Value * getOperand(unsigned i) const
Value * getPointerOperand()
unsigned SearchLimit
The maximum search distance for instruction pairs.
static cl::opt< bool > NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize casting (conversion) operations"))
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
static cl::opt< unsigned > MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"" a full cycle check"))
#define INITIALIZE_AG_DEPENDENCY(depName)
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
static UndefValue * get(Type *T)
get() - Static factory methods - Return an 'undef' object of the specified type.
iterator erase(iterator I)
static cl::opt< unsigned > MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations"))
bool isFPOrFPVectorTy() const
isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
bool mayWriteToMemory() const
mayWriteToMemory - Return true if this instruction may modify memory.
BasicBlockPass class - This class is used to implement most local optimizations.
unsigned MaxPairs
The maximum number of candidate instruction pairs per group.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
std::pair< iterator, bool > insert(const ValueT &V)
This is the shared class of boolean and integer constants.
bool VectorizeFloats
Vectorize floating-point values.
unsigned getVectorNumElements() const
unsigned getScalarSizeInBits() const LLVM_READONLY
getScalarSizeInBits - If this is a vector type, return the getPrimitiveSizeInBits value for the eleme...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
static cl::opt< bool > SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, cl::desc("Replicating one element to a pair breaks the chain"))
bool vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C=VectorizeConfig())
Vectorize the BasicBlock.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static cl::opt< bool > NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores"))
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Function * getCalledFunction() const
getCalledFunction - Return the function called, or null if this is an indirect function invocation...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static cl::opt< bool > NoPointers("bb-vectorize-no-pointers", cl::init(true), cl::Hidden, cl::desc("Don't try to vectorize pointer values"))
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
bool FastDep
Use a fast instruction dependency analysis.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
VectorType - Class to represent vector types.
static cl::opt< bool > NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize getelementptr instructions"))
iterator_range< user_iterator > users()
static cl::opt< bool > NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize boolean (i1) values"))
bool VectorizeCasts
Vectorize casting (conversion) operations.
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
BasicBlockPass * createBBVectorizePass(const VectorizeConfig &C=VectorizeConfig())
const Type * getScalarType() const LLVM_READONLY
getScalarType - If this is a vector type, return the element type, otherwise return 'this'...
bool VectorizeBools
Vectorize boolean values.
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing basic block, but does not delete it...
static cl::opt< unsigned > ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, cl::desc("The required chain depth for vectorization"))
unsigned ReqChainDepth
The required chain depth for vectorization.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
bool isX86_FP80Ty() const
isX86_FP80Ty - Return true if this is x86 long double.
unsigned MaxInsts
The maximum number of pairable instructions per group.
SCEV - This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
static cl::opt< bool > NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize integer values"))
unsigned getAlignment() const
getAlignment - Return the alignment of the access that is being performed
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
bool NoMemOpBoost
Don't boost the chain-depth contribution of loads and stores.
bool VectorizeGEP
Vectorize getelementptr instructions.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
static Function * getCalledFunction(const Value *V, bool LookThroughBitCast)
iterator find(const KeyT &Val)
static cl::opt< bool > PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, dump the basic block after"" every pair is fused"))
static cl::opt< bool > DebugCycleCheck("bb-vectorize-debug-cycle-check", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" cycle-checking process"))
static cl::opt< bool > DebugCandidateSelection("bb-vectorize-debug-candidate-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" candidate-selection process"))
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
VectorizeConfig()
Initialize the VectorizeConfig from command line options.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
user_iterator user_begin()
LLVMContext & getContext() const
Get the context in which this basic block lives.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct a VectorType.
bool VectorizeInts
Vectorize integer values.
static cl::opt< bool > Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, cl::desc("Don't try to form non-2^n-length vectors"))
bool VectorizeBitManipulations
Vectorize bit intrinsics.
Legacy analysis pass which computes a DominatorTree.
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction...
bool VectorizeSelect
Vectorize select instructions.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Value * getPointerOperand()
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs)
Combine the metadata of two instructions so that K can replace J.
const BasicBlock * getParent() const
InstListType::iterator iterator
Instruction iterators...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
static cl::opt< bool > NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize select instructions"))
unsigned VectorBits
The size of the native vector registers.
SCEVConstant - This class represents a constant integer value.