// Name string "bb-vectorize"; it is reused below as the value of DEBUG_TYPE
// so both macros always expand to the same identifier text.
17 #define BBV_NAME "bb-vectorize"
// DEBUG_TYPE expands to BBV_NAME (presumably the tag consumed by the DEBUG()
// / STATISTIC() macros used later in this file -- confirm against LLVM's
// Debug.h / Statistic.h).
56 #define DEBUG_TYPE BBV_NAME
64 cl::desc(
"The required chain depth for vectorization"));
69 " target information"));
73 cl::desc(
"The maximum search distance for instruction pairs"));
77 cl::desc(
"Replicating one element to a pair breaks the chain"));
81 cl::desc(
"The size of the native vector registers"));
85 cl::desc(
"The maximum number of pairing iterations"));
89 cl::desc(
"Don't try to form non-2^n-length vectors"));
93 cl::desc(
"The maximum number of pairable instructions per group"));
97 cl::desc(
"The maximum number of candidate instruction pairs per group"));
102 " a full cycle check"));
106 cl::desc(
"Don't try to vectorize boolean (i1) values"));
110 cl::desc(
"Don't try to vectorize integer values"));
114 cl::desc(
"Don't try to vectorize floating-point values"));
119 cl::desc(
"Don't try to vectorize pointer values"));
123 cl::desc(
"Don't try to vectorize casting (conversion) operations"));
127 cl::desc(
"Don't try to vectorize floating-point math intrinsics"));
131 cl::desc(
"Don't try to vectorize BitManipulation intrinsics"));
135 cl::desc(
"Don't try to vectorize the fused-multiply-add intrinsic"));
139 cl::desc(
"Don't try to vectorize select instructions"));
143 cl::desc(
"Don't try to vectorize comparison instructions"));
147 cl::desc(
"Don't try to vectorize getelementptr instructions"));
151 cl::desc(
"Don't try to vectorize loads and stores"));
155 cl::desc(
"Only generate aligned loads and stores"));
160 cl::desc(
"Don't boost the chain-depth contribution of loads and stores"));
164 cl::desc(
"Use a fast instruction dependency analysis"));
170 cl::desc(
"When debugging is enabled, output information on the"
171 " instruction-examination process"));
175 cl::desc(
"When debugging is enabled, output information on the"
176 " candidate-selection process"));
180 cl::desc(
"When debugging is enabled, output information on the"
181 " pair-selection process"));
185 cl::desc(
"When debugging is enabled, output information on the"
186 " cycle-checking process"));
191 cl::desc(
"When debugging is enabled, dump the basic block after"
192 " every pair is fused"));
// Statistic counter for fused operations. vectorizePairs() adds
// AllChosenPairs.size() to it once it has a non-empty set of chosen pairs.
195 STATISTIC(NumFusedOps,
"Number of operations fused by bb-vectorize");
// Ordered pair of Value pointers; used throughout as the representation of
// one candidate/chosen instruction pairing, e.g. ValuePair(I->first, *J).
219 typedef std::pair<Value *, Value *> ValuePair;
// A ValuePair together with an int; getCandidatePairs() stores the pair's
// CostSavings in the int when filling CandidatePairCostSavings.
220 typedef std::pair<ValuePair, int> ValuePairWithCost;
// A ValuePair together with a size_t depth value; the DAG-building code
// seeds it with getDepthFactor() and accumulates depth as it walks children.
221 typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
// A pair of ValuePairs; used as the key identifying a connection between
// two pairings (see PairConnectionTypes lookups via VPPair(*I, *J)).
222 typedef std::pair<ValuePair, ValuePair> VPPair;
// A VPPair together with an unsigned that holds a PairConnectionType
// enumerator (e.g. PairConnectionDirect / PairConnectionSwap /
// PairConnectionSplat) when inserted into PairConnectionTypes.
223 typedef std::pair<VPPair, unsigned> VPPairWithType;
233 bool vectorizePairs(
BasicBlock &BB,
bool NonPow2Len =
false);
240 std::vector<Value *> &PairableInsts,
bool NonPow2Len);
245 enum PairConnectionType {
246 PairConnectionDirect,
251 void computeConnectedPairs(
254 std::vector<Value *> &PairableInsts,
255 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
260 std::vector<Value *> &PairableInsts,
263 void choosePairs(
DenseMap<
Value *, std::vector<Value *> > &CandidatePairs,
266 std::vector<Value *> &PairableInsts,
269 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
270 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
275 std::vector<Value *> &PairableInsts,
279 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
280 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
283 bool isInstVectorizable(
Instruction *
I,
bool &IsSimpleLoadStore);
286 bool IsSimpleLoadStore,
bool NonPow2Len,
287 int &CostSavings,
int &FixedOrder);
294 void computePairsConnectedTo(
297 std::vector<Value *> &PairableInsts,
298 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
302 bool pairsConflict(ValuePair
P, ValuePair Q,
304 DenseMap<ValuePair, std::vector<ValuePair> >
305 *PairableInstUserMap =
nullptr,
308 bool pairWillFormCycle(ValuePair
P,
309 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
314 std::vector<Value *> &PairableInsts,
315 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
317 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
324 void buildInitialDAGFor(
327 std::vector<Value *> &PairableInsts,
328 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
337 std::vector<Value *> &PairableInsts,
340 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
341 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
343 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
347 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
354 unsigned MaskOffset,
unsigned NumInElem,
355 unsigned NumInElem1,
unsigned IdxOffset,
356 std::vector<Constant*> &
Mask);
362 unsigned o,
Value *&LOp,
unsigned numElemL,
363 Type *ArgTypeL,
Type *ArgTypeR,
bool IBeforeJ,
364 unsigned IdxOff = 0);
385 std::vector<Value *> &PairableInsts,
400 if (skipBasicBlock(BB))
402 if (!DT->isReachableFromEntry(&BB)) {
408 DEBUG(
if (TTI)
dbgs() <<
"BBV: using target information\n");
410 bool changed =
false;
416 (TTI || v <= Config.VectorBits) &&
417 (!Config.MaxIter || n <= Config.MaxIter);
419 DEBUG(
dbgs() <<
"BBV: fusing loop #" << n <<
420 " for " << BB.
getName() <<
" in " <<
422 if (vectorizePairs(BB))
430 for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
431 DEBUG(
dbgs() <<
"BBV: fusing for non-2^n-length vectors loop #: " <<
432 n <<
" for " << BB.
getName() <<
" in " <<
434 if (!vectorizePairs(BB,
true))
break;
445 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
446 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
447 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
448 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
451 : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
454 return vectorizeBB(BB);
473 "Cannot form vector from incompatible scalar types");
477 if (
VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
478 numElem = VTy->getNumElements();
483 if (
VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
484 numElem += VTy->getNumElements();
494 if (
StoreInst *SI = dyn_cast<StoreInst>(I)) {
498 Value *IVal =
SI->getValueOperand();
504 if (
CastInst *CI = dyn_cast<CastInst>(I))
509 if (
SelectInst *SI = dyn_cast<SelectInst>(I)) {
510 T2 =
SI->getCondition()->getType();
512 T2 =
SI->getOperand(0)->getType();
513 }
else if (
CmpInst *CI = dyn_cast<CmpInst>(I)) {
514 T2 = CI->getOperand(0)->getType();
527 inline size_t getDepthFactor(
Value *V) {
536 if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
541 if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
542 return Config.ReqChainDepth/2;
549 unsigned getInstrCost(
unsigned Opcode,
Type *T1,
Type *T2,
556 case Instruction::GetElementPtr:
561 case Instruction::Br:
562 return TTI->getCFInstrCost(Opcode);
563 case Instruction::PHI:
566 case Instruction::FAdd:
567 case Instruction::Sub:
568 case Instruction::FSub:
569 case Instruction::Mul:
570 case Instruction::FMul:
571 case Instruction::UDiv:
572 case Instruction::SDiv:
573 case Instruction::FDiv:
574 case Instruction::URem:
575 case Instruction::SRem:
576 case Instruction::FRem:
577 case Instruction::Shl:
578 case Instruction::LShr:
579 case Instruction::AShr:
583 return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
585 case Instruction::ICmp:
586 case Instruction::FCmp:
587 return TTI->getCmpSelInstrCost(Opcode, T1, T2);
588 case Instruction::ZExt:
589 case Instruction::SExt:
590 case Instruction::FPToUI:
591 case Instruction::FPToSI:
592 case Instruction::FPExt:
593 case Instruction::PtrToInt:
594 case Instruction::IntToPtr:
595 case Instruction::SIToFP:
596 case Instruction::UIToFP:
597 case Instruction::Trunc:
598 case Instruction::FPTrunc:
599 case Instruction::BitCast:
600 case Instruction::ShuffleVector:
601 return TTI->getCastInstrCost(Opcode, T1, T2);
613 Value *&IPtr,
Value *&JPtr,
unsigned &IAlignment,
unsigned &JAlignment,
614 unsigned &IAddressSpace,
unsigned &JAddressSpace,
615 int64_t &OffsetInElmts,
bool ComputeOffset =
true) {
617 if (
LoadInst *LI = dyn_cast<LoadInst>(I)) {
619 IPtr = LI->getPointerOperand();
621 IAlignment = LI->getAlignment();
623 IAddressSpace = LI->getPointerAddressSpace();
626 StoreInst *
SI = cast<StoreInst>(
I), *SJ = cast<StoreInst>(J);
628 JPtr = SJ->getPointerOperand();
630 JAlignment = SJ->getAlignment();
632 JAddressSpace = SJ->getPointerAddressSpace();
638 const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
639 const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
644 const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
646 dyn_cast<SCEVConstant>(OffsetSCEV)) {
654 if (VTy != VTy2 && Offset < 0) {
656 OffsetInElmts = Offset/VTy2TSS;
657 return (
std::abs(Offset) % VTy2TSS) == 0;
660 OffsetInElmts = Offset/VTyTSS;
661 return (
std::abs(Offset) % VTyTSS) == 0;
669 bool isVectorizableIntrinsic(
CallInst* I) {
671 if (!F)
return false;
674 if (!IID)
return false;
679 case Intrinsic::sqrt:
680 case Intrinsic::powi:
684 case Intrinsic::log2:
685 case Intrinsic::log10:
687 case Intrinsic::exp2:
690 case Intrinsic::copysign:
691 case Intrinsic::ceil:
692 case Intrinsic::nearbyint:
693 case Intrinsic::rint:
694 case Intrinsic::trunc:
695 case Intrinsic::floor:
696 case Intrinsic::fabs:
699 return Config.VectorizeMath;
700 case Intrinsic::bswap:
701 case Intrinsic::ctpop:
702 case Intrinsic::ctlz:
703 case Intrinsic::cttz:
704 return Config.VectorizeBitManipulations;
706 case Intrinsic::fmuladd:
707 return Config.VectorizeFMA;
714 if (!isa<UndefValue>(IENext->
getOperand(0)) &&
715 !isa<InsertElementInst>(IENext->
getOperand(0))) {
719 dyn_cast<InsertElementInst>(IENext->
getOperand(0))));
727 bool BBVectorize::vectorizePairs(
BasicBlock &BB,
bool NonPow2Len) {
731 std::vector<Value *> AllPairableInsts;
736 AllConnectedPairDeps;
739 std::vector<Value *> PairableInsts;
743 ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
745 CandidatePairCostSavings,
746 PairableInsts, NonPow2Len);
747 if (PairableInsts.empty())
continue;
752 CandidatePairs.
begin(),
E = CandidatePairs.
end(); I !=
E; ++
I)
753 for (std::vector<Value *>::iterator J = I->second.begin(),
754 JE = I->second.end(); J != JE; ++J)
755 CandidatePairsSet.
insert(ValuePair(I->first, *J));
769 computeConnectedPairs(CandidatePairs, CandidatePairsSet,
770 PairableInsts, ConnectedPairs, PairConnectionTypes);
771 if (ConnectedPairs.
empty())
continue;
773 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
774 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
776 for (std::vector<ValuePair>::iterator J = I->second.begin(),
777 JE = I->second.end(); J != JE; ++J)
778 ConnectedPairDeps[*J].push_back(I->first);
782 buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
791 choosePairs(CandidatePairs, CandidatePairsSet,
792 CandidatePairCostSavings,
793 PairableInsts, FixedOrderPairs, PairConnectionTypes,
794 ConnectedPairs, ConnectedPairDeps,
795 PairableInstUsers, ChosenPairs);
797 if (ChosenPairs.
empty())
continue;
798 AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
799 PairableInsts.end());
806 IE = ChosenPairs.
end(); I !=
IE; ++
I) {
807 if (FixedOrderPairs.
count(*I))
808 AllFixedOrderPairs.
insert(*I);
809 else if (FixedOrderPairs.
count(ValuePair(I->second, I->first)))
810 AllFixedOrderPairs.
insert(ValuePair(I->second, I->first));
815 PairConnectionTypes.
find(VPPair(*I, *J));
816 if (K != PairConnectionTypes.
end()) {
817 AllPairConnectionTypes.
insert(*K);
819 K = PairConnectionTypes.
find(VPPair(*J, *I));
820 if (K != PairConnectionTypes.
end())
821 AllPairConnectionTypes.
insert(*K);
826 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
827 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
829 for (std::vector<ValuePair>::iterator J = I->second.begin(),
830 JE = I->second.end(); J != JE; ++J)
831 if (AllPairConnectionTypes.
count(VPPair(I->first, *J))) {
832 AllConnectedPairs[I->first].push_back(*J);
833 AllConnectedPairDeps[*J].push_back(I->first);
835 }
while (ShouldContinue);
837 if (AllChosenPairs.
empty())
return false;
838 NumFusedOps += AllChosenPairs.
size();
847 fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
848 AllPairConnectionTypes,
849 AllConnectedPairs, AllConnectedPairDeps);
860 bool BBVectorize::isInstVectorizable(
Instruction *I,
861 bool &IsSimpleLoadStore) {
862 IsSimpleLoadStore =
false;
864 if (
CallInst *
C = dyn_cast<CallInst>(I)) {
865 if (!isVectorizableIntrinsic(
C))
867 }
else if (
LoadInst *
L = dyn_cast<LoadInst>(I)) {
869 IsSimpleLoadStore =
L->isSimple();
870 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
872 }
else if (
StoreInst *S = dyn_cast<StoreInst>(I)) {
874 IsSimpleLoadStore = S->isSimple();
875 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
877 }
else if (
CastInst *
C = dyn_cast<CastInst>(I)) {
879 if (!Config.VectorizeCasts)
882 Type *SrcTy =
C->getSrcTy();
886 Type *DestTy =
C->getDestTy();
889 }
else if (
SelectInst *SI = dyn_cast<SelectInst>(I)) {
890 if (!Config.VectorizeSelect)
899 }
else if (isa<CmpInst>(I)) {
900 if (!Config.VectorizeCmp)
903 if (!Config.VectorizeGEP)
907 if (
G->getNumIndices() != 1)
909 }
else if (!(I->
isBinaryOp() || isa<ShuffleVectorInst>(
I) ||
910 isa<ExtractElementInst>(I) || isa<InsertElementInst>(
I))) {
915 getInstructionTypes(I, T1, T2);
923 if (!Config.VectorizeBools)
931 if (!Config.VectorizeBools)
938 if (!Config.VectorizeFloats
964 bool IsSimpleLoadStore,
bool NonPow2Len,
965 int &CostSavings,
int &FixedOrder) {
967 " <-> " << *J <<
"\n");
978 Type *IT1, *IT2, *JT1, *JT2;
979 getInstructionTypes(I, IT1, IT2);
980 getInstructionTypes(J, JT1, JT2);
981 unsigned MaxTypeBits = std::max(
984 if (!TTI && MaxTypeBits > Config.VectorBits)
989 if (IsSimpleLoadStore) {
991 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
992 int64_t OffsetInElmts = 0;
993 if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
994 IAddressSpace, JAddressSpace, OffsetInElmts) &&
996 FixedOrder = (int) OffsetInElmts;
997 unsigned BottomAlignment = IAlignment;
998 if (OffsetInElmts < 0) BottomAlignment = JAlignment;
1000 Type *aTypeI = isa<StoreInst>(
I) ?
1001 cast<StoreInst>(I)->getValueOperand()->getType() : I->
getType();
1002 Type *aTypeJ = isa<StoreInst>(J) ?
1003 cast<StoreInst>(J)->getValueOperand()->getType() : J->
getType();
1004 Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
1006 if (Config.AlignedOnly) {
1012 if (BottomAlignment < VecAlignment)
1018 IAlignment, IAddressSpace);
1020 JAlignment, JAddressSpace);
1029 if (VCost > ICost + JCost)
1038 else if (!VParts && VCost == ICost + JCost)
1041 CostSavings = ICost + JCost - VCost;
1047 unsigned ICost = getInstrCost(I->
getOpcode(), IT1, IT2);
1048 unsigned JCost = getInstrCost(J->
getOpcode(), JT1, JT2);
1049 Type *VT1 = getVecTypeForPair(IT1, JT1),
1050 *VT2 = getVecTypeForPair(IT2, JT2);
1061 case Instruction::Shl:
1062 case Instruction::LShr:
1063 case Instruction::AShr:
1077 if ((isa<ConstantVector>(IOp) || isa<ConstantDataVector>(IOp)) &&
1078 (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
1081 if (SplatValue !=
nullptr &&
1093 unsigned VCost = getInstrCost(I->
getOpcode(), VT1, VT2, Op1VK, Op2VK);
1095 if (VCost > ICost + JCost)
1103 if (VParts1 > 1 || VParts2 > 1)
1105 else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
1108 CostSavings = ICost + JCost - VCost;
1117 if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
1118 IID == Intrinsic::cttz) {
1120 *A1J = cast<CallInst>(J)->getArgOperand(1);
1121 const SCEV *A1ISCEV = SE->getSCEV(A1I),
1122 *A1JSCEV = SE->getSCEV(A1J);
1123 return (A1ISCEV == A1JSCEV);
1128 if (
auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
1129 FMFCI = FPMOCI->getFastMathFlags();
1140 if (
auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
1141 FMFCJ = FPMOCJ->getFastMathFlags();
1149 "Intrinsic argument counts differ");
1151 if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
1152 IID == Intrinsic::cttz) &&
i == 1)
1161 Type *RetTy = getVecTypeForPair(IT1, JT1);
1164 if (VCost > ICost + JCost)
1173 else if (!RetParts && VCost == ICost + JCost)
1177 if (!Tys[
i]->isVectorTy())
1183 else if (!NumParts && VCost == ICost + JCost)
1187 CostSavings = ICost + JCost - VCost;
1224 if (I == V || Users.
count(V)) {
1230 if (LoadMoveSetPairs) {
1231 UsesI = LoadMoveSetPairs->
count(ValuePair(J, I));
1234 WE = WriteSet.
end(); W != WE; ++W) {
1235 if (W->aliasesUnknownInst(J, *AA)) {
1243 if (UsesI && UpdateUsers) {
1253 bool BBVectorize::getCandidatePairs(
BasicBlock &BB,
1258 std::vector<Value *> &PairableInsts,
bool NonPow2Len) {
1259 size_t TotalPairs = 0;
1261 if (Start == E)
return false;
1263 bool ShouldContinue =
false, IAfterStart =
false;
1265 if (I == Start) IAfterStart =
true;
1267 bool IsSimpleLoadStore;
1268 if (!isInstVectorizable(&*I, IsSimpleLoadStore))
1277 bool JAfterStart = IAfterStart;
1279 for (
unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
1284 bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep);
1285 if (Config.FastDep) {
1295 if (UsesI)
continue;
1300 int CostSavings, FixedOrder;
1301 if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len,
1302 CostSavings, FixedOrder))
1306 if (PairableInsts.empty() ||
1307 PairableInsts[PairableInsts.size() - 1] != &*
I) {
1308 PairableInsts.push_back(&*I);
1311 CandidatePairs[&*
I].push_back(&*J);
1314 CandidatePairCostSavings.
insert(
1315 ValuePairWithCost(ValuePair(&*I, &*J), CostSavings));
1317 if (FixedOrder == 1)
1318 FixedOrderPairs.
insert(ValuePair(&*I, &*J));
1319 else if (FixedOrder == -1)
1320 FixedOrderPairs.
insert(ValuePair(&*J, &*I));
1325 Start = std::next(J);
1326 IAfterStart = JAfterStart =
false;
1330 << *I <<
" <-> " << *J <<
" (cost savings: " <<
1331 CostSavings <<
")\n");
1336 if (PairableInsts.size() >= Config.MaxInsts ||
1337 TotalPairs >= Config.MaxPairs) {
1338 ShouldContinue =
true;
1347 DEBUG(
dbgs() <<
"BBV: found " << PairableInsts.size()
1348 <<
" instructions with candidate pairs\n");
1350 return ShouldContinue;
1356 void BBVectorize::computePairsConnectedTo(
1359 std::vector<Value *> &PairableInsts,
1360 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1368 E = P.first->user_end();
1371 if (isa<LoadInst>(UI)) {
1375 }
else if ((SI = dyn_cast<StoreInst>(UI)) &&
1385 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1390 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1391 VPPair VP(P, ValuePair(UI, UJ));
1392 ConnectedPairs[VP.first].push_back(VP.second);
1393 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionDirect));
1397 if (CandidatePairsSet.
count(ValuePair(UJ, UI))) {
1398 VPPair VP(P, ValuePair(UJ, UI));
1399 ConnectedPairs[VP.first].push_back(VP.second);
1400 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSwap));
1404 if (Config.SplatBreaksChain)
continue;
1409 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1413 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1414 VPPair VP(P, ValuePair(UI, UJ));
1415 ConnectedPairs[VP.first].push_back(VP.second);
1416 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1421 if (Config.SplatBreaksChain)
return;
1425 E = P.second->user_end();
1428 if (isa<LoadInst>(UI))
1430 else if ((SI = dyn_cast<StoreInst>(UI)) &&
1436 if ((SJ = dyn_cast<StoreInst>(UJ)) &&
1440 if (CandidatePairsSet.
count(ValuePair(UI, UJ))) {
1441 VPPair VP(P, ValuePair(UI, UJ));
1442 ConnectedPairs[VP.first].push_back(VP.second);
1443 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1452 void BBVectorize::computeConnectedPairs(
1455 std::vector<Value *> &PairableInsts,
1456 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1458 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
1459 PE = PairableInsts.end(); PI != PE; ++PI) {
1461 CandidatePairs.
find(*PI);
1462 if (PP == CandidatePairs.
end())
1465 for (std::vector<Value *>::iterator P = PP->second.
begin(),
1466 E = PP->second.
end(); P !=
E; ++
P)
1467 computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
1468 PairableInsts, ConnectedPairs,
1469 PairConnectionTypes, ValuePair(*PI, *P));
1472 DEBUG(
size_t TotalPairs = 0;
1473 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
1474 ConnectedPairs.
begin(), IE = ConnectedPairs.
end(); I !=
IE; ++
I)
1475 TotalPairs += I->second.size();
1476 dbgs() <<
"BBV: found " << TotalPairs
1477 <<
" pair connections.\n");
1483 void BBVectorize::buildDepMap(
1486 std::vector<Value *> &PairableInsts,
1490 CandidatePairs.
begin(), E = CandidatePairs.
end();
C !=
E; ++
C) {
1492 IsInPair.
insert(
C->second.begin(),
C->second.end());
1501 if (IsInPair.
find(&*I) == IsInPair.
end())
1510 (void)trackUsesOfI(Users, WriteSet, &*I, &*J);
1518 if (IsInPair.
find(*U) == IsInPair.
end())
continue;
1519 PairableInstUsers.
insert(ValuePair(&*I, *U));
1530 bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
1532 DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
1535 bool QUsesP = PairableInstUsers.
count(ValuePair(P.first, Q.first)) ||
1536 PairableInstUsers.
count(ValuePair(P.first, Q.second)) ||
1537 PairableInstUsers.
count(ValuePair(P.second, Q.first)) ||
1538 PairableInstUsers.
count(ValuePair(P.second, Q.second));
1539 bool PUsesQ = PairableInstUsers.
count(ValuePair(Q.first, P.first)) ||
1540 PairableInstUsers.
count(ValuePair(Q.first, P.second)) ||
1541 PairableInstUsers.
count(ValuePair(Q.second, P.first)) ||
1542 PairableInstUsers.
count(ValuePair(Q.second, P.second));
1543 if (PairableInstUserMap) {
1548 if (PairableInstUserPairSet->
insert(VPPair(Q, P)).second)
1549 (*PairableInstUserMap)[Q].push_back(P);
1552 if (PairableInstUserPairSet->
insert(VPPair(P, Q)).second)
1553 (*PairableInstUserMap)[
P].push_back(Q);
1557 return (QUsesP && PUsesQ);
1562 bool BBVectorize::pairWillFormCycle(ValuePair P,
1563 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1566 dbgs() <<
"BBV: starting cycle check for : " << *P.first <<
" <-> "
1567 << *P.second <<
"\n");
1579 dbgs() <<
"BBV: cycle check visiting: " << *QTop.first <<
" <-> "
1580 << *QTop.second <<
"\n");
1582 PairableInstUserMap.
find(QTop);
1583 if (QQ == PairableInstUserMap.
end())
1586 for (std::vector<ValuePair>::iterator
C = QQ->second.
begin(),
1590 <<
"BBV: rejected to prevent non-trivial cycle formation: "
1591 << QTop.first <<
" <-> " <<
C->second <<
"\n");
1598 }
while (!Q.
empty());
1605 void BBVectorize::buildInitialDAGFor(
1608 std::vector<Value *> &PairableInsts,
1609 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1618 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1620 ValuePairWithDepth QTop = Q.
back();
1623 bool MoreChildren =
false;
1624 size_t MaxChildDepth = QTop.second;
1626 ConnectedPairs.
find(QTop.first);
1627 if (QQ != ConnectedPairs.
end())
1628 for (std::vector<ValuePair>::iterator k = QQ->second.
begin(),
1629 ke = QQ->second.
end(); k != ke; ++k) {
1631 if (CandidatePairsSet.
count(*k)) {
1633 if (C == DAG.
end()) {
1634 size_t d = getDepthFactor(k->first);
1635 Q.
push_back(ValuePairWithDepth(*k, QTop.second+d));
1636 MoreChildren =
true;
1638 MaxChildDepth = std::max(MaxChildDepth, C->second);
1643 if (!MoreChildren) {
1645 DAG.
insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
1648 }
while (!Q.
empty());
1653 void BBVectorize::pruneDAGFor(
1655 std::vector<Value *> &PairableInsts,
1656 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1658 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1663 bool UseCycleCheck) {
1666 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1669 PrunedDAG.
insert(QTop.first);
1674 ConnectedPairs.
find(QTop.first);
1675 if (QQ == ConnectedPairs.
end())
1678 for (std::vector<ValuePair>::iterator K = QQ->second.
begin(),
1679 KE = QQ->second.
end(); K != KE; ++K) {
1681 if (C == DAG.
end())
continue;
1709 = BestChildren.
begin(), E2 = BestChildren.
end();
1711 if (C2->first.first == C->first.first ||
1712 C2->first.first == C->first.second ||
1713 C2->first.second == C->first.first ||
1714 C2->first.second == C->first.second ||
1715 pairsConflict(C2->first, C->first, PairableInstUsers,
1716 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1717 UseCycleCheck ? &PairableInstUserPairSet
1719 if (C2->second >= C->second) {
1724 CurrentPairs.
insert(C2->first);
1727 if (!CanAdd)
continue;
1732 E2 = PrunedDAG.
end();
T != E2; ++
T) {
1733 if (
T->first == C->first.first ||
1734 T->first == C->first.second ||
1735 T->second == C->first.first ||
1736 T->second == C->first.second ||
1737 pairsConflict(*
T, C->first, PairableInstUsers,
1738 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1739 UseCycleCheck ? &PairableInstUserPairSet
1747 if (!CanAdd)
continue;
1751 E2 = Q.
end(); C2 != E2; ++C2) {
1752 if (C2->first.first == C->first.first ||
1753 C2->first.first == C->first.second ||
1754 C2->first.second == C->first.first ||
1755 C2->first.second == C->first.second ||
1756 pairsConflict(C2->first, C->first, PairableInstUsers,
1757 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1758 UseCycleCheck ? &PairableInstUserPairSet
1764 CurrentPairs.
insert(C2->first);
1766 if (!CanAdd)
continue;
1771 ChosenPairs.
begin(), E2 = ChosenPairs.
end();
1773 if (pairsConflict(*C2, C->first, PairableInstUsers,
1774 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1775 UseCycleCheck ? &PairableInstUserPairSet
1781 CurrentPairs.
insert(*C2);
1783 if (!CanAdd)
continue;
1793 if (UseCycleCheck &&
1794 pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
1802 = BestChildren.
begin(); C2 != BestChildren.
end();) {
1803 if (C2->first.first == C->first.first ||
1804 C2->first.first == C->first.second ||
1805 C2->first.second == C->first.first ||
1806 C2->first.second == C->first.second ||
1807 pairsConflict(C2->first, C->first, PairableInstUsers))
1808 C2 = BestChildren.
erase(C2);
1813 BestChildren.
push_back(ValuePairWithDepth(C->first, C->second));
1817 = BestChildren.
begin(), E2 = BestChildren.
end();
1819 size_t DepthF = getDepthFactor(C->first.first);
1820 Q.
push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
1822 }
while (!Q.
empty());
1827 void BBVectorize::findBestDAGFor(
1831 std::vector<Value *> &PairableInsts,
1834 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1835 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
1837 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1841 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
1842 bool UseCycleCheck) {
1843 for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
1845 ValuePair IJ(II, *J);
1846 if (!CandidatePairsSet.
count(IJ))
1853 bool DoesConflict =
false;
1855 E = ChosenPairs.
end(); C !=
E; ++
C) {
1856 if (pairsConflict(*C, IJ, PairableInstUsers,
1857 UseCycleCheck ? &PairableInstUserMap :
nullptr,
1858 UseCycleCheck ? &PairableInstUserPairSet :
nullptr)) {
1859 DoesConflict =
true;
1863 ChosenPairSet.
insert(*C);
1865 if (DoesConflict)
continue;
1867 if (UseCycleCheck &&
1868 pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
1872 buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
1873 PairableInsts, ConnectedPairs,
1874 PairableInstUsers, ChosenPairs, DAG, IJ);
1881 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
1882 MaxDepth <<
" and size " << DAG.
size() <<
"\n");
1892 pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
1893 PairableInstUsers, PairableInstUserMap,
1894 PairableInstUserPairSet,
1895 ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
1901 E = PrunedDAG.end(); S !=
E; ++S) {
1902 PrunedDAGInstrs.
insert(S->first);
1903 PrunedDAGInstrs.
insert(S->second);
1912 bool HasNontrivialInsts =
false;
1917 E = PrunedDAG.end(); S !=
E; ++S) {
1918 if (!isa<ShuffleVectorInst>(S->first) &&
1919 !isa<InsertElementInst>(S->first) &&
1920 !isa<ExtractElementInst>(S->first))
1921 HasNontrivialInsts =
true;
1923 bool FlipOrder =
false;
1925 if (getDepthFactor(S->first)) {
1926 int ESContrib = CandidatePairCostSavings.
find(*S)->second;
1928 << *S->first <<
" <-> " << *S->second <<
"} = " <<
1930 EffSize += ESContrib;
1936 ConnectedPairDeps.
find(*S);
1937 if (SS != ConnectedPairDeps.
end()) {
1938 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
1939 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1940 TE = SS->second.
end();
T != TE; ++
T) {
1942 if (!PrunedDAG.count(Q.second))
1945 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1946 assert(R != PairConnectionTypes.
end() &&
1947 "Cannot find pair connection type");
1948 if (R->second == PairConnectionDirect)
1950 else if (R->second == PairConnectionSwap)
1957 FlipOrder = !FixedOrderPairs.
count(*S) &&
1958 ((NumDepsSwap > NumDepsDirect) ||
1959 FixedOrderPairs.
count(ValuePair(S->second, S->first)));
1961 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1962 TE = SS->second.
end();
T != TE; ++
T) {
1964 if (!PrunedDAG.count(Q.second))
1967 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1968 assert(R != PairConnectionTypes.
end() &&
1969 "Cannot find pair connection type");
1970 Type *Ty1 = Q.second.first->getType(),
1971 *Ty2 = Q.second.second->getType();
1972 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1973 if ((R->second == PairConnectionDirect && FlipOrder) ||
1974 (R->second == PairConnectionSwap && !FlipOrder) ||
1975 R->second == PairConnectionSplat) {
1976 int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
1980 if (R->second == PairConnectionSplat)
1989 *Q.second.first <<
" <-> " << *Q.second.second <<
1991 *S->first <<
" <-> " << *S->second <<
"} = " <<
1993 EffSize -= ESContrib;
2001 if (!S->first->getType()->isVoidTy()) {
2002 Type *Ty1 = S->first->getType(),
2003 *Ty2 = S->second->getType();
2004 Type *VTy = getVecTypeForPair(Ty1, Ty2);
2006 bool NeedsExtraction =
false;
2013 if (isa<ExtractElementInst>(U))
2015 if (PrunedDAGInstrs.
count(U))
2017 NeedsExtraction =
true;
2021 if (NeedsExtraction) {
2024 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2030 Instruction::ExtractElement, VTy, 0);
2033 *S->first <<
"} = " << ESContrib <<
"\n");
2034 EffSize -= ESContrib;
2037 NeedsExtraction =
false;
2044 if (isa<ExtractElementInst>(U))
2046 if (PrunedDAGInstrs.
count(U))
2048 NeedsExtraction =
true;
2052 if (NeedsExtraction) {
2054 if (Ty2->isVectorTy()) {
2055 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2062 Instruction::ExtractElement, VTy, 1);
2064 *S->second <<
"} = " << ESContrib <<
"\n");
2065 EffSize -= ESContrib;
2070 if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
2072 *S2 = cast<Instruction>(S->second);
2078 if (isa<Constant>(O1) && isa<Constant>(O2))
2084 ValuePair VP = ValuePair(O1, O2);
2085 ValuePair VPR = ValuePair(O2, O1);
2088 if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
2092 *Ty2 = O2->getType();
2093 Type *VTy = getVecTypeForPair(Ty1, Ty2);
2100 *IEO2 = dyn_cast<InsertElementInst>(O2);
2101 if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
2106 *EIO2 = dyn_cast<ExtractElementInst>(O2);
2109 EIO2->getOperand(0)->getType())
2115 *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
2118 SIO2->getOperand(0)->getType()) {
2122 SIOps.
insert(SIO2->getOperand(0));
2123 SIOps.
insert(SIO2->getOperand(1));
2124 if (SIOps.
size() <= 2)
2131 if (IncomingPairs.
count(VP)) {
2133 }
else if (IncomingPairs.
count(VPR)) {
2134 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2140 }
else if (!Ty1->
isVectorTy() && !Ty2->isVectorTy()) {
2142 Instruction::InsertElement, VTy, 0);
2144 Instruction::InsertElement, VTy, 1);
2149 Instruction::InsertElement, Ty2, 0);
2150 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2152 }
else if (!Ty2->isVectorTy()) {
2156 Instruction::InsertElement, Ty1, 0);
2157 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2160 Type *TyBig = Ty1, *TySmall = Ty2;
2164 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2166 if (TyBig != TySmall)
2167 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2172 << *O1 <<
" <-> " << *O2 <<
"} = " <<
2174 EffSize -= ESContrib;
2175 IncomingPairs.
insert(VP);
2180 if (!HasNontrivialInsts) {
2182 "\tNo non-trivial instructions in DAG;"
2183 " override to zero effective size\n");
2188 E = PrunedDAG.end(); S !=
E; ++S)
2189 EffSize += (
int) getDepthFactor(S->first);
2193 dbgs() <<
"BBV: found pruned DAG for pair {"
2194 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
2195 MaxDepth <<
" and size " << PrunedDAG.size() <<
2196 " (effective size: " << EffSize <<
")\n");
2198 MaxDepth >= Config.ReqChainDepth) &&
2199 EffSize > 0 && EffSize > BestEffSize) {
2201 BestEffSize = EffSize;
2202 BestDAG = PrunedDAG;
2209 void BBVectorize::choosePairs(
2213 std::vector<Value *> &PairableInsts,
2216 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
2217 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
2220 bool UseCycleCheck =
2221 CandidatePairsSet.
size() <= Config.MaxCandPairsForCycleCheck;
2225 E = CandidatePairsSet.
end(); I !=
E; ++
I) {
2226 std::vector<Value *> &JJ = CandidatePairs2[I->second];
2227 if (JJ.empty()) JJ.
reserve(32);
2228 JJ.push_back(I->first);
2233 for (std::vector<Value *>::iterator I = PairableInsts.begin(),
2234 E = PairableInsts.end(); I !=
E; ++
I) {
2236 size_t NumChoices = CandidatePairs.
lookup(*I).size();
2237 if (!NumChoices)
continue;
2239 std::vector<Value *> &JJ = CandidatePairs[*
I];
2242 size_t BestMaxDepth = 0;
2243 int BestEffSize = 0;
2245 findBestDAGFor(CandidatePairs, CandidatePairsSet,
2246 CandidatePairCostSavings,
2247 PairableInsts, FixedOrderPairs, PairConnectionTypes,
2248 ConnectedPairs, ConnectedPairDeps,
2249 PairableInstUsers, PairableInstUserMap,
2250 PairableInstUserPairSet, ChosenPairs,
2251 BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
2254 if (BestDAG.
empty())
2261 DEBUG(
dbgs() <<
"BBV: selected pairs in the best DAG for: "
2262 << *cast<Instruction>(*I) <<
"\n");
2265 SE2 = BestDAG.
end(); S != SE2; ++S) {
2267 ChosenPairs.
insert(ValuePair(S->first, S->second));
2268 DEBUG(
dbgs() <<
"BBV: selected pair: " << *S->first <<
" <-> " <<
2269 *S->second <<
"\n");
2272 std::vector<Value *> &KK = CandidatePairs[S->first];
2273 for (std::vector<Value *>::iterator K = KK.
begin(), KE = KK.end();
2275 if (*K == S->second)
2278 CandidatePairsSet.
erase(ValuePair(S->first, *K));
2281 std::vector<Value *> &LL = CandidatePairs2[S->second];
2282 for (std::vector<Value *>::iterator
L = LL.
begin(),
LE = LL.end();
2287 CandidatePairsSet.
erase(ValuePair(*
L, S->second));
2290 std::vector<Value *> &MM = CandidatePairs[S->second];
2291 for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
2293 assert(*M != S->first &&
"Flipped pair in candidate list?");
2294 CandidatePairsSet.
erase(ValuePair(S->second, *M));
2297 std::vector<Value *> &NN = CandidatePairs2[S->first];
2298 for (std::vector<Value *>::iterator
N = NN.begin(),
NE = NN.end();
2300 assert(*
N != S->second &&
"Flipped pair in candidate list?");
2301 CandidatePairsSet.
erase(ValuePair(*
N, S->first));
2306 DEBUG(
dbgs() <<
"BBV: selected " << ChosenPairs.
size() <<
" pairs.\n");
2309 std::string getReplacementName(
Instruction *I,
bool IsInput,
unsigned o,
2314 return (I->
getName() + (IsInput ?
".v.i" :
".v.r") +
utostr(o) +
2315 (n > 0 ?
"." +
utostr(n) :
"")).str();
2323 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
2324 int64_t OffsetInElmts;
2328 (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
2329 IAddressSpace, JAddressSpace,
2330 OffsetInElmts,
false);
2337 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2341 return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I,
true, o),
2346 unsigned MaskOffset,
unsigned NumInElem,
2347 unsigned NumInElem1,
unsigned IdxOffset,
2348 std::vector<Constant*> &
Mask) {
2350 for (
unsigned v = 0; v < NumElem1; ++v) {
2351 int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
2355 unsigned mm = m + (int) IdxOffset;
2356 if (m >= (
int) NumInElem1)
2357 mm += (
int) NumInElem;
2359 Mask[v+MaskOffset] =
2374 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2382 std::vector<Constant*>
Mask(NumElem);
2398 fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
2402 fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
2412 bool IBeforeJ,
unsigned IdxOff) {
2413 bool ExpandedIEChain =
false;
2417 if (isPureIEChain(LIE)) {
2423 cast<ConstantInt>(LIENext->
getOperand(2))->getSExtValue();
2426 dyn_cast<InsertElementInst>(LIENext->
getOperand(0))));
2430 for (
unsigned i = 0;
i < numElemL; ++
i) {
2431 if (isa<UndefValue>(VectElemts[
i]))
continue;
2435 getReplacementName(IBeforeJ ? I : J,
2442 ExpandedIEChain =
true;
2446 return ExpandedIEChain;
2449 static unsigned getNumScalarElements(
Type *Ty) {
2450 if (
VectorType *VecTy = dyn_cast<VectorType>(Ty))
2451 return VecTy->getNumElements();
2465 VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2468 Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
2470 unsigned numElemL = getNumScalarElements(ArgTypeL);
2471 unsigned numElemH = getNumScalarElements(ArgTypeH);
2489 bool IsSizeChangeShuffle =
2490 isa<ShuffleVectorInst>(
L) &&
2493 if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
2495 bool CanUseInputs =
true;
2496 Value *I1, *I2 =
nullptr;
2500 I1 = LSV->getOperand(0);
2501 I2 = LSV->getOperand(1);
2502 if (I2 == I1 || isa<UndefValue>(I2))
2507 Value *I3 = HEE->getOperand(0);
2508 if (!I2 && I3 != I1)
2510 else if (I3 != I1 && I3 != I2)
2511 CanUseInputs =
false;
2513 Value *I3 = HSV->getOperand(0);
2514 if (!I2 && I3 != I1)
2516 else if (I3 != I1 && I3 != I2)
2517 CanUseInputs =
false;
2520 Value *I4 = HSV->getOperand(1);
2521 if (!isa<UndefValue>(I4)) {
2522 if (!I2 && I4 != I1)
2524 else if (I4 != I1 && I4 != I2)
2525 CanUseInputs =
false;
2532 cast<Instruction>(LOp)->getOperand(0)->getType()
2533 ->getVectorNumElements();
2536 cast<Instruction>(
HOp)->getOperand(0)->getType()
2537 ->getVectorNumElements();
2542 for (
unsigned i = 0; i < numElemL; ++
i) {
2546 cast<ConstantInt>(LEE->
getOperand(1))->getSExtValue();
2549 Idx = LSV->getMaskValue(i);
2550 if (Idx < (
int) LOpElem) {
2551 INum = LSV->getOperand(0) == I1 ? 0 : 1;
2554 INum = LSV->getOperand(1) == I1 ? 0 : 1;
2558 II[
i] = std::pair<int, int>(Idx, INum);
2560 for (
unsigned i = 0; i < numElemH; ++
i) {
2564 cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
2565 INum = HEE->getOperand(0) == I1 ? 0 : 1;
2567 Idx = HSV->getMaskValue(i);
2568 if (Idx < (
int) HOpElem) {
2569 INum = HSV->getOperand(0) == I1 ? 0 : 1;
2572 INum = HSV->getOperand(1) == I1 ? 0 : 1;
2576 II[i + numElemL] = std::pair<int, int>(Idx, INum);
2587 if (I1Elem == numElem) {
2588 bool ElemInOrder =
true;
2589 for (
unsigned i = 0; i < numElem; ++
i) {
2590 if (II[i].first != (
int) i && II[i].first != -1) {
2591 ElemInOrder =
false;
2601 std::vector<Constant *>
Mask(numElem);
2602 for (
unsigned i = 0; i < numElem; ++
i) {
2603 int Idx = II[
i].first;
2613 getReplacementName(IBeforeJ ? I : J,
2625 if (I1Elem < I2Elem) {
2626 std::vector<Constant *>
Mask(I2Elem);
2628 for (; v < I1Elem; ++v)
2630 for (; v < I2Elem; ++v)
2636 getReplacementName(IBeforeJ ? I : J,
2641 }
else if (I1Elem > I2Elem) {
2642 std::vector<Constant *>
Mask(I1Elem);
2644 for (; v < I2Elem; ++v)
2646 for (; v < I1Elem; ++v)
2652 getReplacementName(IBeforeJ ? I : J,
2660 std::vector<Constant *>
Mask(numElem);
2661 for (
unsigned v = 0; v < numElem; ++v) {
2662 if (II[v].first == -1) {
2665 int Idx = II[v].first + II[v].second * I1Elem;
2672 getReplacementName(IBeforeJ ? I : J,
true, o));
2678 Type *ArgType = ArgTypeL;
2679 if (numElemL < numElemH) {
2680 if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
2681 ArgTypeL, VArgType, IBeforeJ, 1)) {
2687 getReplacementName(IBeforeJ ? I : J,
true, o));
2690 }
else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
2691 ArgTypeH, IBeforeJ)) {
2697 std::vector<Constant *>
Mask(numElemH);
2699 for (; v < numElemL; ++v)
2701 for (; v < numElemH; ++v)
2706 getReplacementName(IBeforeJ ? I : J,
2710 getReplacementName(IBeforeJ ? I : J,
2719 }
else if (numElemL > numElemH) {
2720 if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
2721 ArgTypeH, VArgType, IBeforeJ)) {
2726 getReplacementName(IBeforeJ ? I : J,
2730 }
else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
2731 ArgTypeL, IBeforeJ)) {
2734 std::vector<Constant *>
Mask(numElemL);
2736 for (; v < numElemH; ++v)
2738 for (; v < numElemL; ++v)
2743 getReplacementName(IBeforeJ ? I : J,
2747 getReplacementName(IBeforeJ ? I : J,
2758 std::vector<Constant*>
Mask(numElem);
2759 for (
unsigned v = 0; v < numElem; ++v) {
2763 if (v >= numElemL && numElemH > numElemL)
2764 Idx += (numElemH - numElemL);
2770 getReplacementName(IBeforeJ ? I : J,
true, o));
2777 getReplacementName(IBeforeJ ? I : J,
2781 getReplacementName(IBeforeJ ? I : J,
2789 void BBVectorize::getReplacementInputsForPair(
LLVMContext& Context,
2795 for (
unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
2799 if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
2801 ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
2803 }
else if (isa<CallInst>(I)) {
2806 if (o == NumOperands-1) {
2812 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2816 }
else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
2817 IID == Intrinsic::cttz) && o == 1) {
2824 }
else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
2825 ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
2829 ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
2841 if (isa<StoreInst>(I))
2847 VectorType *VType = getVecTypeForPair(IType, JType);
2850 unsigned numElemI = getNumScalarElements(IType);
2851 unsigned numElemJ = getNumScalarElements(JType);
2854 std::vector<Constant *> Mask1(numElemI), Mask2(numElemI);
2855 for (
unsigned v = 0; v < numElemI; ++v) {
2862 getReplacementName(K,
false, 1));
2869 std::vector<Constant *> Mask1(numElemJ), Mask2(numElemJ);
2870 for (
unsigned v = 0; v < numElemJ; ++v) {
2877 getReplacementName(K,
false, 2));
2889 bool BBVectorize::canMoveUsesOfIAfterJ(
BasicBlock &BB,
2899 for (; cast<Instruction>(
L) != J; ++
L)
2900 (
void)trackUsesOfI(Users, WriteSet, I, &*L,
true, &LoadMoveSetPairs);
2902 assert(cast<Instruction>(L) == J &&
2903 "Tracking has not proceeded far enough to check for dependencies");
2906 return !trackUsesOfI(Users, WriteSet, I, J,
true, &LoadMoveSetPairs);
2910 void BBVectorize::moveUsesOfIAfterJ(
BasicBlock &BB,
2921 for (; cast<Instruction>(
L) != J;) {
2922 if (trackUsesOfI(Users, WriteSet, I, &*L,
true, &LoadMoveSetPairs)) {
2926 DEBUG(
dbgs() <<
"BBV: moving: " << *InstToMove <<
2927 " to after " << *InsertionPt <<
"\n");
2930 InsertionPt = InstToMove;
2940 void BBVectorize::collectPairLoadMoveSet(
BasicBlock &BB,
2956 if (trackUsesOfI(Users, WriteSet, I, &*L)) {
2957 if (L->mayReadFromMemory()) {
2958 LoadMoveSet[&*
L].push_back(I);
2959 LoadMoveSetPairs.
insert(ValuePair(&*L, I));
2972 void BBVectorize::collectLoadMoveSet(
BasicBlock &BB,
2973 std::vector<Value *> &PairableInsts,
2977 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
2978 PIE = PairableInsts.end(); PI != PIE; ++PI) {
2980 if (P == ChosenPairs.
end())
continue;
2983 collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
2984 LoadMoveSetPairs, I);
2994 void BBVectorize::fuseChosenPairs(
BasicBlock &BB,
2995 std::vector<Value *> &PairableInsts,
2999 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
3000 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
3008 E = ChosenPairs.
end(); P !=
E; ++
P)
3009 FlippedPairs.
insert(ValuePair(P->second, P->first));
3011 E = FlippedPairs.
end(); P !=
E; ++
P)
3016 collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
3017 LoadMoveSet, LoadMoveSetPairs);
3019 DEBUG(
dbgs() <<
"BBV: initial: \n" << BB <<
"\n");
3023 if (P == ChosenPairs.
end()) {
3028 if (getDepthFactor(P->first) == 0) {
3038 *J = cast<Instruction>(P->second);
3040 DEBUG(
dbgs() <<
"BBV: fusing: " << *I <<
3041 " <-> " << *J <<
"\n");
3045 assert(FP != ChosenPairs.
end() &&
"Flipped pair not found in list");
3046 ChosenPairs.
erase(FP);
3047 ChosenPairs.
erase(P);
3049 if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
3050 DEBUG(
dbgs() <<
"BBV: fusion of: " << *I <<
3052 " aborted because of non-trivial dependency cycle\n");
3059 bool FlipPairOrder = FixedOrderPairs.
count(ValuePair(J, I));
3060 if (!FlipPairOrder && !FixedOrderPairs.
count(ValuePair(I, J))) {
3065 bool OrigOrder =
true;
3067 ConnectedPairDeps.
find(ValuePair(I, J));
3068 if (IJ == ConnectedPairDeps.
end()) {
3069 IJ = ConnectedPairDeps.
find(ValuePair(J, I));
3073 if (IJ != ConnectedPairDeps.
end()) {
3074 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
3075 for (std::vector<ValuePair>::iterator
T = IJ->second.
begin(),
3076 TE = IJ->second.
end();
T != TE; ++
T) {
3077 VPPair Q(IJ->first, *
T);
3079 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
3080 assert(R != PairConnectionTypes.
end() &&
3081 "Cannot find pair connection type");
3082 if (R->second == PairConnectionDirect)
3084 else if (R->second == PairConnectionSwap)
3091 if (NumDepsSwap > NumDepsDirect) {
3092 FlipPairOrder =
true;
3093 DEBUG(
dbgs() <<
"BBV: reordering pair: " << *I <<
3094 " <-> " << *J <<
"\n");
3106 ConnectedPairs.
find(ValuePair(H, L));
3107 if (HL != ConnectedPairs.
end())
3108 for (std::vector<ValuePair>::iterator
T = HL->second.
begin(),
3109 TE = HL->second.
end();
T != TE; ++
T) {
3110 VPPair Q(HL->first, *
T);
3112 assert(R != PairConnectionTypes.
end() &&
3113 "Cannot find pair connection type");
3114 if (R->second == PairConnectionDirect)
3115 R->second = PairConnectionSwap;
3116 else if (R->second == PairConnectionSwap)
3117 R->second = PairConnectionDirect;
3120 bool LBeforeH = !FlipPairOrder;
3123 getReplacementInputsForPair(Context, L, H, ReplacedOperands,
3131 else if (H->hasName())
3139 for (
unsigned i = 0; i != NumOld; ++
i)
3140 Tys.
push_back(ReplacedOperands[i]->getType());
3141 CS.mutateFunctionType(
3144 }
else if (!isa<StoreInst>(K))
3153 for (
unsigned o = 0; o < NumOperands; ++o)
3161 replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
3168 moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
3170 if (!isa<StoreInst>(I)) {
3172 H->replaceAllUsesWith(K2);
3182 std::vector<ValuePair> NewSetMembers;
3184 LoadMoveSet.
find(I);
3185 if (II != LoadMoveSet.
end())
3186 for (std::vector<Value *>::iterator
N = II->second.
begin(),
3188 NewSetMembers.push_back(ValuePair(K, *
N));
3190 LoadMoveSet.
find(J);
3191 if (JJ != LoadMoveSet.
end())
3192 for (std::vector<Value *>::iterator
N = JJ->second.
begin(),
3194 NewSetMembers.push_back(ValuePair(K, *
N));
3195 for (std::vector<ValuePair>::iterator
A = NewSetMembers.begin(),
3196 AE = NewSetMembers.end();
A != AE; ++
A) {
3197 LoadMoveSet[
A->first].push_back(
A->second);
3204 if (cast<Instruction>(PI) == J)
3216 DEBUG(
dbgs() <<
"BBV: final: \n" << BB <<
"\n");
3234 return new BBVectorize(C);
3239 BBVectorize BBVectorizer(P, *BB.
getParent(),
C);
3240 return BBVectorizer.vectorizeBB(BB);
Legacy wrapper pass to provide the GlobalsAAResult object.
Pass interface - Implemented by all 'passes'.
bool VectorizeFMA
Vectorize the fused-multiply-add intrinsic.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void push_back(const T &Elt)
A parsed version of the target data layout string in and methods for querying it. ...
static cl::opt< unsigned > MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, cl::desc("The maximum number of candidate instruction pairs per group"))
This class is the base class for the comparison instructions.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
This is the interface for a simple mod/ref and alias analysis over globals.
A Module instance is used to store all the information related to an LLVM module. ...
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
static cl::opt< bool > DebugPairSelection("bb-vectorize-debug-pair-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" pair-selection process"))
iterator find(const ValueT &V)
Implements a dense probed hash-table based set.
unsigned getNumOperands() const
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
The main scalar evolution driver.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
This class represents a function call, abstracting a target machine's calling convention.
void initializeBBVectorizePass(PassRegistry &)
static bool runOnBasicBlock(BasicBlock &BB)
static cl::opt< bool > NoMemOpBoost("bb-vectorize-no-mem-op-boost", cl::init(false), cl::Hidden, cl::desc("Don't boost the chain-depth contribution of loads and stores"))
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
static uint64_t round(uint64_t Acc, uint64_t Input)
static cl::opt< bool > IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), cl::Hidden, cl::desc("Ignore target information"))
This instruction constructs a fixed permutation of two input vectors.
bool erase(const ValueT &V)
bool VectorizeMath
Vectorize floating-point math intrinsics.
const Function * getParent() const
Return the enclosing method, or null if none.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
An instruction for reading from memory.
iv Induction Variable Users
static cl::opt< unsigned > VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, cl::desc("The size of the native vector registers"))
Type * getPointerElementType() const
StringRef getName() const
Return a constant reference to the value's name.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
AnalysisUsage & addRequired()
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
#define INITIALIZE_PASS_DEPENDENCY(depName)
This is the interface for a SCEV-based alias analysis.
static cl::opt< bool > AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, cl::desc("Only generate aligned loads and stores"))
This class represents the LLVM 'select' instruction.
unsigned MaxCandPairsForCycleCheck
The maximum number of candidate pairs with which to use a full cycle check.
This is the base class for all instructions that perform data casts.
const APInt & getValue() const
Return the constant as an APInt value reference.
const_iterator end() const
A Use represents the edge between a Value definition and its users.
unsigned getNumArgOperands() const
Return the number of call arguments.
static Constant * get(ArrayRef< Constant * > V)
Windows NT (Windows on ARM)
Check for equivalence ignoring load/store alignment.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
static cl::opt< unsigned > MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group"))
static const char bb_vectorize_name[]
LLVM_NODISCARD bool empty() const
bool Pow2LenOnly
Don't try to form odd-length vectors.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
const_iterator begin() const
user_iterator_impl< User > user_iterator
Class to represent function types.
Check for equivalence treating a type and a vector of that type as equivalent.
static cl::opt< bool > NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point math intrinsics"))
bool mayReadFromMemory() const
Return true if this instruction may read memory.
static cl::opt< bool > UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), cl::Hidden, cl::desc("Use the chain depth requirement with"" target information"))
static cl::opt< bool > NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"))
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction...
Function Alias Analysis false
This class represents a no-op cast from one type to another.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static std::string utostr(uint64_t X, bool isNeg=false)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
An instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void takeName(Value *V)
Transfer the name from V to this value.
bool VectorizePointers
Vectorize pointer values.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
bool VectorizeCmp
Vectorize comparison instructions.
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
unsigned MaxIter
The maximum number of pairing iterations.
bool VectorizeMemOps
Vectorize loads and stores.
static cl::opt< bool > FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, cl::desc("Use a fast instruction dependency analysis"))
static cl::opt< bool > NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize comparison instructions"))
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
bool isX86_MMXTy() const
Return true if this is X86 MMX.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
initializer< Ty > init(const Ty &Val)
bool erase(const KeyT &Val)
This instruction inserts a single (scalar) element into a VectorType value.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
bool AlignedOnly
Only generate aligned loads and stores.
static cl::opt< bool > NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values"))
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
LLVM Basic Block Representation.
static cl::opt< unsigned > SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, cl::desc("The maximum search distance for instruction pairs"))
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
bool isVectorTy() const
True if this is an instance of VectorType.
This is an important base class in LLVM.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
bool SplatBreaksChain
Replicating one element to a pair breaks the chain.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< bool > DebugInstructionExamination("bb-vectorize-debug-instruction-examination", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" instruction-examination process"))
std::pair< iterator, bool > insert(const ValueT &V)
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
static cl::opt< bool > NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize BitManipulation intrinsics"))
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
uint64_t getNumElements() const
Value * getOperand(unsigned i) const
Value * getPointerOperand()
unsigned SearchLimit
The maximum search distance for instruction pairs.
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again...
static cl::opt< bool > NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize casting (conversion) operations"))
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
static cl::opt< unsigned > MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"" a full cycle check"))
bool isPointerTy() const
True if this is an instance of PointerType.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
iterator erase(const_iterator CI)
static cl::opt< unsigned > MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations"))
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
BasicBlockPass class - This class is used to implement most local optimizations.
unsigned MaxPairs
The maximum number of candidate instruction pairs per group.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Legacy wrapper pass to provide the SCEVAAResult object.
bool VectorizeFloats
Vectorize floating-point values.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Provides information about what library functions are available for the current target.
static cl::opt< bool > SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, cl::desc("Replicating one element to a pair breaks the chain"))
bool vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C=VectorizeConfig())
Vectorize the BasicBlock.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static cl::opt< bool > NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores"))
LLVM_NODISCARD T pop_back_val()
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static cl::opt< bool > NoPointers("bb-vectorize-no-pointers", cl::init(true), cl::Hidden, cl::desc("Don't try to vectorize pointer values"))
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
bool FastDep
Use a fast instruction dependency analysis.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
Class to represent vector types.
static cl::opt< bool > NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize getelementptr instructions"))
iterator_range< user_iterator > users()
static cl::opt< bool > NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize boolean (i1) values"))
unsigned getVectorNumElements() const
bool VectorizeCasts
Vectorize casting (conversion) operations.
BasicBlockPass * createBBVectorizePass(const VectorizeConfig &C=VectorizeConfig())
bool VectorizeBools
Vectorize boolean values.
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
static cl::opt< unsigned > ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, cl::desc("The required chain depth for vectorization"))
unsigned ReqChainDepth
The required chain depth for vectorization.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
bool isX86_FP80Ty() const
Return true if this is x86 long double.
unsigned MaxInsts
The maximum number of pairable instructions per group.
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
static cl::opt< HelpPrinterWrapper, true, parser< bool > > HOp("help", cl::desc("Display available options (-help-hidden for more)"), cl::location(WrappedNormalPrinter), cl::ValueDisallowed, cl::cat(GenericCategory), cl::sub(*AllSubCommands))
static cl::opt< bool > NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize integer values"))
unsigned getAlignment() const
Return the alignment of the access that is being performed.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
bool NoMemOpBoost
Don't boost the chain-depth contribution of loads and stores.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
bool VectorizeGEP
Vectorize getelementptr instructions.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator find(const KeyT &Val)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
static cl::opt< bool > PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, dump the basic block after"" every pair is fused"))
static cl::opt< bool > DebugCycleCheck("bb-vectorize-debug-cycle-check", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" cycle-checking process"))
static cl::opt< bool > DebugCandidateSelection("bb-vectorize-debug-candidate-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" candidate-selection process"))
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
VectorizeConfig()
Initialize the VectorizeConfig from command line options.
static Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
LLVM_NODISCARD bool empty() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
LLVMContext & getContext() const
Get the context in which this basic block lives.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
bool VectorizeInts
Vectorize integer values.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
static cl::opt< bool > Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, cl::desc("Don't try to form non-2^n-length vectors"))
bool VectorizeBitManipulations
Vectorize bit intrinsics.
Convenience struct for specifying and reasoning about fast-math flags.
Legacy analysis pass which computes a DominatorTree.
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
bool VectorizeSelect
Vectorize select instructions.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
Value * getPointerOperand()
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs)
Combine the metadata of two instructions so that K can replace J.
const BasicBlock * getParent() const
InstListType::iterator iterator
Instruction iterators...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
static cl::opt< bool > NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize select instructions"))
unsigned VectorBits
The size of the native vector registers.
This class represents a constant integer value.