#include "llvm/IR/IntrinsicsHexagon.h"
// ...

#define DEBUG_TYPE "hexagon-vc"

class HexagonVectorCombine {
  // ...
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
  // ...

  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  // ...
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
  // ...
                 int Length, int Where) const;
  // ...
                                         unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0,
                                                Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
  // ...
  template <typename T = std::vector<Instruction *>>
  // ...
                             const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
  // ...
                        int Start, int Length) const;
  // ...
};

class AlignVectors {
public:
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
  // ...
  using InstList = std::vector<Instruction *>;

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    // ...
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;
    // ...
  };

  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    // ...
      return A->comesBefore(B);
    // ...
  };

  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
    // ...
  };

  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    struct Segment {
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      // ...
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Block &operator=(const Block &Blk) = default;
      // ...
    };

    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    // ...
    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
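
    // Note on ByteSpan (illustrative, not from the original source): each
    // Block pins Seg.Size bytes of Seg.Val, starting at byte Seg.Start
    // within the value, to absolute position Pos in a larger byte space.
    // A hypothetical span { {V0, 0, 8, 0}, {V1, 0, 8, 4} } covers bytes
    // [0..8) with V0 and [4..12) with V1: extent() is 12, section(4, 4)
    // keeps the 4 overlapping bytes of both blocks, and shift(-4) rebases
    // the positions so that V1 starts at 0.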
    // ...
  };

  Align getAlignFromValue(const Value *V) const;
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  // ...
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
  // ...
                              int Alignment) const;
  // ...
                           int Alignment, Value *Mask, Value *PassThru) const;
  // ...
                            int Alignment, Value *Mask) const;
  // ...
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  // ...
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  // ...
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
};

// operator<<(raw_ostream &, const AlignVectors::AddrInfo &):
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
// ...

// MoveGroup printing:
    OS << "  " << *I << '\n';
  // ...
    OS << "  " << *I << '\n';
// ...

// operator<<(raw_ostream &, const AlignVectors::ByteSpan::Block &):
                         const AlignVectors::ByteSpan::Block &B) {
  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
// ...

// operator<<(raw_ostream &, const AlignVectors::ByteSpan &):
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
// ...

class HvxIdioms {
public:
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);
  // ...

  enum Signedness { Positive, Signed, Unsigned };
  // ...
    std::optional<unsigned> RoundAt;
  // ...
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
  // ...
                     const FxpOp &Op) const -> Value *;
  // ...
                    bool Rounding) const -> Value *;
  // ...
                    bool Rounding) const -> Value *;
  // ...
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
  // ...
      -> std::pair<Value *, Value *>;
  // ...
  const HexagonVectorCombine &HVC;
};

// operator<<(raw_ostream &, const HvxIdioms::FxpOp &):
                         const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  // ...
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      // ...
      OS << " + 1<<" << *Op.RoundAt;
  // ...
  OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
// ...

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
// ...
  return dyn_cast<T>(In);
// ...
  return getIfUnordered(dyn_cast<LoadInst>(In));
// ...
  return getIfUnordered(dyn_cast<StoreInst>(In));
// ...

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// ...
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
// ...
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
// ...
  for (auto i = map.begin(), e = map.end(); i != e;) {
// ...
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
// ...

auto AlignVectors::ByteSpan::extent() const -> int {
  // ...
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  // ...

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  // ...
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
    // ...
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
  // ...

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
  // ...

// ByteSpan::values:
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
// ...

auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
}

auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
  // ...
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    // ...
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
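
// Illustrative note (argument positions as used in the switch above): for a
// masked store such as
//   call void @llvm.masked.store.v64i8.p0(<64 x i8> %v, ptr %p, i32 4,
//                                         <64 x i1> %m)
// the AddrInfo records Addr = %p (operand 1), ValTy = <64 x i8> (type of the
// stored operand 0) and HaveAlign = 4 (operand 2); a masked load instead
// takes its pointer from operand 0 and its alignment from operand 1.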
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    // ...
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  // ...

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);
    // ...
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy))
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
  // ...

// createAdjustedPointer:
                                           Type *ValTy, int Adjust) const
  // ...
  auto *PtrTy = cast<PointerType>(Ptr->getType());
  if (!PtrTy->isOpaque()) {
    // ...
    int ElemSize = HVC.getSizeOf(ElemTy, HVC.Alloc);
    if (Adjust % ElemSize == 0 && Adjust != 0) {
      // ...
          Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
  // ...
                           HVC.getConstInt(Adjust));

// createAlignedPointer:
                                          Type *ValTy, int Alignment) const
  // ...
  Value *Mask = HVC.getConstInt(-Alignment);

// createAlignedLoad:
  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  // ...
  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);

// createAlignedStore:
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
// ...

// getUpwardDeps:
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");
  // ...
  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
    // ...
    for (Value *Op : D->operands()) {
      if (auto *I = dyn_cast<Instruction>(Op)) {
        if (I->getParent() == Parent && Base->comesBefore(I))
// ...

auto AlignVectors::createAddressGroups() -> bool {
  // ...
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    // ...
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    // ...
      auto AI = this->getAddrInfo(I);
      // ...
      auto F = findBaseAndOffset(*AI);
      // ...
        AI->Offset = F.second;
      // ...
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      // ...
      AddrGroups[GroupInst].push_back(*AI);
    // ...
    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // Drop groups with a single member, and groups without HVX-sized members.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // ...
      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });

  return !AddrGroups.empty();
}
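
// Grouping sketch (assumed example): accesses whose pointers differ by a
// compile-time constant share one AddrGroups entry, keyed by the first
// ("base") instruction seen. Loads from %p, %p+64 and %p+128 would form a
// single group with Offsets 0, 64 and 128, while a load from an unrelated
// pointer starts its own group. Per the erase_if calls above, singleton
// groups and groups without any HVX-sized member are dropped.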
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // ...
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // ...
    if (Move.IsHvx != isHvx(Info))
    // ...
    if (Base->getParent() != Info.Inst->getParent())
    // ...
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
    // ...
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    // ...
    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
    // ...
    Move.Main.push_back(Info.Inst);

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    // ...
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  // ...
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
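
// Summary of the checks above (restated, not additional logic): a load can
// join the current group only if its HVX-ness matches the group's, it lives
// in the same basic block as the group base, and everything it depends on
// above the base can itself be moved safely; otherwise the load seeds a new
// group. Groups that end up with a single member are erased, since there is
// nothing to combine.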
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // ...
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // ...
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    // ...
    if (Base->getParent() != Info.Inst->getParent())
    // ...
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(),
                                    Move.Main))
    // ...
    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;
  // ...
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    // ...
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  // ...
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

auto AlignVectors::move(const MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  // ...
      D->moveBefore(Where);
  // ...
    assert(Move.Deps.empty());
    // ...
      M->moveBefore(Where);
  // ...
  return Move.Main.size() + Move.Deps.size() > 1;
}

// realignLoadGroup:
                                    const ByteSpan &VSpan, int ScLen,
  // ...
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  // ...
  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
  // ...
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
    // Use the block's own address as a placeholder value for now.
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
  // ...
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  // ...
  auto earliestUser = [&](const auto &Uses) {
    // ...
      auto *I = dyn_cast<Instruction>(U.getUser());
      assert(I != nullptr && "Load used in a non-instruction?");
      // ...
      if (I->getParent() == BaseBlock) {
        if (!isa<PHINode>(I))
  // ...
  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      EarliestUser[S.Seg.Val] = std::min(
          EarliestUser[S.Seg.Val], earliestUser(B.Seg.Val->uses()), isEarlier);

  // createLoad (local lambda):
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    // ...
    Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
    // ...
    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    // ...
                                 VSpan.section(Start, Width).values());
    return cast<Instruction>(Load);

  // moveBefore (local lambda):
    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(In, To);
  // ...
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
    // ...
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
      // ...
      Loads[Index] = createLoad(Builder, VSpan, Index);
      // ...
      if (!HVC.isSafeToMoveBeforeInBB(*Loads[Index], BasePos))
        moveBefore(Loads[Index], &*BasePos);
  // ...
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
      // ...
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      // ...
      ASpan[Index].Seg.Val = Val;
  // ...
  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    // ...
    Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
    // ...
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val == nullptr)
      // ...
      Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
      Accum =
          HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
    // ...
    Type *ValTy = getPayload(B.Seg.Val)->getType();
    // ...
                                getPassThrough(B.Seg.Val));
    B.Seg.Val->replaceAllUsesWith(Sel);
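
// Worked example (assumed numbers): one 64-byte unaligned load 13 bytes past
// an aligned base gives AlignVal = 13, NumSectors = 1 and DoAlign = true, so
// aligned sector loads are emitted at offsets 0 and 64 from AlignAddr. The
// original value is bytes [13..77) of the concatenated sectors, recovered as
// vralignb(Sector0, Sector1, 13); insertb then stitches the byte ranges, and
// the final select applies the original mask and pass-through value.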
// realignStoreGroup:
                                     const ByteSpan &VSpan, int ScLen,
  // ...
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  // ...
  ByteSpan ASpanV, ASpanM;
  // ...
    auto *VecTy = VectorType::get(Ty, 1, false);
    return Builder.CreateBitCast(Val, VecTy);
  // ...
  for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
    // ...
    ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
    // ...
    Value *AccumM = HVC.getNullValue(SecTy);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
      Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                Pay->getType(), HVC.getByteTy());
      AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
                           S.Seg.Start, S.Seg.Size, S.Pos);
      AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
                           S.Seg.Start, S.Seg.Size, S.Pos);
    // ...
    ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
  // ...
  for (int j = 1; j != NumSectors + 2; ++j) {
    Value *PrevV = ASpanV[j - 1].Seg.Val, *ThisV = ASpanV[j].Seg.Val;
    Value *PrevM = ASpanM[j - 1].Seg.Val, *ThisM = ASpanM[j].Seg.Val;
    // ...
    ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
  // ...
  for (int i = 0; i != NumSectors + DoAlign; ++i) {
    Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
    Value *Val = ASpanV[i].Seg.Val;
    // ...
    if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
      // ...
          createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
      // ...
      int Start = (i - DoAlign) * ScLen;
      int Width = (1 + DoAlign) * ScLen;
      // ...
                                VSpan.section(Start, Width).values());
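
// The store path mirrors the load path: for every sector it accumulates the
// payload bytes (AccumV) and a byte-level mask of which bytes are actually
// written (AccumM), realigns both across sector boundaries with vlalignb
// when DoAlign is set, and then emits masked stores, so bytes that no
// original store covered keep their old memory contents.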
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // ...
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);
  // ...
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  // ...
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;
  // ...
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
  // ...
  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;
  // ...
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;
  // ...
  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // ...
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    // ...
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  // ...
    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
  // ...
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  // ...
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
  // ...
    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
  // ...
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
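
// Alignment arithmetic with assumed values: if the group's smallest offset
// is Start = -4, the best-aligned member sits at OffAtMax = 0 with
// HaveAlign = 128, and MinNeeded = 8, then Adjust = -alignTo(0 - (-4), 8)
// = -8, AlignAddr points 8 bytes below the well-aligned address, and
// Diff = -4 - (0 - 8) = 4 becomes the AlignVal shift passed to
// realignLoadGroup/realignStoreGroup (the assert above bounds Diff below
// MinNeeded). Without a sufficiently aligned member, the address is masked
// down via createAlignedPointer and the shift is the runtime ptrtoint value.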
auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  // ...
  int Size = HVC.getSizeOf(Ty);
  if (HVC.HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HVC.HST.getVectorLength());
  // ...

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
  // ...
  bool Changed = false;
  MoveList LoadGroups, StoreGroups;
  // ...
  for (auto &G : AddrGroups) {
    // ...
  for (auto &M : LoadGroups)
  // ...
  for (auto &M : StoreGroups)
  // ...
  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

// HvxIdioms::getNumSignificantBits:
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  // ...
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
  // ...
    NumToTest = Bits - 1;
  // ...
  return {Bits, Sign};

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
// ...

auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
  // ...
    return std::nullopt;
  // ...
  auto m_Shr = [](auto &&V, auto &&S) {
  // ...
  const APInt *Qn = nullptr;
  // ...
  if (Op.Frac > Width)
    return std::nullopt;
  // ...
  const APInt *C = nullptr;
  // ...
      return std::nullopt;
  // ...
  Op.Opcode = Instruction::Mul;
  // ...
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
  Op.ResTy = cast<VectorType>(Ty);
  // ...
  return std::nullopt;
}
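
// The matched idiom, schematically (assumed IR, vector types abbreviated):
//   %m = mul (sext %x), (sext %y)
//   %r = ashr %m, 15          ; or: %r = ashr (add %m, 16384), 15
// is captured as FxpOp{Opcode = Mul, Frac = 15, RoundAt = 14 when the
// rounding add is present}, and the signedness of each operand is
// classified separately through getNumSignificantBits.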
auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  // ...
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  // ...
  auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
  if (VecTy == nullptr)
  // ...
  auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
  unsigned ElemWidth = ElemTy->getBitWidth();
  // ...
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
  // ...
  if (ElemWidth <= 32 && Op.Frac == 0)
  // ...
  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
  // ...
  auto roundUpWidth = [](unsigned Width) -> unsigned {
    // ...
    if (Width > 32 && Width % 32 != 0) {
    // ...
  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);
  // ...
  unsigned Width = std::max(BitsX, BitsY);
  // ...
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
    // ...
  } else if (Width > ElemWidth) {
    // ...
  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
  // ...
  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
  // ...
  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  // ...
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
  // ...
             ? Builder.CreateSExt(Cat, VecTy)
             : Builder.CreateZExt(Cat, VecTy);

// processFxpMulChopped:
                                        const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  auto *InpTy = cast<VectorType>(Op.X.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();
  // ...
  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    // ...
    Value *QMul = nullptr;
    // ...
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    // ...
    if (QMul != nullptr)
  // ...
  assert(Width < 32 || Width % 32 == 0);
  // ...
  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    // ...
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
    // ...
    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
    // ...
      Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal);
    // ...
               ? Builder.CreateAShr(Prod32, ShiftAmt)
               : Builder.CreateLShr(Prod32, ShiftAmt);
    return Builder.CreateTrunc(Shifted, InpTy);
  // ...
  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
  // ...
  auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
  // ...
  if (Op.RoundAt.has_value()) {
    // ...
    RoundV[*Op.RoundAt / 32] =
        HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);
  // ...
  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
  // ...
  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    // ...
    if (Src + 1 < End) {
      // ...
      WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
                                           {Hi, Lo, ShiftAmt});
    // ...
      WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt);
  // ...
  WordP.resize(WordP.size() - SkipWords);
  // ...
  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
}

auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
  // ...
  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
  // ...
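
// Q15 arithmetic (illustrative, assuming the usual reading of the
// instruction): per 16-bit lane, V6_vmpyhvsrs computes approximately
//   sat16((2 * x * y + 0x8000) >> 16),
// a rounding, saturating fractional multiply -- the same value as
// (x * y) >> 15 with the rounding bit added at position 14.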
auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
  // ...
  auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
  // ...
          ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
          : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
  // ...
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
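
// Q31 sketch (paraphrasing the instruction pair, not text from the file):
// V6_vmpyewuh forms the x * unsigned-low-halfword(y) partial product, and
// V6_vmpyowh_(rnd_)sacc accumulates the x * signed-high-halfword(y) part
// shifted into place, so the pair yields roughly (x * y) >> 31 per 32-bit
// lane, with or without rounding depending on Rounding.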
// createAddCarry:
                                Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
  auto VecTy = cast<VectorType>(X->getType());
  if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
    // ...
    if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
    // ...
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
      if (CarryIn == nullptr)
        CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
      Args.push_back(CarryIn);
    // ...
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    // ...
    Value *CarryOut = Builder.CreateExtractValue(Ret, {1});
    return {Result, CarryOut};
  // ...
  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    // ...
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
    // ...
    auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
    // ...
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn);
  // ...
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2)};
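
// Sketch of the fallback path above (no suitable vaddcarry intrinsic): the
// predicate carry-in is first materialized as an integer vector through
// V6_vandqrt using a mask with a 1 in the low bit of each lane -- e.g. for
// 16-bit lanes the loop builds 0x00010001 -- that vector is added to X, and
// the combined carry-out of the two additions is the OR of their individual
// carries.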
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  // ...
  std::tie(X, Y) = canonSgn(X, Y);
  // ...
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
  // ...
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
  // ...
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
  // ...
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  // ...
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
}

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  // ...
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
  // ...
  if (HVC.HST.useHVXV69Ops()) {
    // ...
    auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
  // ...
  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  Value *Pair16 =
      Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty);
  unsigned Len = HVC.length(HvxP16Ty) / 2;
  // ...
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;
  // ...
  return Builder.CreateShuffleVector(HVC.sublo(Builder, Pair16),
                                     HVC.subhi(Builder, Pair16), PickOdd);
}

auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);
  // ...
  std::tie(X, Y) = canonSgn(X, Y);
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
  // ...
  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});
// ...

// createAddLong:
  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
  // ...
    if (HVC.isZero(WordX[Idx]))
    // ...
    else if (HVC.isZero(WordY[Idx]))
  // ...
  Value *Carry = nullptr;
  // ...
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

// createMulLong:
  // ...
  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      // ...
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);
  // ...
  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);
  // ...
  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
auto HvxIdioms::run() -> bool {
  bool Changed = false;
  // ...
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        // ...
        bool StartOver = !isa<Instruction>(New);
        It->replaceAllUsesWith(New);
        // ...
        It = StartOver ? B.rbegin()
                       : cast<Instruction>(New)->getReverseIterator();
// ...

auto HexagonVectorCombine::run() -> bool {
  if (!HST.useHVXOps())
  // ...
  bool Changed = false;
  Changed |= AlignVectors(*this).run();
  Changed |= HvxIdioms(*this).run();
// ...

auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
// ...

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  // ...
  return VectorType::get(ByteTy, ElemCount, false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  // ...
  return VectorType::get(BoolTy, ElemCount, false);
}

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
// ...

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  // ...

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return std::nullopt;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
  // ...
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  // ...
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
// ...

auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  // ...
  return getSizeOf(Val->getType(), Kind);
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  // ...
  auto *NcTy = const_cast<Type *>(Ty);
  // ...
    return DL.getTypeStoreSize(NcTy).getFixedValue();
  // ...
    return DL.getTypeAllocSize(NcTy).getFixedValue();
// ...

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // ...
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());
}

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();
}

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  // ...
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
// ...

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  // ...
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
// ...

auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
  // ...
  auto VecTy = cast<VectorType>(Ty);
  Type *ElemTy = VecTy->getElementType();
// ...

auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(V)) {
// ...

// insertb:
                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  // ...
  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
  // ...
  for (int i = 0; i != P2Len; ++i) {
    // ...
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  // ...
  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
  return vresize(Builder, P2Insert, DstLen, Undef);
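
// insertb in pictures (assumed sizes): with Start = 2, Length = 3, Where = 5,
// bytes [2..5) of Src replace bytes [5..8) of Dst. Both vectors are resized
// to a common power-of-2 width P2Len so one shufflevector can splice them:
// for i in [5..8) the mask picks P2Len + 2 + (i - 5), i.e. a Src byte (Src
// is the second shuffle operand), and for all other i it picks byte i of
// Dst; the result is finally resized back to Dst's length.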
// vlalignb:
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  // ...
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
  // ...
  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});
  // ...
    return Builder.CreateBitCast(Trunc, Hi->getType());
  // ...
  return vralignb(Builder, Lo, Hi, Sub);

// vralignb:
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  // ...
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
  // ...
  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});
  // ...
    return Builder.CreateBitCast(Trunc, Lo->getType());
  // ...
                                        Intrinsic::hexagon_S2_valignrb);
    // ...
    return Builder.CreateBitCast(Call, Lo->getType());

// Concatenation of vectors via pairwise shuffles:
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;
  // ...
  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);
    // ...
    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    // ...
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
                                                  Work[ThisW][i + 1], SMask);
      Work[OtherW].push_back(Joined);
  // ...
  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);

// vresize:
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());
  // ...
  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  // ...
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);
  // ...
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
  return Builder.CreateShuffleVector(Val, PadVec, SMask);

// rescale:
  if (FromSTy == ToSTy)
  // ...
  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
  // ...
  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);
  // ...
  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);
  // ...
      Mask, VectorType::get(FromITy, FromCount, false));
  // ...
      Ext, VectorType::get(ToITy, ToCount, false));
  // ...
      Cast, VectorType::get(getBoolTy(), ToCount, false));

// vlsb:
  if (ScalarTy == getBoolTy())
  // ...
  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
  // ...
  return Builder.CreateTrunc(Bytes, getBoolTy());

// vbytes:
  if (ScalarTy == getByteTy())
  // ...
  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
  // ...
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
  return Builder.CreateSExt(Val, getByteTy());

// subvector:
                                        unsigned Start, unsigned Length) const
  // ...
  return getElementRange(Builder, Val, Val, Start, Length);

// sublo:
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);

// subhi:
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);

// vdeal:
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  // ...
  for (int i = 0; i != Len; ++i) {
    // ...
  return Builder.CreateShuffleVector(Val0, Val1, Mask);

// vshuff:
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  // ...
  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;
    // ...
  return Builder.CreateShuffleVector(Val0, Val1, Mask);

auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
  // ...
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
    // ...
    assert(HST.isTypeForHVX(SrcTy, true));
    // ...
    if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
      return Builder.CreateBitCast(Val, DestTy);
    // ...
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
    // ...
    return Builder.CreateCall(FI, {Val});
  // ...
  for (int i = 0, e = Args.size(); i != e; ++i) {
    // ...
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
    // ...
  assert(HST.isTypeForHVX(CallTy, true));
  // ...
  return getCast(Builder, Call, RetTy);

auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
  // ...
                                               unsigned ToWidth) const
  // ...
  auto *VecTy = cast<VectorType>(Vec->getType());
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  // ...
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;
  // ...
  unsigned Length = length(VecTy);

  auto splitInHalf = [&](unsigned Begin, unsigned End,
                         auto splitFunc) -> void {
    // ...
    if (Begin + 1 == End)
    // ...
    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
    // ...
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  };

  splitInHalf(0, NumResults, splitInHalf);
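
// Shape of the recursion (illustrative): to split a vector of i32 elements
// down to i8, the value is bitcast to twice as many half-width lanes and
// vdeal separates the even lanes (the low halves) from the odd lanes (the
// high halves); each half of Results is then split again. Results ends up
// holding NumResults = FromWidth / ToWidth pieces, ordered from least to
// most significant.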
auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
  // ...
  assert(ToType->getElementType()->isIntegerTy());
  // ...
  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);
  // ...
  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
    // ...
    Inputs.resize(NeedInputs, Sign);

  while (Inputs.size() > 1) {
    // ...
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
      Inputs[i / 2] = Builder.CreateBitCast(Res, VTy);
    // ...
    Inputs.resize(Inputs.size() / 2);

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
}

auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // ...
      I->eraseFromParent();
  // ...
#define CallBuilder(B, F)                                                      \
  // ...
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
  // ...

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
  // ...
  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;
  // ...
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
  // ...
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);
  // ...
  if (auto *Diff = dyn_cast<ConstantInt>(
  // ...
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  // ...
    return std::nullopt;
  // ...
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  // ...
    return std::nullopt;
  // ...
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  // ...
    return std::nullopt;

  return (Diff0 + Diff1) * Scale;
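
// Worked example (assumed IR):
//   %g0 = getelementptr i32, ptr %p, i32 %i
//   %g1 = getelementptr i32, ptr %p, i32 %j
// If the index difference simplifies to a constant, the byte difference is
// that constant times Scale (= 4 here). Otherwise the difference is split
// into a known-bits part and an unknown-bits part (SubK/SubU above), each of
// which must still fold to a ConstantInt, giving (Diff0 + Diff1) * Scale;
// anything else yields std::nullopt.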
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
// ...

auto HexagonVectorCombine::getKnownBits(const Value *V,
// ...

template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
  // ...
                                                  const T &IgnoreInsts) const
  // ...
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
        if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
          switch (II->getIntrinsicID()) {
          case Intrinsic::masked_load:
          // ...
          case Intrinsic::masked_store:
          // ...
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // ...
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
  // ...
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  // ...
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  // ...
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    // ...
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
      // ...
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
      // ...
      if (!CB->hasFnAttr(Attribute::NoSync))
      // ...
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
        // ...
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))

auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  // ...

// getElementRange:
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);

class HexagonVectorCombineLegacy : public FunctionPass {
  // ...
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    // ...
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    // ...
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    // ...
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
  // ...
};

char HexagonVectorCombineLegacy::ID = 0;
// ...
                      "Hexagon Vector Combine", false, false)
// ...
  return new HexagonVectorCombineLegacy();