#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"

#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, ScalarEvolution &SE_,
                       TargetLibraryInfo &TLI_, const TargetMachine &TM_)
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_), SE(SE_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  enum SizeKind {
    Store, // Store size
    Alloc  // Alloc size
  };

  IntegerType *getIntTy(unsigned Width = 32) const;
  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
  Value *insertb(IRBuilderBase &Builder, Value *Dst, Value *Src, int Start,
                 int Length, int Where) const;
  SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
                                           unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0,
                                                Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
                                 const Instruction *CtxI = nullptr) const;
  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
  Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                         int Start, int Length) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;
};
class AlignVectors {
public:
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;
  using InstMap = DenseMap<Instruction *, Instruction *>;

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A,
             Type *T, Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;

    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;

    Instruction *Base = nullptr; // Base instruction of the parent group.
    InstList Main;               // Main group of instructions.
    InstList Deps;               // List of dependencies.
    InstMap Clones;              // Map from original Deps to their clones.
    bool IsHvx = false;          // Is this group of HVX instructions?
    bool IsLoad = false;         // Is this a load group?
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    // A representation of "interesting" bytes within a given span of memory.
    struct Segment {
      // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      Value *Val; // Value representable as a sequence of bytes.
      int Start;  // First byte of the value that belongs to the segment.
      int Size;   // Number of bytes in the segment.
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Segment Seg; // Value segment.
      int Pos;     // Position (offset) of the block in the span.
    };

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    SmallVector<Value *, 8> values() const;

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
  };

  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                               int Adjust,
                               const InstMap &CloneMap = InstMap()) const;
  Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                              int Alignment,
                              const InstMap &CloneMap = InstMap()) const;

  DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  InstMap cloneBefore(BasicBlock::iterator To, T &&Insts) const;

  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;

  Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                             int Alignment) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
};
372 const AlignVectors::AddrInfo &AI) {
373 OS <<
"Inst: " << AI.Inst <<
" " << *AI.Inst <<
'\n';
374 OS <<
"Addr: " << *AI.Addr <<
'\n';
375 OS <<
"Type: " << *AI.ValTy <<
'\n';
376 OS <<
"HaveAlign: " << AI.HaveAlign.
value() <<
'\n';
377 OS <<
"NeedAlign: " << AI.NeedAlign.
value() <<
'\n';
378 OS <<
"Offset: " << AI.Offset;
383 const AlignVectors::MoveGroup &MG) {
384 OS <<
"IsLoad:" << (MG.IsLoad ?
"yes" :
"no");
385 OS <<
", IsHvx:" << (MG.IsHvx ?
"yes" :
"no") <<
'\n';
388 OS <<
" " << *
I <<
'\n';
391 OS <<
" " << *
I <<
'\n';
393 for (
auto [K, V] : MG.Clones) {
395 K->printAsOperand(OS,
false);
396 OS <<
"\t-> " << *V <<
'\n';
403 OS <<
" @" <<
B.Pos <<
" [" <<
B.Seg.Start <<
',' <<
B.Seg.Size <<
"] ";
404 if (
B.Seg.Val ==
reinterpret_cast<const Value *
>(&
B)) {
405 OS <<
"(self:" <<
B.Seg.Val <<
')';
406 }
else if (
B.Seg.Val !=
nullptr) {
415 const AlignVectors::ByteSpan &BS) {
416 OS <<
"ByteSpan[size=" << BS.size() <<
", extent=" << BS.extent() <<
'\n';
417 for (
const AlignVectors::ByteSpan::Block &
B : BS)
class HvxIdioms {
public:
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
  }

  bool run();

private:
  enum Signedness { Positive, Signed, Unsigned };

  enum DstQualifier {
    Undefined,
    LdSt,
    LLVM_Gather,
    LLVM_Scatter,
    HEX_Gather,
    HEX_Scatter,
    HEX_Gather_Scatter,
    Arithmetic,
    Call
  };

  // Value together with its known signedness.
  struct SValue {
    Value *Val;
    Signedness Sgn;
  };

  // A fixed-point multiplication: (X * Y) >> Frac, with an optional
  // rounding addend of 1 << *RoundAt added before the shift.
  struct FxpOp {
    unsigned Opcode;
    unsigned Frac; // Number of fraction bits.
    SValue X, Y;
    std::optional<unsigned> RoundAt;
    VectorType *ResTy;
  };

  auto getNumSignificantBits(Value *V, Instruction *In) const
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;

  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
  bool matchScatter(Instruction &In) const;
  bool matchGather(Instruction &In) const;
  auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
  Value *processVScatter(Instruction &In) const;
  Value *processVGather(Instruction &In) const;

  auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                            const FxpOp &Op) const -> Value *;
  auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;
  auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;
  auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
  auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> Value *;
  auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> Value *;
  auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> std::pair<Value *, Value *>;
  auto createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
                     ArrayRef<Value *> WordY) const -> SmallVector<Value *>;
  auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
                     Signedness SgnX, ArrayRef<Value *> WordY,
                     Signedness SgnY) const -> SmallVector<Value *>;

  VectorType *HvxI32Ty;
  VectorType *HvxP32Ty;
  const HexagonVectorCombine &HVC;
};
[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << ":rnd";
    } else {
      OS << " + 1<<" << *Op.RoundAt;
    }
  }
  OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
  return OS;
}
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// VS2017 and some versions of VS2019 have trouble deducing the template
// arguments here, so provide an explicit map overload.
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}
auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}
auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                       // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end + 1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}
auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Values;
}
auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  if (auto *L = getIfUnordered(dyn_cast<LoadInst>(&In)))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = getIfUnordered(dyn_cast<StoreInst>(&In)))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = dyn_cast<IntrinsicInst>(&In)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
    }
  }
  return std::nullopt;
}
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}
auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}
auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy))
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());
}
auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);
  }
  return UndefValue::get(getPayload(Val)->getType());
}
auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    -> Value * {
  if (auto *I = dyn_cast<Instruction>(Ptr))
    if (Instruction *New = CloneMap.lookup(I))
      Ptr = New;
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
}
auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
    -> Value * {
  auto remap = [&](Value *V) -> Value * {
    if (auto *I = dyn_cast<Instruction>(V)) {
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
      return I;
    }
    return V;
  };
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(And, Builder.getPtrTy(), "itp");
}
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                              Value *Predicate, int Alignment, Value *Mask,
                              Value *PassThru,
                              ArrayRef<Value *> MDSources) const -> Value * {
  bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
  // Predicate is nullptr if not creating predicated load.
  if (Predicate) {
    assert(!Predicate->getType()->isVectorTy() &&
           "Expecting scalar predicate");
    if (HVC.isFalse(Predicate))
      return UndefValue::get(ValTy);
    if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
      Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                         Alignment, MDSources);
      return Builder.CreateSelect(Mask, Load, PassThru);
    }
    // Predicate is true here.
  }
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
                                               Mask, PassThru, "mld");
  propagateMetadata(Load, MDSources);
  return Load;
}
auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
                                    ArrayRef<Value *> MDSources) const
    -> Value * {
  Instruction *Load =
      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
  propagateMetadata(Load, MDSources);
  return Load;
}
auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
                                        Value *Ptr, Value *Predicate,
                                        int Alignment,
                                        ArrayRef<Value *> MDSources) const
    -> Value * {
  assert(HVC.HST.isTypeForHVX(ValTy) &&
         "Predicated 'scalar' vector loads not yet supported");
  assert(Predicate);
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
    return UndefValue::get(ValTy);
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  // FIXME: This may not put the metadata on the load.
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},
                                MDSources);
}
auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                               Value *Predicate, int Alignment, Value *Mask,
                               ArrayRef<Value *> MDSources) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
                        "Expecting scalar predicate"));
  if (Predicate) {
    if (HVC.isFalse(Predicate))
      return UndefValue::get(Val->getType());
    if (HVC.isTrue(Predicate))
      Predicate = nullptr;
  }
  // If the mask is all-true, only use the (scalar) predicate, if any.
  if (HVC.isTrue(Mask)) {
    if (Predicate) {
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
                                   MDSources);
    }
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
  }

  if (!Predicate) {
    Instruction *Store =
        Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
    propagateMetadata(Store, MDSources);
    return Store;
  }
  // Predicated masked store: there is no direct instruction for this, so
  // load the original contents, merge in the masked bytes, and store back.
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
                               MDSources);
}
auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,
                                     ArrayRef<Value *> MDSources) const
    -> Value * {
  Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  propagateMetadata(Store, MDSources);
  return Store;
}
auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
                                         Value *Ptr, Value *Predicate,
                                         int Alignment,
                                         ArrayRef<Value *> MDSources) const
    -> Value * {
  assert(HVC.HST.isTypeForHVX(Val->getType()) &&
         "Predicated 'scalar' vector stores not yet supported");
  assert(Predicate);
  if (HVC.isFalse(Predicate))
    return UndefValue::get(Val->getType());
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  // FIXME: This may not put the metadata on the store.
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
                                MDSources);
}
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
    -> DepList {
  BasicBlock *Parent = Base->getParent();
  assert(In->getParent() == Parent &&
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  DepList Deps;
  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
    Instruction *D = WorkQ.front();
    WorkQ.pop_front();
    if (D != In)
      Deps.insert(D);
    for (Value *Op : D->operands()) {
      if (auto *I = dyn_cast<Instruction>(Op)) {
        if (I->getParent() == Parent && Base->comesBefore(I))
          WorkQ.push_back(I);
      }
    }
  }
  return Deps;
}
auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return llvm::none_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}
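
// createLoadGroups/createStoreGroups split an address group into "move
// groups": runs of accesses that can legally be placed next to each other
// (same basic block, safe to reorder past the intervening instructions,
// upward dependencies safe to clone). Each move group is later moved
// together and realigned as a unit. HVX and non-HVX accesses are never
// mixed within one group.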
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    // Check if it's safe to move the load.
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return false;
    // And if it's safe to clone the dependencies.
    auto isSafeToCopyAtBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToCopyAtBase))
      return false;

    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

  // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
  if (!HVC.HST.useHVXV62Ops())
    erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });

  return LoadGroups;
}
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // For stores with return values we'd have to collect downward
    // dependencies. There are no such stores handled at the moment.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the store group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(),
                                    Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  // Visit the stores in reverse order: the group leader is the last store.
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

  // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
  if (!HVC.HST.useHVXV62Ops())
    erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });

  // Erase groups where every store is a full HVX vector: aligning
  // predicated stores can generate code that is less efficient than a
  // sequence of unaligned vector stores.
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
      return llvm::all_of(G.Main, [this](Instruction *S) {
        auto MaybeInfo = this->getAddrInfo(*S);
        assert(MaybeInfo.has_value());
        return HVC.HST.isHVXVectorType(
            EVT::getEVT(MaybeInfo->ValTy, /*HandleUnknown=*/false));
      });
    });
  }

  return StoreGroups;
}
auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  // Move all instructions to be adjacent.
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Clone all deps to before Where, keeping order.
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main) {
      if (M != Where)
        M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
      Where = M;
    }
    // Replace Deps with the clones.
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
  } else {
    // Move all main instructions to before Where, inverting order.
    assert(Move.Deps.empty());
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where->getIterator());
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}
template <typename T>
auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
    -> InstMap {
  InstMap Map;

  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    Instruction *C = I->clone();
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);

    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
  }
  return Map;
}
auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
                                    Value *AlignVal, Value *AlignAddr) const
    -> void {
  LLVM_DEBUG(dbgs() << __func__ << "\n");

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock::iterator BasePos = Builder.GetInsertPoint();
  BasicBlock *BaseBlock = Builder.GetInsertBlock();

  ByteSpan ASpan;
  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
  auto *Undef = UndefValue::get(SecTy);

  // A created load does not have to be "Instruction" (e.g. "undef").
  SmallVector<Value *> Loads(NumSectors + DoAlign, nullptr);

  // Populate the blocks first, so that a block to block mapping is possible.
  // Each block in ASpan initially points back at itself; the proper values
  // are filled in once the corresponding loads are created.
  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
  }

  // Multiple values from VSpan can map to the same value in ASpan. Since we
  // try to create loads lazily, we need to find the earliest use for each
  // value from ASpan.
  DenseMap<void *, Instruction *> EarliestUser;
  auto isEarlier = [](Instruction *A, Instruction *B) {
    if (B == nullptr)
      return true;
    if (A == nullptr)
      return false;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  };
  auto earliestUser = [&](const auto &Uses) {
    Instruction *User = nullptr;
    for (const Use &U : Uses) {
      auto *I = dyn_cast<Instruction>(U.getUser());
      assert(I != nullptr && "Load used in a non-instruction?");
      // Only consider users in this block, but remember if there were
      // users outside of it too (via the block terminator).
      if (I->getParent() == BaseBlock) {
        User = std::min(User, I, isEarlier);
      } else {
        User = BaseBlock->getTerminator();
      }
    }
    return User;
  };

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
    }
  }

  LLVM_DEBUG({
    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';
    }
  });

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
    Value *Ptr =
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    Value *Predicate =
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    // If vector shifting is potentially needed, accumulate metadata
    // from source sections of twice the load width.
    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());
  };

  auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
    // Move In and its upward dependencies to before To.
    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    In->moveBefore(To);
    // DepList is sorted with respect to positions in the basic block.
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);
  };

  // Generate necessary loads at appropriate locations.
  for (int Index = 0; Index != NumSectors + 1; ++Index) {
    // In ASpan, each block will be either a single aligned load, or a
    // valign of a pair of loads. In the latter case, an aligned load j
    // belongs to the current valign, and the one in the previous block
    // (for j > 0). Place each load at the earliest user of its data.
    Instruction *PrevAt =
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
    Instruction *ThisAt =
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
      Builder.SetInsertPoint(Where);
      Loads[Index] =
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      // We know it's safe to put the load at BasePos, but we'd prefer "Where".
      // The check can only happen after the load exists.
      if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
        if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
          moveBefore(Load->getIterator(), BasePos);
      }
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
    }
  }

  // Generate valigns if needed, and fill in proper values in ASpan.
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
      Builder.SetInsertPoint(Where);
      Value *Val = Loads[Index];
      assert(Val != nullptr);
      if (DoAlign) {
        Value *NextLoad = Loads[Index + 1];
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      }
      ASpan[Index].Seg.Val = Val;
    }
  }

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));

    // Walk the blocks of ASection in the order of their positions in the
    // basic block, skipping blocks that don't have a value.
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
    }
    llvm::sort(ABlocks,
               [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
                 return isEarlier(cast<Instruction>(A->Seg.Val),
                                  cast<Instruction>(B->Seg.Val));
               });
    for (ByteSpan::Block *S : ABlocks) {
      // The processing of the data loaded by the aligned loads needs to be
      // inserted after the data is available.
      Instruction *SegI = cast<Instruction>(S->Seg.Val);
      Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
      Accum =
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
    }
    // Instead of casting everything to bytes for the vselect, cast to the
    // original value type, to avoid complications with casting masks that
    // are not exactly of HVX length.
    Type *ValTy = getPayload(B.Seg.Val)->getType();
    Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
    Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                      getPassThrough(B.Seg.Val), "sel");
    B.Seg.Val->replaceAllUsesWith(Sel);
  }
}
auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
                                     Value *AlignVal, Value *AlignAddr) const
    -> void {
  LLVM_DEBUG(dbgs() << __func__ << "\n");

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  // Values and masks, accumulated per sector.
  ByteSpan ASpanV, ASpanM;

  // Return a vector value corresponding to the input value Val:
  // either <1 x Val> for scalar Val, or Val itself for vector Val.
  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    Type *Ty = Val->getType();
    if (Ty->isVectorTy())
      return Val;
    auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
    return Builder.CreateBitCast(Val, VecTy, "cst");
  };

  // Create an extra sector at the beginning and at the end; they will act
  // as the left/right filler in the vlalign step below.
  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign;
       ++Index) {
    // For stores, the size of each section is an aligned vector length.
    // Adjust the store offsets relative to the section start offset.
    ByteSpan VSection =
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    Value *Undef = UndefValue::get(SecTy);
    Value *Zero = HVC.getNullValue(SecTy);
    Value *AccumV = Undef;
    Value *AccumM = Zero;
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
      Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);

      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumV = Builder.CreateSelect(
          Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
    }
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
  });

  // vlalign
  if (DoAlign) {
    for (int Index = 1; Index != NumSectors + 2; ++Index) {
      Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
      Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
      assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
      ASpanV[Index - 1].Seg.Val =
          HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
      ASpanM[Index - 1].Seg.Val =
          HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
    }
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
  });

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    Value *Val = ASpanV[Index].Seg.Val;
    Value *Mask = ASpanM[Index].Seg.Val; // bytes
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
      return;
    Value *Ptr =
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    Value *Predicate =
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    // If vector shifting is potentially needed, accumulate metadata
    // from source sections of twice the store width.
    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());
  };

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index,
                DoAlign && Index == NumSectors);
  }
}
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');

  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element with the maximum value of GetValue from Range.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *llvm::max_element(Range, [&GetValue](auto &A, auto &B) {
      return GetValue(A) < GetValue(B);
    });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr + Start). The
  // realignment code below only needs to know which bytes of that span
  // each access in the move group covers.
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the whole address group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Minimum offset in the move group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder's insertion point right before the load group, or
  // immediately after the store group (stores are listed in reverse order).
  Instruction *InsertAt = Move.Main.front();
  if (!Move.IsLoad)
    InsertAt = &*std::next(InsertAt->getIterator());

  IRBuilder Builder(InsertAt->getParent(), InsertAt->getIterator(),
                    InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Shift the offset of the maximally aligned instruction (OffAtMax)
    // back by just enough multiples of the required alignment to cover the
    // distance from Start to OffAtMax.
    // Calculate the address adjustment amount based on the address with the
    // maximum alignment, allowing a simple gep instruction instead of
    // potential bitcasts to i8*.
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // WithMinOffset is the lowest address in the group,
    //   WithMinOffset.Addr = Base + Start.
    // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
    // mask off unnecessary bits, so it's ok to use the original pointer as
    // the alignment amount. Do an explicit down-alignment of the address to
    // avoid creating an aligned instruction with an address that isn't.
    AlignAddr =
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
    AlignVal =
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
    if (auto *I = dyn_cast<Instruction>(AlignVal)) {
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);
    }
  }

  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  LLVM_DEBUG({
    dbgs() << "ScLen:  " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';
  });

  if (Move.IsLoad)
    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
  else
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}
auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
  Value *And = Builder.CreateAnd(
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);
  return Builder.CreateICmpNE(And, Zero, "isz");
}
auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
    return false;
  int Size = HVC.getSizeOf(Ty);
  if (HVC.HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HVC.HST.getVectorLength());
  return Size == 4 || Size == 8;
}
auto AlignVectors::run() -> bool {
  LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
                    << '\n');
  if (!createAddressGroups())
    return false;

  LLVM_DEBUG({
    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';
    }
  });

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  LLVM_DEBUG({
    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";
  });

  // Cumulative limit on the number of groups.
  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
    return false;

  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
  } else {
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);
  }

  for (auto &M : LoadGroups)
    Changed |= moveTogether(M);
  for (auto &M : StoreGroups)
    Changed |= moveTogether(M);

  LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}
auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  // The significant bits are calculated including the sign bit. This may
  // add an extra bit for zero-extended values, e.g. (zext i32 to i64) may
  // result in 33 significant bits. To avoid extra words, skip the extra
  // sign bit, but keep information that the value is to be treated as
  // unsigned.
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0; // Number of bits used in test for unsignedness.
  if (isPowerOf2_32(Bits))
    NumToTest = Bits;
  else if (Bits > 1 && isPowerOf2_32(Bits - 1))
    NumToTest = Bits - 1;

  if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
    // If all bits above NumToTest are known to be zero, the value is
    // positive and can be treated as unsigned with one fewer bit.
    Sign = Unsigned;
    Bits = NumToTest;
  }
  return {Bits, Sign};
}
auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
  // Canonicalize the signedness of X and Y, so that the result is one of:
  //   S, S
  //   U/P, S
  //   U/P, U/P
  if (X.Sgn == Signed && Y.Sgn != Signed)
    std::swap(X, Y);
  return {X, Y};
}
auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();

  if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
    return std::nullopt;

  unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();

  FxpOp Op;
  Value *Exp = &In;

  // Fixed-point multiplication is always shifted right (except when the
  // fraction is 0 bits).
  auto m_Shr = [](auto &&V, auto &&S) {
    return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
  };

  const APInt *Qn = nullptr;
  if (Value * T; match(Exp, m_Shr(m_Value(T), m_APInt(Qn)))) {
    Op.Frac = Qn->getZExtValue();
    Exp = T;
  } else {
    Op.Frac = 0;
  }

  if (Op.Frac > Width)
    return std::nullopt;

  // Check if there is rounding added.
  const APInt *C = nullptr;
  if (Value * T; Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_APInt(C)))) {
    uint64_t CV = C->getZExtValue();
    if (CV != 0 && !isPowerOf2_64(CV))
      return std::nullopt;
    if (CV != 0)
      Op.RoundAt = Log2_64(CV);
    Exp = T;
  }

  // Check if the rest is a multiplication.
  if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
    Op.Opcode = Instruction::Mul;
    // FIXME: The information below is recomputed.
    Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
    Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
    Op.ResTy = cast<VectorType>(Ty);
    return Op;
  }

  return std::nullopt;
}
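
// processFxpMul lowers a matched fixed-point multiply. The operands are
// first resized to the number of significant bits (rounded up to a power
// of 2, or to a multiple of 32 above 32 bits), then the vector is chopped
// into HVX-register-sized pieces, and each piece is lowered individually
// by processFxpMulChopped below.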
auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
    -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());

  auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
  if (VecTy == nullptr)
    return nullptr;
  auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
  unsigned ElemWidth = ElemTy->getBitWidth();

  // TODO: This can be relaxed to widening multiplies of shorter vectors.
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
    return nullptr;

  // Try to bail out early: a multiplication of 32-bit elements that does
  // not need any shifting is handled well by the default lowering.
  if (ElemWidth <= 32 && Op.Frac == 0)
    return nullptr;

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);

  Value *X = Op.X.Val, *Y = Op.Y.Val;
  IRBuilder Builder(In.getParent(), In.getIterator(),
                    InstSimplifyFolder(HVC.DL));

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width <= 32 && !isPowerOf2_32(Width)) {
      // If the element width is not a power of 2, round it up to the next
      // one. Do this for widths not exceeding 32.
      return PowerOf2Ceil(Width);
    }
    if (Width > 32 && Width % 32 != 0) {
      // For wider elements, round it up to the multiple of 32.
      return alignTo(Width, 32u);
    }
    return Width;
  };

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  // For elementwise multiplication the vectors must have the same lengths,
  // so resize the elements of both inputs to the same width.
  unsigned Width = std::max(BitsX, BitsY);

  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
    X = Builder.CreateTrunc(X, ResizeTy, "trn");
    Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
  } else if (Width > ElemWidth) {
    X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
                        : Builder.CreateZExt(X, ResizeTy, "zxt");
    Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
                        : Builder.CreateZExt(Y, ResizeTy, "zxt");
  }

  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);

  SmallVector<Value *> Results;
  FxpOp ChopOp = Op;
  ChopOp.ResTy =
      VectorType::get(Op.ResTy->getElementType(), ChopLen, /*Scalable=*/false);

  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
    if (Results.back() == nullptr)
      break;
  }

  if (Results.empty() || Results.back() == nullptr)
    return nullptr;
  return HVC.concat(Builder, Results);
}
inline bool HvxIdioms::matchScatter(Instruction &In) const {
  auto *II = dyn_cast<IntrinsicInst>(&In);
  if (!II)
    return false;
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);
}

inline bool HvxIdioms::matchGather(Instruction &In) const {
  auto *II = dyn_cast<IntrinsicInst>(&In);
  if (!II)
    return false;
  return (II->getIntrinsicID() == Intrinsic::masked_gather);
}
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:

  assert(Ptr && "Unable to extract pointer");

  if (II->getIntrinsicID() == Intrinsic::masked_store)
    return II->getOperand(1);
                       HvxIdioms::DstQualifier &Qual) {

  Qual = HvxIdioms::LdSt;

  if (II->getIntrinsicID() == Intrinsic::masked_gather) {
    Qual = HvxIdioms::LLVM_Gather;
  } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
    Qual = HvxIdioms::LLVM_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
    Qual = HvxIdioms::LdSt;
  } else if (II->getIntrinsicID() ==
             Intrinsic::hexagon_V6_vgather_vscattermh) {
    Qual = HvxIdioms::HEX_Gather_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
    Qual = HvxIdioms::HEX_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
    Qual = HvxIdioms::HEX_Gather;
  }

  Qual = HvxIdioms::Call;

  Qual = HvxIdioms::Arithmetic;
  for (auto &U : In->uses()) {
    Users.push_back(Destination);
  }

  assert(In && "Bad instruction");
  assert(isa<IntrinsicInst>(In) && "Not a gather Intrinsic");

  if (II && II->getIntrinsicID() == Intrinsic::masked_gather)

  auto *Src = IE->getOperand(1);

  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

  if (II->getIntrinsicID() == Intrinsic::masked_load)
    return II->getType();
  if (II->getIntrinsicID() == Intrinsic::masked_store)
    return II->getOperand(0)->getType();
  return In->getType();

  if (II->getIntrinsicID() == Intrinsic::masked_load)
  if (II->getIntrinsicID() == Intrinsic::masked_gather)

  return cstDataVector;

  return GEPIndex->getOperand(0);

  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
static Value *get_i32_Mask(const HexagonVectorCombine &HVC,
                           IRBuilderBase &Builder, LLVMContext &Ctx,
                           unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  // Convert the replicated 32-bit pattern into a predicate register.
  return Builder.CreateIntrinsic(
      /*...*/
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
      /*...*/);
}
Value *HvxIdioms::processVScatter(Instruction &In) const {
  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
  assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned ElemWidth = HVC.getSizeOf(ElemTy);
  unsigned Elements = HVC.length(InpTy);
  LLVM_DEBUG({
    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
    dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
           << ElemWidth << ")\n";
  });

  IRBuilder Builder(In.getParent(), In.getIterator(),
                    InstSimplifyFolder(HVC.DL));

  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
  LLVM_DEBUG(dbgs() << ") for vscatter\n");

  Value *CastIndex = nullptr;
  if (cstDataVector) {
    // Constant indexes: spill them to a stack slot and reload them as an
    // HVX vector.
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes   : " << *StoreIndexes << "\n");
    Value *Indexes = Builder.CreateLoad(
        HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false), IndexesAlloca,
        "reload_index");
    CastIndex = Indexes;
  } else {
    CastIndex = Indexes;
  }

  if (ElemWidth == 1) {
    // i8 elements: unpack indexes and values to half words, then scatter
    // the even and odd bytes with a pair of masked half-word scatters.
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
    auto *UnpackedIndexes = HVC.createHvxIntrinsic(
        Builder, HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/true), V6_vunpack,
        CastIndexes, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);

    auto *UnpackedValueToScatter = HVC.createHvxIntrinsic(
        Builder, HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/true), V6_vunpack,
        CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
                      << "\n");
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);

    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
    Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        /*...*/);
    Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        /*...*/);
  } else if (ElemWidth == 2) {
    Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
        /*...*/);
  } else if (ElemWidth == 4) {
    Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
        /*...*/);
  }
Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
      dyn_cast<VectorType>(In.getOperand(0)->getType());
  assert(InpTy && "Cannot handle no vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
      dyn_cast<PointerType>(InpTy->getElementType());
  assert(ElemTy && "llvm.gather needs vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
                    << *In.getParent() << "\n");
  LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
                    << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
                    << ") type(" << *ElemTy << ") Access alignment("
                    << *In.getOperand(1) << ") AddressSpace("
                    << ElemTy->getAddressSpace() << ")\n");

  assert(isa<VectorType>(In.getOperand(2)->getType()) &&
         "llvm.gather needs vector for mask");

  IRBuilder Builder(In.getParent(), In.getIterator(),
                    InstSimplifyFolder(HVC.DL));

  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;

  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
                    << ")\n");

  LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");

  assert(DstType && "Cannot handle non vector dst type for llvm.gather");

  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    unsigned OutputSize = HVC.getSizeOf(DstType);
    LLVM_DEBUG(dbgs() << " Address space ("
                      << " Result type : " << *DstType
                      << "\n Size in bytes : " << OutputSize
                      << " element type(" << *DstElemTy
                      << ")\n ElemWidth : " << ElemWidth << " bytes\n");

    assert(IndexType && "Cannot handle non vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
    Value *CastedPtr = Builder.CreatePtrToInt(
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    if (ElemWidth == 1) {
      // i8 gathers: unpack byte indexes to half words, gather both halves
      // with masked half-word gathers, then pack the results back to bytes.
      Value *CastIndexes =

      auto *UnpackedIndexes = HVC.createHvxIntrinsic(
          Builder, HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/true), V6_vunpack,
          CastIndexes, nullptr);

      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);

      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);

      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
      Builder.CreateIntrinsic(Type::getVoidTy(Ctx), V6_vgather,
                              {Ptr, QByteMask, CastedPtr, /*...*/});
      Value *LoadedResultHi = Builder.CreateLoad(
          HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false), Ptr,
          "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");

      Builder.CreateIntrinsic(Type::getVoidTy(Ctx), V6_vgather,
                              {Ptr, QByteMask, CastedPtr, /*...*/});
      Value *LoadedResultLo = Builder.CreateLoad(
          HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false), Ptr,
          "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");

      Value *Res = HVC.createHvxIntrinsic(
          Builder, NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
else if (ElemWidth == 2) {
2403 if (IndexWidth == 2) {
2411 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2412 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2414 <<
" Shifted half index: " << *AdjustedIndex <<
")\n");
2420 Type::getVoidTy(Ctx), V6_vgather,
2424 for (
auto &U : Dst->uses()) {
2426 dbgs() <<
" dst used by: " << *UI <<
"\n";
2428 for (
auto &U :
In.uses()) {
2430 dbgs() <<
" In used by : " << *UI <<
"\n";
2435 HVC.getHvxTy(HVC.getIntTy(16),
false), Ptr,
"temp_result");
2436 LLVM_DEBUG(
dbgs() <<
" LoadedResult : " << *LoadedResult <<
"\n");
2437 In.replaceAllUsesWith(LoadedResult);
2439 dbgs() <<
" Unhandled index type for vgather\n";
2442 }
else if (ElemWidth == 4) {
2443 if (IndexWidth == 4) {
2446 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2447 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
2449 <<
" Shifted word index: " << *AdjustedIndex <<
")\n");
2451 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
2474 Dst->eraseFromParent();
2475 }
  else if (Qual == HvxIdioms::LLVM_Scatter) {
    auto *DstInpTy = dyn_cast<VectorType>(Dst->getOperand(1)->getType());
    assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
    [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy);
    [[maybe_unused]] auto *DstElemTy =
        dyn_cast<PointerType>(DstInpTy->getElementType());
    assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
    LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : "
                      << *Dst->getOperand(0) << "\n");
    LLVM_DEBUG(dbgs() << DstElements << ") VecLen(" << DstInpSize << ") type("
                      << *DstElemTy << ") Access alignment("
                      << *Dst->getOperand(2) << ")\n");

    Value *CastedSrc = Builder.CreatePtrToInt(Src, Type::getInt32Ty(Ctx),
                                              "cst_ptr_to_i32");

    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");

    Builder.CreateIntrinsic(Type::getVoidTy(Ctx),
                            Intrinsic::hexagon_V6_vgathermh_128B,
                            /*...*/);
  }
  else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

      LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
      Value *CastedSrc = Builder.CreatePtrToInt(
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

      Builder.CreateIntrinsic(Type::getVoidTy(Ctx),
                              Intrinsic::hexagon_V6_vgathermh_128B,
                              {ResultAlloca, CastedSrc, /*...*/});
      Value *LoadedResult = Builder.CreateLoad(
          HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
          "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

      In.replaceAllUsesWith(LoadedResult);
    } else {
      Value *CastedSrc = Builder.CreatePtrToInt(Src, Type::getInt32Ty(Ctx),
                                                "cst_ptr_to_i32");

      Builder.CreateIntrinsic(Type::getVoidTy(Ctx),
                              Intrinsic::hexagon_V6_vgather_vscattermh,
                              /*...*/);
    }
  }
  else if (Qual == HvxIdioms::HEX_Scatter) {
    Value *CastedSrc = Builder.CreatePtrToInt(
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

    Builder.CreateIntrinsic(Type::getVoidTy(Ctx),
                            Intrinsic::hexagon_V6_vgathermh_128B,
                            /*...*/);
    Value *LoadedResult = Builder.CreateLoad(
        HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
        "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  }
  else if (Qual == HvxIdioms::HEX_Gather) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

      LLVM_DEBUG(dbgs() << "\n AddressSpace: ");

      Value *CastedSrc = Builder.CreatePtrToInt(
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

      Builder.CreateIntrinsic(Type::getVoidTy(Ctx),
                              Intrinsic::hexagon_V6_vgathermh_128B,
                              {ResultAlloca, CastedSrc, /*...*/});
      Value *LoadedResult = Builder.CreateLoad(
          HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
          "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

      In.replaceAllUsesWith(LoadedResult);
    }
  }
  else if (Qual == HvxIdioms::LLVM_Gather) {
    errs() << " Unimplemented vgather to vgather sequence\n";
  }
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  auto *InpTy = cast<VectorType>(Op.X.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();
  bool Rounding = Op.RoundAt.has_value();

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    // The fixed-point intrinsics do signed multiplication.
    if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {
      Value *QMul = nullptr;
      if (Width == 16) {
        QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
      } else if (Width == 32) {
        QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
      }
      if (QMul != nullptr)
        return QMul;
    }
  }

  assert(Width >= 32 || isPowerOf2_32(Width)); // Width <= 32 => power of 2
  assert(Width < 32 || Width % 32 == 0);       // Width > 32 => multiple of 32

  if (Width < 32) {
    if (Width < 16)
      return nullptr;
    // Getting here with Op.Frac == 0 isn't wrong, but suboptimal: a full
    // precision product is unnecessary if there is no shift.
    assert(Width == 16);
    assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
    if (Op.Frac == 16) {
      // Multiply high.
      if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
        return MulH;
    }
    // Do a full-precision multiply and shift.
    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
    if (Rounding) {
      Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
    }

    Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
    Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
                         ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
                         : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");
  }

  // Width >= 32

  // Break up the arguments Op.X and Op.Y into vectors of smaller widths
  // in preparation of doing the multiplication via HVX intrinsics.
  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, /*ToWidth=*/32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, /*ToWidth=*/32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

  auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());

  // Add the optional rounding to the proper word.
  if (Op.RoundAt.has_value()) {
    Value *Zero = HVC.getNullValue(WordX[0]->getType());
    SmallVector<Value *> RoundV(WordP.size(), Zero);
    RoundV[*Op.RoundAt / 32] =
        HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);
  }

  // Shift all products right by Op.Frac.
  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);

  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    Value *Lo = WordP[Src];
    if (Src + 1 < End) {
      Value *Hi = WordP[Src + 1];
      WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
                                           {Hi, Lo, ShiftAmt},
                                           /*FMFSource=*/nullptr, "int");
    } else {
      // The shift of the most significant word.
      WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
    }
  }
  if (SkipWords != 0)
    WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
}
auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
  assert(X.Sgn != Unsigned && Y.Sgn != Unsigned);

  // There is no non-rounding intrinsic for i16.
  if (!Rounding)
    return nullptr;

  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
                                {X.Val, Y.Val});
}
auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
  assert(InpTy->getScalarType() == HVC.getIntTy(32));
  assert(X.Sgn != Unsigned && Y.Sgn != Unsigned);

  auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
  auto V6_vmpyo_acc =
      Rounding ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
               : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
  Value *V1 =
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
}
auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
  auto VecTy = cast<VectorType>(X->getType());
  if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
    SmallVector<Value *> Args = {X, Y};
    Intrinsic::ID AddCarry;
    if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
    } else {
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
      if (CarryIn == nullptr)
        CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
      Args.push_back(CarryIn);
    }
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
                                        /*RetTy=*/nullptr, Args);
    Value *Result = Builder.CreateExtractValue(Ret, {0}, "ext");
    Value *CarryOut = Builder.CreateExtractValue(Ret, {1}, "ext");
    return {Result, CarryOut};
  }

  // In other cases, do a regular add, and unsigned compare-less-than.
  // The carry-out can originate in two places: adding the carry-in or adding
  // the two input values.
  Value *Result1 = X; // Result1 = X + CarryIn
  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    uint32_t Mask = 1;
    if (Width < 32) {
      for (unsigned i = 0, e = 32 / Width; i != e; ++i)
        Mask = (Mask << Width) | 1;
    }
    auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
    Value *ValueIn =
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, /*RetTy=*/nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");
  }

  Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X, "cmp");
  Value *Result2 = Builder.CreateAdd(Result1, Y, "add");
  Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y, "cmp");
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
}
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> Value * {
  Intrinsic::ID V6_vmpyh = 0;
  std::tie(X, Y) = canonSgn(X, Y);

  if (X.Sgn == Signed) {
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
  } else if (Y.Sgn == Signed) {
    // In vmpyhus the second operand is unsigned.
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
  } else {
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
  }

  // i16*i16 -> i32 / interleaved
  Value *P =
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  // Deinterleave.
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
}
auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> Value * {
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false);

  if (HVC.HST.useHVXV69Ops()) {
    if (X.Sgn != Signed && Y.Sgn != Signed) {
      auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
      return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
                                    {X.Val, Y.Val});
    }
  }

  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
  Value *Pair16 =
      Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;

  return Builder.CreateShuffleVector(
      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
}
auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);

  Intrinsic::ID V6_vmpy_parts;
  std::tie(X, Y) = canonSgn(X, Y);

  if (X.Sgn == Signed) {
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
  } else if (Y.Sgn == Signed) {
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
  } else {
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
  }

  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});
  Value *Hi = Builder.CreateExtractValue(Parts, {0}, "ext");
  Value *Lo = Builder.CreateExtractValue(Parts, {1}, "ext");
  return {Lo, Hi};
}
auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
                              ArrayRef<Value *> WordY) const
    -> SmallVector<Value *> {
  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
  SmallVector<Value *> Sum(Length);

  while (Idx != Length) {
    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];
    else
      break;
    ++Idx;
  }

  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
  }

  // This drops the final carry beyond the highest word.
  return Sum;
}
auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
                              Signedness SgnX, ArrayRef<Value *> WordY,
                              Signedness SgnY) const -> SmallVector<Value *> {
  SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());

  // WordX[i] * WordY[j] contributes to words i+j and i+j+1 of the result.
  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      // Only the top words carry the original signedness.
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);
    }
  }

  Value *Zero = HVC.getNullValue(WordX[0]->getType());

  auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
    if (Vector.empty())
      return Zero;
    auto Last = Vector.back();
    Vector.pop_back();
    return Last;
  };

  // Add the collected partial products together, propagating carries
  // through the higher words.
  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr; // no carry-in
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);
        Carry = CarryOut;
      }
    }
  }

  SmallVector<Value *> WordP;
  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
    WordP.push_back(P.front());
  }

  return WordP;
}
auto HvxIdioms::run() -> bool {
  bool Changed = false;

  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        if (!New)
          continue;
        bool StartOver = !isa<Instruction>(New);
        It->replaceAllUsesWith(New);
        RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
        It = StartOver ? B.rbegin()
                       : cast<Instruction>(New)->getReverseIterator();
        Changed = true;
      } else if (matchGather(*It)) {
        if (processVGather(*It)) {
          It->eraseFromParent();
          It = B.rbegin();
          Changed = true;
        }
      } else if (matchScatter(*It)) {
        if (processVScatter(*It)) {
          It->eraseFromParent();
          It = B.rbegin();
          Changed = true;
        }
      }
    }
  }

  return Changed;
}
auto HexagonVectorCombine::run() -> bool {
  if (DumpModule)
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

  bool Changed = false;
  if (HST.useHVXOps()) {
    if (VAEnabled)
      Changed |= AlignVectors(*this).run();
    if (VIEnabled)
      Changed |= HvxIdioms(*this).run();
  }

  if (DumpModule) {
    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"
           << *F.getParent();
  }
  return Changed;
}
auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
  return IntegerType::get(F.getContext(), Width);
}
auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  if (ElemCount == 0)
    return ByteTy;
  return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
}
auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  if (ElemCount == 0)
    return BoolTy;
  return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
}
auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
    -> ConstantInt * {
  return ConstantInt::getSigned(getIntTy(Width), Val);
}
auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  return false;
}
auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return std::nullopt;
}
auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}
auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
  return Val == ConstantInt::getTrue(Val->getType());
}

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
  return isZero(Val);
}
auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
    -> VectorType * {
  EVT ETy = EVT::getEVT(ElemTy, false);
  assert(ETy.isSimple() && "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
                         /*Scalable=*/false);
}
auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
    -> int {
  return getSizeOf(Val->getType(), Kind);
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
    -> int {
  auto *NcTy = const_cast<Type *>(Ty);
  switch (Kind) {
  case Store:
    return DL.getTypeStoreSize(NcTy).getFixedValue();
  case Alloc:
    return DL.getTypeAllocSize(NcTy).getFixedValue();
  }
  llvm_unreachable("Unhandled SizeKind enum");
}
auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // The actual type may be shorter than the HVX vector, so determine
  // the alignment based on subtarget info.
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}
auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());
}

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();
}
auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
  return Zero;
}

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
  return Minus1;
}
auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
    -> Constant * {
  assert(Ty->isVectorTy());
  auto VecTy = cast<VectorType>(Ty);
  Type *ElemTy = VecTy->getElementType();
  // Add support for non-integer types if needed.
  assert(ElemTy->isIntegerTy());
  return ConstantVector::getSplat(VecTy->getElementCount(),
                                  ConstantInt::get(ElemTy, Val));
}
auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(V)) {
    SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
    return simplifyInstruction(In, Q);
  }
  return nullptr;
}
// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
                                   Value *Src, int Start, int Length,
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  assert(0 <= Start && Start + Length <= SrcLen);
  assert(0 <= Where && Where + Length <= DstLen);

  int P2Len = PowerOf2Ceil(SrcLen | DstLen);
  auto *Poison = PoisonValue::get(getByteTy());
  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

  SmallVector<int, 128> SMask(P2Len);
  for (int i = 0; i != P2Len; ++i) {
    // If i is in [Where, Where+Length), pick Src[Start+(i-Where)],
    // otherwise pick Dst[i];
    SMask[i] =
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  }

  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask, "shf");
  return vresize(Builder, P2Insert, DstLen, Poison);
}
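
// vlalignb/vralignb treat {Hi:Lo} as one logical 2*VecLen-byte value and
// shift it left or right by Amt bytes, returning a single vector. For
// example, with VecLen = 4, vralignb(Lo=[0,1,2,3], Hi=[4,5,6,7], 2) yields
// [2,3,4,5]. Constant amounts become shuffles; variable amounts map to
// V6_v(l)alignb for HVX vectors, or to scalar shift sequences for 4- and
// 8-byte vectors.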
auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
                                    Value *Hi, Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  if (isZero(Amt))
    return Hi;
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
                           VecLen);

  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder,
                              HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift =
        Builder.CreateLShr(Builder.CreateShl(Pair, Amt, "shl"), 32, "lsr");
    Value *Trunc =
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
  }
  if (VecLen == 8) {
    Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt, "sub");
    return vralignb(Builder, Lo, Hi, Sub);
  }
  llvm_unreachable("Unexpected vector length");
}
auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
                                    Value *Hi, Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  if (isZero(Amt))
    return Lo;
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Pair, Amt, "lsr");
    Value *Trunc =
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
  }
  if (VecLen == 8) {
    Type *Int64Ty = Type::getInt64Ty(F.getContext());
    Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty, "cst");
    Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty, "cst");
    Function *FI = Intrinsic::getOrInsertDeclaration(
        F.getParent(), Intrinsic::hexagon_S2_valignrb);
    Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt}, "cup");
    return Builder.CreateBitCast(Call, Lo->getType(), "cst");
  }
  llvm_unreachable("Unexpected vector length");
}
// Concatenates a sequence of vectors of the same type.
auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
                                  ArrayRef<Value *> Vecs) const -> Value * {
  assert(!Vecs.empty());
  SmallVector<int, 256> SMask;
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
      Work[ThisW].push_back(UndefValue::get(Ty));
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      Value *Joined = Builder.CreateShuffleVector(
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);
    }
    std::swap(ThisW, OtherW);
  }

  // Since there may have been some undefs appended to make shuffle operands
  // have the same type, perform the last shuffle to only pick the original
  // elements.
  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  Value *Total = Work[ThisW].front();
  return Builder.CreateShuffleVector(Total, SMask, "shf");
}
auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
                                   int NewSize, Value *Pad) const -> Value * {
  assert(isa<VectorType>(Val->getType()));
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = length(ValTy);
  if (CurSize == NewSize)
    return Val;
  // Truncate?
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);
  // Extend.
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad, "spt");
  return Builder.CreateShuffleVector(Val, PadVec, SMask, "shf");
}
auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
                                   Type *FromTy, Type *ToTy) const -> Value * {
  // Mask is a vector <N x i1>, where each element corresponds to an
  // element of FromTy. Remap it so that each element corresponds to an
  // element of ToTy.
  assert(isa<VectorType>(Mask->getType()));

  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
    return Mask;

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);

  // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
  // -> trunc to <M x i1>.
  Value *Ext = Builder.CreateSExt(
      Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false), "sxt");
  Value *Cast = Builder.CreateBitCast(
      Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false), "cst");
  return Builder.CreateTrunc(
      Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false), "trn");
}
// Bitcast to bytes, and return the least significant bits.
auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getBoolTy())
    return Val;

  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
  // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
  // <1 x i1>.
  return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
}
// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getByteTy())
    return Val;

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
  // For bool, return a sext from i1 to i8.
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");
}
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
    -> Value * {
  assert(Start + Length <= length(Val));
  return getElementRange(Builder, Val, Val, Start, Length);
}

auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);
}

auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);
}
auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
                                 Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[i] = 2 * i;           // Even
    Mask[i + Len] = 2 * i + 1; // Odd
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}

auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
                                  Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;       // Val0
    Mask[2 * i + 1] = i + Len; // Val1
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args,
                                              ArrayRef<Type *> ArgTys,
                                              ArrayRef<Value *> MDSources)
    const -> Value * {
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;

    // Non-HVX types should already have a valid type at this point.
    assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));

    Type *BoolTy = Type::getInt1Ty(F.getContext());
    if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
      return Builder.CreateBitCast(Val, DestTy, "cst");

    // Predicate HVX vector.
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
    return Builder.CreateIntrinsic(DestTy, TC, {Val}, /*FMFSource=*/nullptr,
                                   "cup");
  };

  Function *IntrFn =
      Intrinsic::getOrInsertDeclaration(F.getParent(), IntID, ArgTys);
  FunctionType *IntrTy = IntrFn->getFunctionType();

  SmallVector<Value *, 4> IntrArgs;
  for (int i = 0, e = Args.size(); i != e; ++i) {
    Value *A = Args[i];
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
      IntrArgs.push_back(getCast(Builder, A, T));
    } else {
      IntrArgs.push_back(A);
    }
  }
  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

  MemoryEffects ME = Call->getAttributes().getMemoryEffects();
  if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
    propagateMetadata(Call, MDSources);

  Type *CallTy = Call->getType();
  if (RetTy == nullptr || CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
  return getCast(Builder, Call, RetTy);
}
auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               Value *Vec,
                                               unsigned ToWidth) const
    -> SmallVector<Value *> {
  auto *VecTy = cast<VectorType>(Vec->getType());
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;

  SmallVector<Value *> Results(NumResults);
  Results[0] = Vec;
  unsigned Length = length(VecTy);

  // Do it by splitting in half, since those operations correspond to deal
  // instructions.
  auto splitInHalf = [&](unsigned Begin, unsigned End,
                         auto splitFunc) -> void {
    // Take V = Results[Begin], split it in L, H.
    // Store Results[Begin] = L, Results[(Begin+End)/2] = H.
    // Call itself recursively split(Begin, Half), split(Half, End).
    if (Begin + 1 == End)
      return;

    Value *VV = Results[Begin];
    unsigned Width = VV->getType()->getScalarSizeInBits();

    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length,
                                /*Scalable=*/false);
    Value *VVal = Builder.CreateBitCast(VV, VTy, "cst");

    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  };

  splitInHalf(0, NumResults, splitInHalf);
  return Results;
}
auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
                                              ArrayRef<Value *> Values,
                                              VectorType *ToType) const
    -> Value * {
  assert(ToType->getElementType()->isIntegerTy());

  // Join the inputs in pairs: pairwise vshuffs will hopefully fold into
  // perfect shuffles, whereas joining everything at once tends to produce
  // convoluted code. If there are too few inputs for the target element
  // width, fill the list with copies of the sign bit.
  SmallVector<Value *> Inputs(Values);

  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);
  assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
    // Having too many inputs is ok: drop the high bits (usual wrap-around).
    // If there are too few, fill them with the sign bit.
    Value *Last = Inputs.back();
    Value *Sign = Builder.CreateAShr(
        Last, getConstSplat(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);
  }

  while (Inputs.size() > 1) {
    Width *= 2;
    auto *VTy = VectorType::get(getIntTy(Width), Length, false);
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
      Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
    }
    Inputs.resize(Inputs.size() / 2);
  }

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
}
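// This is the inverse of splitVectorElements: the pairwise vshuffs
// re-interleave what vdeal separated, and the AShr fill above supplies the
// sign-extension bits when fewer inputs than ToWidth/Width are given.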
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (const auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  // Helper builder that erases the instructions it created on destruction.
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (Value *S = simplify(V))
      return S;
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  // Split each index into its unknown and known bits: the unknown parts must
  // be identical (so they cancel), and the known parts subtract as constants.
  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  Value *MaskK = ConstantInt::get(Idx0->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}
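// For instance, for the (hypothetical) GEPs
//   %p0 = getelementptr i32, ptr %base, i32 %i
//   %p1 = getelementptr i32, ptr %base, i32 %j
// with %i - %j folding to the constant 4, the result is 4 * 4 = 16 bytes.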
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}

auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
    return false;
  return true;
}
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To ||
         To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // Assume intrinsics can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
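// This check is what lets the alignment code gather a whole load/store group
// at one insertion point: an access joins a MoveGroup only if it can travel
// to the group's base instruction without crossing a conflicting access.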
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
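// HexagonTargetMachine adds this pass to its IR pipeline via
// createHexagonVectorCombineLegacyPass(); it is registered under DEBUG_TYPE
// ("hexagon-vc"), which is also the name to use with -debug-only for tracing.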