#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"

#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
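// 131072 bytes = 128 KiB; per the macro name, this is the assumed default
// VTCM page size when the target does not specify one.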
class HexagonVectorCombine {
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),

  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
              int Length, int Where) const;
              unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
  template <typename T = std::vector<Instruction *>>
                              const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
                         int Start, int Length) const;

  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;

    AddrInfo(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;

  using AddrList = std::vector<AddrInfo>;
      return A->comesBefore(B);
  using DepList = std::set<Instruction *, InstrLess>;

    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;
  using MoveList = std::vector<MoveGroup>;

      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;

      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
          : Seg(Val, Off, Len), Pos(Pos) {}

    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
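  // A ByteSpan is a list of Blocks. Each Block takes Seg.Size bytes of
  // Seg.Val starting at byte Seg.Start and places them at byte offset Pos
  // within the span; Blocks may appear in any order and may leave gaps.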
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
                                const InstMap &CloneMap = InstMap()) const;
                               const InstMap &CloneMap = InstMap()) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;
                              int Alignment) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;

  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;

  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
    OS << "  " << *I << '\n';
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';

                         const AlignVectors::ByteSpan::Block &B) {
  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {

  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)

  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);

  std::optional<unsigned> RoundAt;
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
                     const FxpOp &Op) const -> Value *;
                    bool Rounding) const -> Value *;
                    bool Rounding) const -> Value *;
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
      -> std::pair<Value *, Value *>;

  const HexagonVectorCombine &HVC;

                     const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << " + 1<<" << *Op.RoundAt;
  OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;

#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
  for (auto i = map.begin(), e = map.end(); i != e;) {
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {

auto AlignVectors::ByteSpan::extent() const -> int {
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
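  // Worked example: Blocks at (Pos=8, Size=4) and (Pos=0, Size=4) give
  // Min = 0 and Max = 12, so the extent (Max - Min) is 12 bytes.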
auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
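      // Example: for a Block covering span bytes [8,16), section(10, 4)
      // yields L=10, R=14, Off=2, emitting a Block of size R-L=4 at Pos=10
      // that reads from Seg.Start+2 of the same value.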
auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {

  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;

auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {

auto AlignVectors::getPayload(Value *Val) const -> Value * {
      ID = II->getIntrinsicID();
      return In->getOperand(0);

auto AlignVectors::getMask(Value *Val) const -> Value * {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);
  Type *ValTy = getPayload(Val)->getType();
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);

auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    if (Instruction *New = CloneMap.lookup(I))
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");

auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(
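  // The mask -Alignment has the low log2(Alignment) bits clear, so the AND
  // rounds the address down. Worked example: Ptr = 0x1007, Alignment = 128:
  // -128 is ...FFFF80, and 0x1007 & ~0x7F = 0x1000, the highest 128-byte
  // boundary not above Ptr.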
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);
  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
                               Mask, PassThru, "mld");

auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");

auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
         "Predicated 'scalar' vector loads not yet supported");
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,

auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
          "Expecting scalar predicate"));
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
  if (HVC.isTrue(Mask)) {
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
      Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,

auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,

auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
         "Predicated 'scalar' vector stores not yet supported");
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,

auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");
  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
      if (I->getParent() == Parent && Base->comesBefore(I))

auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);
  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);
    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  return !AddrGroups.empty();
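// Grouping sketch: two accesses join the same address group when
// calculatePointerDifference proves a constant byte distance between their
// pointers, e.g. loads from p and p+64 form one group with relative
// offsets {0, 64} keyed by the group's base instruction.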
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    Move.Main.push_back(Info.Inst);
  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
    erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
    Move.Main.push_back(Info.Inst);
  MoveList StoreGroups;
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
    erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
        auto MaybeInfo = this->getAddrInfo(*S);
        assert(MaybeInfo.has_value());
        return HVC.HST.isHVXVectorType(
            EVT::getEVT(MaybeInfo->ValTy, false));

auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    for (Instruction *M : Main) {
        M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
    assert(Move.Deps.empty());
    for (Instruction *M : Main.drop_front(1)) {
  return Move.Main.size() + Move.Deps.size() > 1;

template <typename T>
  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));

auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock *BaseBlock = Builder.GetInsertBlock();
  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
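  // NumSectors is a ceiling division: with ScLen = 64 and VSpan.extent() =
  // 100, NumSectors = (100 + 63) / 64 = 2. When DoAlign is set, each section
  // read below covers (1 + DoAlign) * ScLen bytes so that two adjacent
  // sectors can later be funnel-shifted (vralignb) into one aligned vector.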
  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

  DenseMap<void *, Instruction *> EarliestUser;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  auto earliestUser = [&](const auto &Uses) {
    for (const Use &U : Uses) {
      assert(I != nullptr && "Load used in a non-instruction?");
      if (I->getParent() == BaseBlock) {
          User = std::min(User, I, isEarlier);
  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);

    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());

    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);

  for (int Index = 0; Index != NumSectors + 1; ++Index) {
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
        moveBefore(Load->getIterator(), BasePos);
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');

  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      ASpan[Index].Seg.Val = Val;

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
              [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
    for (ByteSpan::Block *S : ABlocks) {
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
    Type *ValTy = getPayload(B.Seg.Val)->getType();
                                getPassThrough(B.Seg.Val), "sel");

auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  ByteSpan ASpanV, ASpanM;

  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    auto *VecTy = VectorType::get(Ty, 1, false);

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
          Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);
      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

  for (int Index = 1; Index != NumSectors + 2; ++Index) {
    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
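// The store path mirrors the load path: per-sector values and their byte
// masks are accumulated, shifted as a pair with vlalignb, and written back
// as masked sector stores, leaving bytes outside VSpan untouched.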
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
      return GetValue(A) < GetValue(B);
  const AddrList &BaseInfos = AddrGroups.at(Move.Base);
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

                                    InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
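    // Worked example: Start = -4, OffAtMax = 60, MinNeeded = 64. Then
    // Adjust = -alignTo(64, 64) = -64, so AlignAddr lands at group offset
    // 60 - 64 = -4, and Diff = -4 - (-4) = 0, satisfying Diff < 64.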
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);

                  : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

    dbgs() << "ScLen: " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';

    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  int Size = HVC.getSizeOf(Ty);

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';

  MoveList LoadGroups, StoreGroups;
  for (auto &G : AddrGroups) {
    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";

  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
  unsigned StoreLimit = CountLimit - LoadGroups.size();
  if (StoreGroups.size() > StoreLimit)
    StoreGroups.resize(StoreLimit);
  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)

auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
    NumToTest = Bits - 1;
  return {Bits, Sign};

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {

auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
    return std::nullopt;
  auto m_Shr = [](auto &&V, auto &&S) {
  if (Op.Frac > Width)
    return std::nullopt;
    return std::nullopt;
  Op.Opcode = Instruction::Mul;
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
  return std::nullopt;

auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  if (VecTy == nullptr)
  unsigned ElemWidth = ElemTy->getBitWidth();
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
  if (ElemWidth <= 32 && Op.Frac == 0)

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
                               InstSimplifyFolder(HVC.DL));
  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width > 32 && Width % 32 != 0) {
  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);
  unsigned Width = std::max(BitsX, BitsY);
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
  } else if (Width > ElemWidth) {
  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
    ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));

inline bool HvxIdioms::matchScatter(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);

inline bool HvxIdioms::matchGather(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_gather);

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:

  assert(Ptr && "Unable to extract pointer");
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(1);

                               HvxIdioms::DstQualifier &Qual) {
    Qual = HvxIdioms::LdSt;
    if (II->getIntrinsicID() == Intrinsic::masked_gather) {
      Qual = HvxIdioms::LLVM_Gather;
    } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
      Qual = HvxIdioms::LLVM_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
      Qual = HvxIdioms::LdSt;
    } else if (II->getIntrinsicID() ==
               Intrinsic::hexagon_V6_vgather_vscattermh) {
      Qual = HvxIdioms::HEX_Gather_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
      Qual = HvxIdioms::HEX_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
      Qual = HvxIdioms::HEX_Gather;
    Qual = HvxIdioms::Call;
    Qual = HvxIdioms::Arithmetic;

  for (auto &U : In->uses()) {
      Users.push_back(Destination);

  assert(In && "Bad instruction");
         "Not a gather Intrinsic");
  if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
    auto *Src = IE->getOperand(1);
  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getType();
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(0)->getType();
  return In->getType();

    if (II->getIntrinsicID() == Intrinsic::masked_load)
    if (II->getIntrinsicID() == Intrinsic::masked_gather)
        return cstDataVector;
      return GEPIndex->getOperand(0);
  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");

                          unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  return Builder.CreateIntrinsic(
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},

Value *HvxIdioms::processVScatter(Instruction &In) const {
  assert(InpTy && "Cannot handle non-vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned Elements = HVC.length(InpTy);
  dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
  dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
         << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
         << ElemWidth << ")\n";
                               InstSimplifyFolder(HVC.DL));
  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
                    << ") for vscatter\n");

  Value *CastIndex = nullptr;
  if (cstDataVector) {
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
        IndexesAlloca, "reload_index");
    CastIndex = Indexes;

  if (ElemWidth == 1) {
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes,
        nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
  } else if (ElemWidth == 2) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
  } else if (ElemWidth == 4) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,

Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non-vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
  assert(ElemTy && "llvm.gather needs vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
                    << *In.getParent() << "\n");
                    << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
                    << ") type(" << *ElemTy << ") Access alignment("
                    << *In.getOperand(1) << ") AddressSpace("
                    << ElemTy->getAddressSpace() << ")\n");
         "llvm.gather needs vector for mask");
                               InstSimplifyFolder(HVC.DL));

  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
  assert(DstType && "Cannot handle non-vector dst type for llvm.gather");

  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    unsigned OutputSize = HVC.getSizeOf(DstType);
                      << " Address space ("
                      << Ptr->getType()->getPointerAddressSpace() << ")\n"
                      << " Result type : " << *DstType
                      << "\n Size in bytes : " << OutputSize
                      << " element type(" << *DstElemTy
                      << ")\n ElemWidth : " << ElemWidth << " bytes\n");
    assert(IndexType && "Cannot handle non-vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

    if (ElemWidth == 1) {
      Value *CastIndexes =
      auto *UnpackedIndexes =
          V6_vunpack, CastIndexes, nullptr);
      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
    } else if (ElemWidth == 2) {
      if (IndexWidth == 2) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
                          << " Shifted half index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), V6_vgather,
        for (auto &U : Dst->uses()) {
          dbgs() << " dst used by: " << *UI << "\n";
        for (auto &U : In.uses()) {
          dbgs() << " In used by : " << *UI << "\n";
            HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
        In.replaceAllUsesWith(LoadedResult);
        dbgs() << " Unhandled index type for vgather\n";
    } else if (ElemWidth == 4) {
      if (IndexWidth == 4) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
                          << " Shifted word index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
    Dst->eraseFromParent();
  } else if (Qual == HvxIdioms::LLVM_Scatter) {
    assert(DstInpTy && "Cannot handle non-vector type for llvm.scatter");
    unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    unsigned DstElements = HVC.length(DstInpTy);
    assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
    dbgs() << " Gather feeds into scatter\n Values to scatter : "
           << *Dst->getOperand(0) << "\n";
    dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
           << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
           << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
  } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
    LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
  } else if (Qual == HvxIdioms::HEX_Scatter) {
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::HEX_Gather) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
                      << "\n AddressSpace: "
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::LLVM_Gather) {
    errs() << " Unimplemented vgather-to-vgather sequence\n";

auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();
  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    Value *QMul = nullptr;
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    if (QMul != nullptr)

  assert(Width < 32 || Width % 32 == 0);
  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
      Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
        ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
        : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
  if (Op.RoundAt.has_value()) {
    RoundV[*Op.RoundAt / 32] =
        HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);

  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    if (Src + 1 < End) {
  WordP.resize(WordP.size() - SkipWords);
  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);

auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});

auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
    if (CarryIn == nullptr)
      CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
    Args.push_back(CarryIn);
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    return {Result, CarryOut};

  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  std::tie(X, Y) = canonSgn(X, Y);
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  unsigned Len = HVC.length(HvxP16Ty) / 2;
  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;
      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");

auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);
  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});

  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];
  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);

  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
auto HvxIdioms::run() -> bool {
  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        It->replaceAllUsesWith(New);
        It = StartOver ? B.rbegin()
      } else if (matchGather(*It)) {
          It->eraseFromParent();
      } else if (matchScatter(*It)) {
          It->eraseFromParent();

auto HexagonVectorCombine::run() -> bool {
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
  if (HST.useHVXOps()) {
      Changed |= AlignVectors(*this).run();
      Changed |= HvxIdioms(*this).run();
    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"

auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  return VectorType::get(ByteTy, ElemCount, false);

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  return VectorType::get(BoolTy, ElemCount, false);

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
    return CI->getValue();
  return std::nullopt;

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  return getSizeOf(Val->getType(), Kind);

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  auto *NcTy = const_cast<Type *>(Ty);
    return DL.getTypeStoreSize(NcTy).getFixedValue();
    return DL.getTypeAllocSize(NcTy).getFixedValue();

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {

auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
  Type *ElemTy = VecTy->getElementType();
                                  ConstantInt::get(ElemTy, Val));

auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
    SimplifyQuery Q(DL, &TLI, &DT, &AC, In);

auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  return vresize(Builder, P2Insert, DstLen, Poison);
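// insertb(Dst, Src, Start, Length, Where) copies Src[Start..Start+Length)
// over Dst at byte Where using a single shuffle of the concatenation
// {Dst, Src}: mask entry i picks P2Len + Start + (i - Where), a Src byte,
// inside the window and i, the original Dst byte, elsewhere.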
auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    return vralignb(Builder, Lo, Hi, Sub);
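// vlalignb/vralignb view {Lo, Hi} as one double-length byte vector and take
// a VecLen-byte window. Example with VecLen = 4, Lo = [l0 l1 l2 l3] and
// Hi = [h0 h1 h2 h3]: vralignb(Lo, Hi, 1) = [l1 l2 l3 h0], while
// vlalignb(Lo, Hi, 1) starts at VecLen - 1, giving [l3 h0 h1 h2].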
auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    Type *Int64Ty = Type::getInt64Ty(F.getContext());

auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;
  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);
    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);
  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);

auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
  assert(ValTy->getElementType() == Pad->getType());
  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);

auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);
  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);
      Mask, VectorType::get(FromITy, FromCount, false), "sxt");
      Ext, VectorType::get(ToITy, ToCount, false), "cst");
      Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");

auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
  if (ScalarTy == getBoolTy())
  Value *Bytes = vbytes(Builder, Val);
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
  return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");

auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
  if (ScalarTy == getByteTy())
  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");

auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
  return getElementRange(Builder, Val, Val, Start, Length);

auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);

auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);

auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  for (int i = 0; i != Len; ++i) {

auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;
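// vdeal and vshuff are inverse permutations. Sketch for Len = 4:
// vshuff([a0 a1 a2 a3], [b0 b1 b2 b3]) interleaves into
// [a0 b0 a1 b1 a2 b2 a3 b3], and vdeal de-interleaves such a pair back into
// its even-indexed and odd-indexed halves.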
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
    assert(HST.isTypeForHVX(SrcTy, true));
    Type *BoolTy = Type::getInt1Ty(F.getContext());
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
  for (int i = 0, e = Args.size(); i != e; ++i) {
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
  if (RetTy == nullptr || CallTy == RetTy)
  assert(HST.isTypeForHVX(CallTy, true));
  return getCast(Builder, Call, RetTy);

auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               unsigned ToWidth) const
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;
  unsigned Length = length(VecTy);
  auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
    if (Begin + 1 == End)
    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);
    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  splitInHalf(0, NumResults, splitInHalf);
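// Example: splitting a vector of i32 into ToWidth = 8 yields NumResults = 4
// byte vectors; the first recursion level vdeal-separates the low and high
// i16 halves of each i32, the next level repeats this per i16, so (assuming
// little-endian lane order) Results[k] holds byte k of every element.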
3557auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
3559 VectorType *ToType)
const
3561 assert(ToType->getElementType()->isIntegerTy());
3572 unsigned ToWidth = ToType->getScalarSizeInBits();
3573 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
3574 assert(Width <= ToWidth);
3576 unsigned Length = length(Inputs.front()->getType());
3578 unsigned NeedInputs = ToWidth / Width;
3579 if (Inputs.size() != NeedInputs) {
3584 Last, getConstSplat(
Last->getType(), Width - 1),
"asr");
3585 Inputs.resize(NeedInputs, Sign);
3588 while (Inputs.size() > 1) {
3591 for (
int i = 0, e = Inputs.size(); i < e; i += 2) {
3592 Value *Res =
vshuff(Builder, Inputs[i], Inputs[i + 1]);
3595 Inputs.resize(Inputs.size() / 2);
3598 assert(Inputs.front()->getType() == ToType);
3599 return Inputs.front();
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  // Fall back to a manual analysis of single-index GEPs. Temporary
  // instructions created along the way are erased by ~Builder.
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (Value *S = simplify(V))
      return S;
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction of the indexes directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  // Subtract the unknown and the known parts separately; if both differences
  // fold to constants, their sum is the index difference.
  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU))
    Diff0 = C->getSExtValue();
  else
    return std::nullopt;

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK))
    Diff1 = C->getSExtValue();
  else
    return std::nullopt;

  return (Diff0 + Diff1) * Scale;
#undef CallBuilder
}
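// Editorial note (illustrative, not in the source): for
//   %g0 = getelementptr i32, ptr %p, i32 %i
//   %g1 = getelementptr i32, ptr %p, i32 %j
// where %j simplifies to %i + 3, the index difference is -3 and the element
// size Scale is 4, so the pointer difference is -12 bytes. The KnownBits path
// handles indexes where only some bits can be proven equal or constant.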
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}

auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
    return false;
  return true;
}
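// Editorial note (added commentary, not in the source): cloning comes up when
// grouping loads/stores requires duplicating address computations at the move
// destination; anything that touches memory or has side effects is
// conservatively rejected as a clone candidate.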
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.end() == To || To->getParent() == &Block);

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // The assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
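// Editorial note (added commentary, not in the source): the scan walks only
// the instructions strictly between In's current position and the destination
// To, in whichever direction the move goes. Aliasing is checked with
// MemoryLocation objects, so masked loads/stores are compared by the memory
// they actually access rather than being treated as opaque calls.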
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
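// Editorial note (illustrative, not in the source): with two <4 x i32>
// inputs, Start = 2 and Length = 4 produce the mask [2,3,4,5], i.e. the last
// two elements of Lo followed by the first two elements of Hi, as if reading
// a window from the concatenation Lo:Hi.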
namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
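// Editorial note (assumption, not shown in this listing): the Hexagon target
// is expected to schedule this pass from its own pass pipeline, e.g. by
// calling createHexagonVectorCombineLegacyPass() during HexagonTargetMachine's
// IR-pass setup; the exact call site lives outside this file.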