#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"

#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
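// Note: 131072 bytes = 128 KiB, presumably one VTCM page on HVX targets.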
    MinLoadGroupSizeForAlignment("hvc-ld-min-group-size-for-alignment",

class HexagonVectorCombine {
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;

  std::optional<APInt> getIntValue(const Value *Val) const;

  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;

  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;

                 int Length, int Where) const;

                 unsigned ToWidth) const;

  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  unsigned getNumSignificantBits(const Value *V,

  template <typename T = std::vector<Instruction *>>
                              const T &IgnoreInsts = {}) const;

  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;

                         int Start, int Length) const;
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;

    AddrInfo(const AddrInfo &) = default;
    AddrInfo &operator=(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

  using AddrList = std::vector<AddrInfo>;

      return A->comesBefore(B);

  using DepList = std::set<Instruction *, InstrLess>;

    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;

  using MoveList = std::vector<MoveGroup>;

      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
          : Seg(Val, Off, Len), Pos(Pos) {}

    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
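    // A ByteSpan is, in effect, a list of Blocks, each tying a byte sub-range
    // [Start, Start+Size) of some Value (the Segment) to a position Pos within
    // the span; it models where the bytes of grouped loads/stores fall
    // relative to a common base address.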
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;

  [[maybe_unused]] bool isSectorTy(Type *Ty) const;

                                 const InstMap &CloneMap = InstMap()) const;
                                const InstMap &CloneMap = InstMap()) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move);
                               int Alignment) const;

  AddrGroupMap AddrGroups;

  const HexagonVectorCombine &HVC;
                         const AlignVectors::AddrGroupMap &AG) {
  OS << "Printing AddrGroups:";
  for (auto &It : AG) {
    OS << "\n\tInstruction: ";
    OS << "\n\tAddrInfo: ";
    for (auto &AI : It.second)

                         const AlignVectors::AddrList &AL) {
  OS << "\n *** Addr List: ***\n";
  for (auto &AG : AL) {
    OS << "\n *** Addr Group: ***\n";

                         const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;

                         const AlignVectors::MoveList &ML) {
  OS << "\n *** Move List: ***\n";
  for (auto &MG : ML) {
    OS << "\n *** Move Group: ***\n";

                         const AlignVectors::MoveGroup &MG) {
  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
    OS << "  " << *I << '\n';
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';

  OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {

                         const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);

    std::optional<unsigned> RoundAt;

      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;

  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
                        const FxpOp &Op) const -> Value *;
                    bool Rounding) const -> Value *;
                    bool Rounding) const -> Value *;
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
      -> std::pair<Value *, Value *>;

  std::optional<uint64_t>
  std::optional<uint64_t> getPHIBaseMinAlignment(Instruction &In,

  const HexagonVectorCombine &HVC;
                         const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << " + 1<<" << *Op.RoundAt;
  OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;

template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
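// extent() below is the distance from the lowest block position to the end of
// the highest one; e.g. blocks at Pos 0 (size 64) and Pos 96 (size 64) give an
// extent of 160 bytes.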
auto AlignVectors::ByteSpan::extent() const -> int {
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {

  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Align(DL.getABITypeAlign(ValTy).value());

auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {

auto AlignVectors::getPayload(Value *Val) const -> Value * {
      ID = II->getIntrinsicID();
      return In->getOperand(0);

auto AlignVectors::getMask(Value *Val) const -> Value * {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);

  Type *ValTy = getPayload(Val)->getType();

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);
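// createAlignedPointer below rounds a pointer down to the requested alignment
// by masking its integer value, roughly (PtrAsInt & -Alignment); for
// Alignment == 128 the mask is ~127.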
auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    if (Instruction *New = CloneMap.lookup(I))
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");

auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  if (!HVC.isTrue(Predicate)) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);

  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, EffA.value(), MDSources);
      Builder.CreateMaskedLoad(ValTy, Ptr, EffA, Mask, PassThru, "mld");

auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
  Instruction *Load = Builder.CreateAlignedLoad(ValTy, Ptr, EffA, "ald");

auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
         "Predicated 'scalar' vector loads not yet supported");
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % EffA.value() == 0);
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, EffA.value(), MDSources);

  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},
auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
          "Expecting scalar predicate"));
    if (HVC.isFalse(Predicate))
    if (HVC.isTrue(Predicate))
  if (HVC.isTrue(Mask)) {
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
        Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);

  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,

auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,

auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
         "Predicated 'scalar' vector stores not yet supported");
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, EffA.value(), MDSources);

  assert(HVC.getSizeOf(Val, HVC.Alloc) % EffA.value() == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
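// getUpwardDeps below walks In's operands with a work queue, collecting every
// instruction in the same block that comes after Base and that In transitively
// depends on; these are the instructions that must be cloned or moved when In
// itself is moved next to Base.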
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
      if (I->getParent() == Parent && Base->comesBefore(I))
auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  AddrGroups.remove_if([](auto &G) { return G.second.size() == 1; });
      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  return !AddrGroups.empty();
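// createLoadGroups below greedily packs the loads of one address group into
// MoveGroups that can legally be moved next to the group's base instruction;
// groups with fewer than MinLoadGroupSizeForAlignment loads are dropped, since
// realignment would not pay off for them.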
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    Move.Main.push_back(Info.Inst);

  MoveList LoadGroups;
  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

  unsigned LoadGroupSizeLimit = MinLoadGroupSizeForAlignment;
  erase_if(LoadGroups, [LoadGroupSizeLimit](const MoveGroup &G) {
    return G.Main.size() < LoadGroupSizeLimit;

    erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(),
                                    Move.Main))
    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

    erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });

  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
        auto MaybeInfo = this->getAddrInfo(*S);
        assert(MaybeInfo.has_value());
        return HVC.HST.isHVXVectorType(
            EVT::getEVT(MaybeInfo->ValTy, false));
auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");

    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    for (Instruction *M : Main) {
      M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];

    assert(Move.Deps.empty());
    for (Instruction *M : Main.drop_front(1)) {

  return Move.Main.size() + Move.Deps.size() > 1;
template <typename T>
  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
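// realignLoadGroup below implements the usual unaligned-load idiom: emit
// NumSectors aligned sector loads from AlignAddr (plus one extra when the
// start is not provably aligned), then combine each adjacent pair with
// vralignb(Sector[i], Sector[i+1], AlignVal) to reconstruct the unaligned
// bytes, materializing every combined sector just before its earliest user.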
auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock *BaseBlock = Builder.GetInsertBlock();

  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

  DenseMap<void *, Instruction *> EarliestUser;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  auto earliestUser = [&](const auto &Uses) {
    for (const Use &U : Uses) {
      assert(I != nullptr && "Load used in a non-instruction?");
      if (I->getParent() == BaseBlock) {
        User = std::min(User, I, isEarlier);

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);

    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());

    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);

  for (int Index = 0; Index != NumSectors + 1; ++Index) {
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
        moveBefore(Load->getIterator(), BasePos);
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');

  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      ASpan[Index].Seg.Val = Val;

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);

    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
              [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
    for (ByteSpan::Block *S : ABlocks) {
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);

    Type *ValTy = getPayload(B.Seg.Val)->getType();
        getPassThrough(B.Seg.Val), "sel");
auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  ByteSpan ASpanV, ASpanM;

  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    auto *VecTy = VectorType::get(Ty, 1, false);

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign;
       ++Index) {
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
                               Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);

      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);

    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

  for (int Index = 1; Index != NumSectors + 2; ++Index) {
    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
auto AlignVectors::realignGroup(const MoveGroup &Move) -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
      return GetValue(A) < GetValue(B);

  AddrList &BaseInfos = AddrGroups[Move.Base];

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

                                InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);

                  : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

    dbgs() << "ScLen:  " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';

    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  int Size = HVC.getSizeOf(Ty);
auto AlignVectors::run() -> bool {
  if (!createAddressGroups())

    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';

  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {

    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";

  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)

  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);

  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)

  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);

  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
    NumToTest = Bits - 1;

  return {Bits, Sign};

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
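// matchFxpMul below recognizes a fixed-point multiply spelled out in scalar
// IR, roughly trunc((sext(X) * sext(Y) + (1 << (Frac - 1))) >> Frac), and
// records the operands, the fractional shift (Op.Frac), and the optional
// rounding-bit position (Op.RoundAt).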
auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
    return std::nullopt;

  auto m_Shr = [](auto &&V, auto &&S) {

  if (Op.Frac > Width)
    return std::nullopt;
    return std::nullopt;

  Op.Opcode = Instruction::Mul;
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;

  return std::nullopt;
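// processFxpMul below first resizes both inputs to their significant width
// (rounded up to a legal element width), then chops the work into
// register-sized pieces; with a 128-byte HVX register and 32-bit elements,
// ChopLen = (8 * 128) / 32 = 32 elements per piece.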
auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  if (VecTy == nullptr)
  unsigned ElemWidth = ElemTy->getBitWidth();
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)

  if (ElemWidth <= 32 && Op.Frac == 0)

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);

                                InstSimplifyFolder(HVC.DL));

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width > 32 && Width % 32 != 0) {

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  unsigned Width = std::max(BitsX, BitsY);
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
  } else if (Width > ElemWidth) {

  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);

  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
inline bool HvxIdioms::matchScatter(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);

inline bool HvxIdioms::matchGather(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_gather);

inline bool HvxIdioms::matchMLoad(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_load);

inline bool HvxIdioms::matchMStore(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_store);

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  assert(Ptr && "Unable to extract pointer");
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(1);

                         HvxIdioms::DstQualifier &Qual) {
    Qual = HvxIdioms::LdSt;
    if (II->getIntrinsicID() == Intrinsic::masked_gather) {
      Qual = HvxIdioms::LLVM_Gather;
    } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
      Qual = HvxIdioms::LLVM_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
      Qual = HvxIdioms::LdSt;
    } else if (II->getIntrinsicID() ==
               Intrinsic::hexagon_V6_vgather_vscattermh) {
      Qual = HvxIdioms::HEX_Gather_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
      Qual = HvxIdioms::HEX_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
      Qual = HvxIdioms::HEX_Gather;
    Qual = HvxIdioms::Call;
    Qual = HvxIdioms::Arithmetic;
  for (auto &U : In->uses()) {
      Users.push_back(Destination);

  assert(In && "Bad instruction");
         "Not a gather Intrinsic");
  if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
    auto *Src = IE->getOperand(1);
  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getType();
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(0)->getType();
  return In->getType();

    if (II->getIntrinsicID() == Intrinsic::masked_load)
    if (II->getIntrinsicID() == Intrinsic::masked_gather)
      return cstDataVector;
    return GEPIndex->getOperand(0);
  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");

                        unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  return Builder.CreateIntrinsic(
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
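// In the gather/scatter lowering below, the 0x00ff00ff pattern fed to
// get_i32_Mask yields a byte predicate selecting the low byte of each
// halfword; it drives the masked vgathermhq/vscattermhq forms when byte
// elements have been unpacked to halfwords.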
Value *HvxIdioms::processVScatter(Instruction &In) const {
  assert(InpTy && "Cannot handle non-vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned Elements = HVC.length(InpTy);
  dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
  dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
         << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
         << ElemWidth << ")\n";

                                InstSimplifyFolder(HVC.DL));

  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter  : " << *ValueToScatter << "\n");
             << ") for vscatter\n");

  Value *CastIndex = nullptr;
  if (cstDataVector) {
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes  : " << *StoreIndexes << "\n");
        IndexesAlloca, "reload_index");
    CastIndex = Indexes;

  if (ElemWidth == 1) {
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes,
        nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes  : " << *UnpackedIndexes << ")\n");
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
  } else if (ElemWidth == 2) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
  } else if (ElemWidth == 4) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non-vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
  assert(ElemTy && "llvm.gather needs vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *ElemTy << ") Access alignment("
             << *In.getOperand(1) << ") AddressSpace("
             << ElemTy->getAddressSpace() << ")\n");

         "llvm.gather needs vector for mask");
                                InstSimplifyFolder(HVC.DL));

  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
  LLVM_DEBUG(dbgs() << " Destination  : " << *Dst << " Qual(" << Qual
    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
  assert(DstType && "Cannot handle non-vector dst type for llvm.gather");

  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    unsigned OutputSize = HVC.getSizeOf(DstType);
               << " Address space ("
               << " Result type  : " << *DstType
               << "\n Size in bytes : " << OutputSize
               << " element type(" << *DstElemTy
               << ")\n ElemWidth  : " << ElemWidth << " bytes\n");
    assert(IndexType && "Cannot handle non-vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

    if (ElemWidth == 1) {
      Value *CastIndexes =
      auto *UnpackedIndexes =
          V6_vunpack, CastIndexes, nullptr);
      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
    } else if (ElemWidth == 2) {
      if (IndexWidth == 2) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
                   << " Shifted half index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), V6_vgather,
        for (auto &U : Dst->uses()) {
          dbgs() << " dst used by: " << *UI << "\n";
        for (auto &U : In.uses()) {
          dbgs() << " In used by : " << *UI << "\n";
            HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
        In.replaceAllUsesWith(LoadedResult);
        dbgs() << " Unhandled index type for vgather\n";
    } else if (ElemWidth == 4) {
      if (IndexWidth == 4) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
                   << " Shifted word index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
    Dst->eraseFromParent();
  } else if (Qual == HvxIdioms::LLVM_Scatter) {
    assert(DstInpTy && "Cannot handle non-vector type for llvm.scatter");
    unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    unsigned DstElements = HVC.length(DstInpTy);
    assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
    dbgs() << " Gather feeds into scatter\n Values to scatter : "
           << *Dst->getOperand(0) << "\n";
    dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
           << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
           << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
  }
  else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
      LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
          {ResultAlloca, CastedSrc,
          HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
      In.replaceAllUsesWith(LoadedResult);
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
  } else if (Qual == HvxIdioms::HEX_Scatter) {
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::HEX_Gather) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
                 << "\n AddressSpace: "
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
          {ResultAlloca, CastedSrc,
          HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
      In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::LLVM_Gather) {
    errs() << " Unimplemented vgather-to-vgather sequence\n";
std::optional<uint64_t> HvxIdioms::getPHIBaseMinAlignment(Instruction &In,
                                                          PHINode *PN) const {
    return std::nullopt;

  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  uint64_t minPHIAlignment = Value::MaximumAlignment;

  while (!Worklist.empty()) {
    if (!Visited.insert(V).second)
      if (KnownAlign.value() < minPHIAlignment)
        minPHIAlignment = KnownAlign.value();

  if (minPHIAlignment != Value::MaximumAlignment)
    return minPHIAlignment;
  return std::nullopt;

std::optional<uint64_t> HvxIdioms::getAlignment(Instruction &In,
  SmallPtrSet<Value *, 16> Visited;
  return getAlignmentImpl(In, ptr, Visited);

std::optional<uint64_t>
HvxIdioms::getAlignmentImpl(Instruction &In, Value *ptr,
                            SmallPtrSet<Value *, 16> &Visited) const {
  if (!Visited.insert(ptr).second)
    return std::nullopt;

  if (KnownAlign.value() > 1) {
    return KnownAlign.value();

    auto baseAlignmentOpt = getPHIBaseMinAlignment(In, PN);
    if (!baseAlignmentOpt)
      return std::nullopt;
    uint64_t minBaseAlignment = *baseAlignmentOpt;
    if (minBaseAlignment == 1)
    uint64_t minPHIAlignment = minBaseAlignment;
               << ") nodes and min base aligned to (" << minBaseAlignment
      uint64_t MemberAlignment = Value::MaximumAlignment;
        MemberAlignment = *res;
        return std::nullopt;
      if (minPHIAlignment > MemberAlignment)
        minPHIAlignment = MemberAlignment;
    LLVM_DEBUG(dbgs() << " total PHI alignment(" << minPHIAlignment << ")\n");
    return minPHIAlignment;

  auto *GEPPtr = GEP->getPointerOperand();
    if (&In == GEPPtr) {
            GEPPtr->getType()->getPointerAddressSpace()),
      if (GEP->accumulateConstantOffset(HVC.DL, Offset)) {
                   << Offset.getZExtValue() << ")\n");
        return Offset.getZExtValue();

  return std::nullopt;
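// processMStore/processMLoad below recompute a provable alignment for the
// pointer operand (walking GEP offsets and PHI bases) and, when the computed
// value is at least the alignment already on the call, strengthen the align
// parameter attribute of the llvm.masked.store/load accordingly.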
Value *HvxIdioms::processMStore(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non-vector type for llvm.masked.store");
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *InpTy->getElementType() << ") of size("
             << InpTy->getScalarSizeInBits() << ")bits\n");
  assert(CI && "Expected llvm.masked.store to be a call");
  Align HaveAlign = CI->getParamAlign(1).valueOrOne();

  Type *ValTy = In.getOperand(0)->getType();
  if (EffA < HaveAlign)

  AttrBuilder AttrB(CI->getContext());
  AttrB.addAlignmentAttr(EffA);
      CI->getAttributes().addParamAttributes(CI->getContext(), 1, AttrB));

Value *HvxIdioms::processMLoad(Instruction &In) const {
  assert(InpTy && "Cannot handle non-vector type for llvm.masked.load");
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *InpTy->getElementType() << ") of size("
             << InpTy->getScalarSizeInBits() << ")bits\n");
  assert(CI && "Expected to be a call to llvm.masked.load");
  Align HaveAlign = CI->getParamAlign(0).valueOrOne();

  Type *ValTy = In.getType();
  if (EffA < HaveAlign)

  AttrBuilder AttrB(CI->getContext());
  AttrB.addAlignmentAttr(EffA);
      CI->getAttributes().addParamAttributes(CI->getContext(), 0, AttrB));
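// processFxpMulChopped below handles one register-sized piece: Q15/Q31 shapes
// map onto single instructions, 16-bit cases use the 16x16 helpers, and wider
// cases split into 32-bit words, multiply long-hand, add the rounding bit, and
// shift right by Op.Frac before joining the words back together.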
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    Value *QMul = nullptr;
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    if (QMul != nullptr)

  assert(Width < 32 || Width % 32 == 0);

  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))

    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
          ConstantInt::get(Prod32->getType(), 1ull << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");

        ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
        : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

  if (Op.RoundAt.has_value()) {
    RoundV[*Op.RoundAt / 32] =
        ConstantInt::get(HvxWordTy, 1ull << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);

  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);

  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    if (Src + 1 < End) {
  WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
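// createMulQ15 below maps a Q15 fractional multiply, roughly
// saturate((X * Y + (1 << 14)) >> 15), onto the single V6_vmpyhvsrs
// (multiply halfwords with round/saturate/shift) instruction.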
auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));

  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),

auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
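// createAddCarry below returns a (sum, carry-out) pair for vector word
// addition; when no add-carry instruction fits, the carry-in predicate is
// materialized as a 0/1 word per element via V6_vandqrt and two overflow
// predicates are OR-ed to form the carry-out.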
auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
    if (CarryIn == nullptr)
      Args.push_back(CarryIn);
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    return {Result, CarryOut};

  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");

  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  std::tie(X, Y) = canonSgn(X, Y);
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,

  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;

      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);

  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});
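// createAddLong/createMulLong below do schoolbook arithmetic on multi-word
// vectors: word i of X times word j of Y contributes its low half to column
// i+j and its high half to column i+j+1, and the columns are then reduced
// with carry-propagating adds.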
  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();

    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];

  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);

  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
auto HvxIdioms::run() -> bool {
  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        It->replaceAllUsesWith(New);
        It = StartOver ? B.rbegin()
      } else if (matchGather(*It)) {
          It->eraseFromParent();
      } else if (matchScatter(*It)) {
          It->eraseFromParent();
      } else if (matchMLoad(*It)) {
      } else if (matchMStore(*It)) {
auto HexagonVectorCombine::run() -> bool {
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

  if (HST.useHVXOps()) {
      Changed |= AlignVectors(*this).run();
      Changed |= HvxIdioms(*this).run();

    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"
auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  return VectorType::get(ByteTy, ElemCount, false);

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  return VectorType::get(BoolTy, ElemCount, false);

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
    return CI->getValue();
  return std::nullopt;

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  return getSizeOf(Val->getType(), Kind);

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  auto *NcTy = const_cast<Type *>(Ty);
    return DL.getTypeStoreSize(NcTy).getFixedValue();
    return DL.getTypeAllocSize(NcTy).getFixedValue();

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();
auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
    SimplifyQuery Q(DL, &TLI, &DT, &AC, In);

auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);

  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

  return vresize(Builder, P2Insert, DstLen, Poison);
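// vlalignb/vralignb below treat Lo:Hi as one double-wide byte vector and pick
// a VecLen-byte window from it: vralignb starts the window Amt bytes in (for
// Amt == 3 it yields bytes 3..VecLen+2), while vlalignb ends it Amt bytes
// before the end (starting at VecLen - Amt).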
auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});

        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    return vralignb(Builder, Lo, Hi, Sub);

auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});

        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    Type *Int64Ty = Type::getInt64Ty(F.getContext());
auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);

  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);

auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);

  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);

      Mask, VectorType::get(FromITy, FromCount, false), "sxt");
      Ext, VectorType::get(ToITy, ToCount, false), "cst");
      Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");
3614auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder,
Value *Val)
const
3617 if (ScalarTy == getBoolTy())
3620 Value *Bytes = vbytes(Builder, Val);
3622 return Builder.
CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)),
"trn");
3625 return Builder.
CreateTrunc(Bytes, getBoolTy(),
"trn");
// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getByteTy())
    return Val;

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
  // For bool, return a sext from i1 to i8.
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");
}
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
    -> Value * {
  return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
}
auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);
}

auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);
}
auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
                                 Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[i] = 2 * i;           // Even
    Mask[i + Len] = 2 * i + 1; // Odd
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}

auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
                                  Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;       // Val0
    Mask[2 * i + 1] = i + Len; // Val1
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
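// Worked example (illustrative, Len = 4): the two masks are inverses.
//   vdeal:  Mask = {0, 2, 4, 6, 1, 3, 5, 7}  // evens first, then odds
//   vshuff: Mask = {0, 4, 1, 5, 2, 6, 3, 7}  // interleave Val0 and Val1
// vdeal undoes vshuff on the same pair, which is what splitVectorElements
// and joinVectorElements below rely on.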
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args,
                                              ArrayRef<Type *> ArgTys,
                                              ArrayRef<Value *> MDSources) const
    -> Value * {
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;

    // Non-HVX type. It should be a scalar, and it should already have
    // a valid type.
    assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));

    Type *BoolTy = Type::getInt1Ty(F.getContext());
    if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
      return Builder.CreateBitCast(Val, DestTy, "cst");

    // Predicate HVX vector.
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
    return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},
                                   /*FMFSource=*/nullptr, "cup");
  };

  Function *IntrFn =
      Intrinsic::getOrInsertDeclaration(F.getParent(), IntID, ArgTys);
  FunctionType *IntrTy = IntrFn->getFunctionType();

  SmallVector<Value *, 4> IntrArgs;
  for (int i = 0, e = Args.size(); i != e; ++i) {
    Value *A = Args[i];
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
      IntrArgs.push_back(getCast(Builder, A, T));
    } else {
      IntrArgs.push_back(A);
    }
  }
  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

  MemoryEffects ME = Call->getAttributes().getMemoryEffects();
  if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
    propagateMetadata(Call, MDSources);

  Type *CallTy = Call->getType();
  if (RetTy == nullptr || CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
  return getCast(Builder, Call, RetTy);
}
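// Note (illustrative, not from the source): the only non-trivial casts here
// are for HVX predicates. An intrinsic may spell the same predicate
// register as a different boolean vector type, so V6_pred_typecast
// reinterprets one predicate type as another without generating code;
// regular vectors only ever need a plain bitcast.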
auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               Value *Vec,
                                               unsigned ToWidth) const
    -> SmallVector<Value *> {
  // Break a vector of wide elements into a series of vectors with narrow
  // elements:
  //   (...c0:b0:a0, ...c1:b1:a1, ...) --> (a0, a1, ...), (b0, b1, ...), ...
  // Each result vector keeps the element count of the original vector.
  auto *VecTy = cast<VectorType>(Vec->getType());
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;

  SmallVector<Value *> Results(NumResults);
  Results[0] = Vec;
  unsigned Length = length(VecTy);

  // Do it by splitting in half, since those operations correspond to deal
  // instructions.
  auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
    // Take V = Results[Begin], split it in L, H.
    // Store Results[Begin] = L, Results[(Begin+End)/2] = H,
    // then recurse on each half.
    if (Begin + 1 == End)
      return;

    Value *Val = Results[Begin];
    unsigned Width = Val->getType()->getScalarSizeInBits();

    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length,
                                /*Scalable=*/false);
    Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");

    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  };

  splitInHalf(0, NumResults, splitInHalf);
  return Results;
}
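// Worked example (illustrative): splitting a <32 x i32> input into i8
// pieces gives NumResults = 4. Each recursion level halves the element
// width with one vdeal: the first level separates the low and high i16
// halves of every i32 lane, the second produces the four byte planes
//   Results = {bits 7:0, bits 15:8, bits 23:16, bits 31:24}
// each as a <32 x i8> vector, matching the layout described above.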
auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
                                              ArrayRef<Value *> Values,
                                              VectorType *ToType) const
    -> Value * {
  assert(ToType->getElementType()->isIntegerTy());

  // If the list of values does not have power-of-2 elements, append copies
  // of the sign bit to it, to make the size be 2^n. The values are then
  // joined in pairs, so that the shuffles can fold into perfect shuffles.
  SmallVector<Value *> Inputs(Values.begin(), Values.end());

  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);
  assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
    // Having too many inputs is ok: drop the high bits (usual wrap-around).
    // If there are too few, fill them with the sign bit.
    Value *Last = Inputs.back();
    Value *Sign = Builder.CreateAShr(
        Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);
  }

  while (Inputs.size() > 1) {
    Width *= 2;
    auto *VTy = VectorType::get(getIntTy(Width), Length, false);
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
      Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
    }
    Inputs.resize(Inputs.size() / 2);
  }

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
}
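// Worked example (illustrative): joining two <32 x i8> inputs {low, high}
// into <32 x i16>: vshuff interleaves them byte-wise and the bitcast
// reads each (low, high) pair as one little-endian i16 lane. With a single
// input and ToWidth = 16, the list is first padded with an ashr copy of
// the input's sign bits, so the join amounts to a sign extension.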
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (const auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (Value *S = simplify(V))
      return S;
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}
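// Worked example (illustrative IR, not from the source): for
//   %p0 = getelementptr i32, ptr %base, i32 %i
//   %p1 = getelementptr i32, ptr %base, i32 %j
// Scale = 4, and the difference is (i - j) * 4 whenever %i - %j folds to a
// constant. Failing that, each index is split into its unknown bits
// (masked by Unknown) and its known bits; if both partial subtractions
// simplify to constants, (Diff0 + Diff1) * Scale is still an exact
// compile-time byte distance.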
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}

auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
    return false;
  return true;
}
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // assume intrinsic can be ignored
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
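// Note (illustrative): the scan is directional. When To precedes In, the
// range [To, In) is checked (moving In up); otherwise (In, To) is checked
// (moving In down). A single instruction that may throw, a call lacking
// willreturn or nosync, or a possible alias with a write is enough to
// reject the motion.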
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
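// Usage sketch (assumption, not from this file): the legacy pass is meant
// to be created by the Hexagon target when building its IR pass pipeline,
// along the lines of:
//
//   // In the Hexagon target's pass setup (simplified; the real flag name
//   // lives in HexagonTargetMachine.cpp):
//   if (EnableVectorCombine)
//     addPass(createHexagonVectorCombineLegacyPass());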