#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"
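// HexagonVectorCombine is a target-specific IR pass made of two subpasses,
// both visible in this excerpt: AlignVectors, which groups misaligned vector
// loads/stores that share a base address and rewrites them as aligned
// "sector" accesses, and HvxIdioms, which recognizes fixed-point
// multiplication idioms and lowers them to HVX multiply intrinsics.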
class HexagonVectorCombine {
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),

  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
                 int Length, int Where) const;
                 unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
  template <typename T = std::vector<Instruction *>>
                              const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
                          int Start, int Length) const;
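// AlignVectors: for each base pointer, collect the loads/stores reachable
// from it. An AddrInfo records the instruction, its address, the value type,
// the alignment the access actually has (HaveAlign), and the alignment its
// type would need (NeedAlign). Accesses are then collected into MoveGroups
// that can be placed together and realigned as a unit.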
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;

  AddrInfo(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
  AddrInfo &operator=(const AddrInfo &) = default;

  using AddrList = std::vector<AddrInfo>;
      return A->comesBefore(B);
  using DepList = std::set<Instruction *, InstrLess>;

  MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
      : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
  MoveGroup() = default;
  using MoveList = std::vector<MoveGroup>;

  Segment(Value *Val, int Begin, int Len)
      : Val(Val), Start(Begin), Size(Len) {}
  Segment(const Segment &Seg) = default;
  Segment &operator=(const Segment &Seg) = default;

  Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
  Block(Value *Val, int Off, int Len, int Pos)
      : Seg(Val, Off, Len), Pos(Pos) {}
  Block(const Block &Blk) = default;
  Block &operator=(const Block &Blk) = default;

  ByteSpan section(int Start, int Length) const;
  ByteSpan &shift(int Offset);

  Block &operator[](int i) { return Blocks[i]; }
  const Block &operator[](int i) const { return Blocks[i]; }

  std::vector<Block> Blocks;

  using iterator = decltype(Blocks)::iterator;
  iterator begin() { return Blocks.begin(); }
  iterator end() { return Blocks.end(); }

  Align getAlignFromValue(const Value *V) const;
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
                const InstMap &CloneMap = InstMap()) const;
                const InstMap &CloneMap = InstMap()) const;
                    Value *Predicate, int Alignment,
                    Value *Predicate, int Alignment,
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T> InstMap cloneBefore(Instruction *To, T &&Insts) const;
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;
                             int Alignment) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;

  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
    OS << "  " << *I << '\n';
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';

                        const AlignVectors::ByteSpan::Block &B) {
  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {

  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
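// HvxIdioms: match fixed-point (Qn) multiplications, i.e. expressions of the
// form (X * Y + RoundBit) >> Frac on integer vectors, and rewrite them with
// HVX multiply intrinsics. An FxpOp records the two operands together with
// their signedness, the fractional shift Frac, and the optional position of
// the rounding bit (RoundAt).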
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);

  enum Signedness { Positive, Signed, Unsigned };

  std::optional<unsigned> RoundAt;

      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
                     const FxpOp &Op) const -> Value *;
                    bool Rounding) const -> Value *;
                    bool Rounding) const -> Value *;
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
      -> std::pair<Value *, Value *>;

  const HexagonVectorCombine &HVC;

                        const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << " + 1<<" << *Op.RoundAt;
  OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
  return dyn_cast<T>(In);
  return getIfUnordered(dyn_cast<LoadInst>(In));
  return getIfUnordered(dyn_cast<StoreInst>(In));

#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
  for (auto i = map.begin(), e = map.end(); i != e;) {
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
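// A ByteSpan describes a range of bytes as a list of Blocks; each Block
// places a Segment (a sub-range of bytes taken from some Value) at a byte
// position Pos within the span. extent() measures the covered range,
// section() selects the blocks overlapping a byte interval, and shift()
// rebases all block positions by a constant offset.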
auto AlignVectors::ByteSpan::extent() const -> int {
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {

  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
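// getAddrInfo recognizes the memory operations this pass handles: plain
// loads and stores, plus the masked_load/masked_store intrinsics, whose
// alignment is carried as a constant-integer argument.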
auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy))
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);

                                        Type *ValTy, int Adjust,
                                        const InstMap &CloneMap) const
  if (auto *I = dyn_cast<Instruction>(Ptr))
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");

                                       Type *ValTy, int Alignment,
                                       const InstMap &CloneMap) const
    if (auto *I = dyn_cast<Instruction>(V)) {
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(

  bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);

  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
                                Mask, PassThru, "mld");

      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");

  assert(HVC.HST.isTypeForHVX(ValTy) &&
         "Predicated 'scalar' vector loads not yet supported");
  assert(!Predicate->getType()->isVectorTy() &&
         "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                std::nullopt, MDSources);

  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
         "Expecting scalar predicate"));
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
  if (HVC.isTrue(Mask)) {
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

      Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);

  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,

  assert(HVC.HST.isTypeForHVX(Val->getType()) &&
         "Predicated 'scalar' vector stores not yet supported");
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                std::nullopt, MDSources);

         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
      if (auto *I = dyn_cast<Instruction>(Op)) {
        if (I->getParent() == Parent && Base->comesBefore(I))
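// createAddressGroups walks the dominator tree; within each block it tries
// to express every candidate address as an existing address plus a constant
// offset (calculatePointerDifference), grouping accesses that share a base
// instruction. Groups with a single member, or with no HVX-typed member,
// are then erased as uninteresting.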
auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
        AI->Offset = F.second;
      WorkStack.push_back(*AI);
      GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });

  return !AddrGroups.empty();
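// createLoadGroups greedily extends the current group with the next load
// from the address group, provided it is in the same block as the group's
// base, matches its HVX-ness, can be moved up to the base, and all of its
// upward dependences can be moved or cloned there as well.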
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    Move.Main.push_back(Info.Inst);

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  if (!HVC.HST.useHVXV62Ops())
    erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  if (!HVC.HST.useHVXV62Ops())
    erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
      auto MaybeInfo = this->getAddrInfo(*S);
      assert(MaybeInfo.has_value());
      return HVC.HST.isHVXVectorType(
          EVT::getEVT(MaybeInfo->ValTy, false));
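// moveTogether makes a group physically contiguous: for a load group the
// main instructions move after the base, with their dependences cloned in
// front of it; for a store group they move before the base. Returns true
// if anything beyond a singleton actually moved.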
auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
    Move.Clones = cloneBefore(Where, Move.Deps);
      M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
    assert(Move.Deps.empty());
      M->moveBefore(Where);
  return Move.Main.size() + Move.Deps.size() > 1;

template <typename T>
auto AlignVectors::cloneBefore(Instruction *To, T &&Insts) const -> InstMap {
    assert(HVC.isSafeToClone(*I));
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
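// realignLoadGroup covers the group's byte span (VSpan) with NumSectors
// aligned sector loads of ScLen bytes each, plus one extra sector when the
// start is not known to be aligned (DoAlign). Every requested byte is then
// covered by two adjacent sectors that are combined with vralignb, each
// sector load is sunk no further than its earliest user, and each original
// load is reassembled from sector pieces and replaced by a select against
// its original mask.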
                                      const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock *BaseBlock = Builder.GetInsertBlock();
  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));

    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);

  auto earliestUser = [&](const auto &Uses) {
      auto *I = dyn_cast<Instruction>(U.getUser());
      assert(I != nullptr && "Load used in a non-instruction?");
      if (I->getParent() == BaseBlock) {
        if (!isa<PHINode>(I))

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      EarliestUser[S.Seg.Val] = std::min(
          EarliestUser[S.Seg.Val], earliestUser(B.Seg.Val->uses()), isEarlier);

    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());

    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(In, To);
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);

        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
        if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
          moveBefore(Load, &*BasePos);

    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      ASpan[Index].Seg.Val = Val;

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
              [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
                return isEarlier(cast<Instruction>(A->Seg.Val),
                                 cast<Instruction>(B->Seg.Val));
    for (ByteSpan::Block *S : ABlocks) {
      Instruction *SegI = cast<Instruction>(S->Seg.Val);
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);

    Type *ValTy = getPayload(B.Seg.Val)->getType();
                                     getPassThrough(B.Seg.Val), "sel");
    B.Seg.Val->replaceAllUsesWith(Sel);
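// realignStoreGroup works in the opposite direction: the payload bytes and
// the store mask of every original store are scattered into per-sector
// value and mask accumulators (ASpanV/ASpanM), shifted down with vlalignb
// when the start is misaligned, and written out with one masked sector
// store per index.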
                                       const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  ByteSpan ASpanV, ASpanM;

    auto *VecTy = VectorType::get(Ty, 1, false);

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
      Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);
      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);

    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());

    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
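// realignGroup picks the alignment parameters for a move group: if some
// member already has enough alignment (MinNeeded <= MaxGiven), the aligned
// address is derived from it by a constant adjustment and the byte shift
// AlignVal is a compile-time constant; otherwise the lowest address is
// masked down to the needed alignment and AlignVal is computed at run time
// from the pointer bits. The sector length is the HVX vector length for
// HVX groups, and 4 or 8 bytes for scalar ones.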
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
      return GetValue(A) < GetValue(B);

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());

        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
    if (auto *I = dyn_cast<Instruction>(AlignVal)) {
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);

  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

    dbgs() << "ScLen: " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';

    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
                                         int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  int Size = HVC.getSizeOf(Ty);
  if (HVC.HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HVC.HST.getVectorLength());

auto AlignVectors::run() -> bool {
  LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
  if (!createAddressGroups())

    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {

    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";

  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
  unsigned StoreLimit = CountLimit - LoadGroups.size();
  if (StoreGroups.size() > StoreLimit)
    StoreGroups.resize(StoreLimit);

  for (auto &M : LoadGroups)
    Changed |= moveTogether(M);
  for (auto &M : StoreGroups)
    Changed |= moveTogether(M);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
    NumToTest = Bits - 1;
  return {Bits, Sign};

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
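// matchFxpMul pattern-matches a fixed-point multiply rooted at In, i.e. a
// (possibly rounded) product shifted right by Frac bits, for example
//   (X * Y) >> Frac                      or
//   (X * Y + (1 << (Frac - 1))) >> Frac
// on integer vector types, recording the operands, the shift, and the
// position of the rounding bit.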
auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
    return std::nullopt;

  unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();

  auto m_Shr = [](auto &&V, auto &&S) {

  const APInt *Qn = nullptr;
    if (Op.Frac > Width)
      return std::nullopt;

    const APInt *C = nullptr;
      return std::nullopt;

  Op.Opcode = Instruction::Mul;
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
  Op.ResTy = cast<VectorType>(Ty);

  return std::nullopt;

auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  assert(Op.X.Val->getType() == Op.Y.Val->getType());

  auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
  if (VecTy == nullptr)

  auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
  unsigned ElemWidth = ElemTy->getBitWidth();

  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)

  if (ElemWidth <= 32 && Op.Frac == 0)

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width > 32 && Width % 32 != 0) {

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  unsigned Width = std::max(BitsX, BitsY);
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
  } else if (Width > ElemWidth) {

  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);

  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
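// processFxpMulChopped handles one HVX-vector-sized chunk. Q15 and Q31
// products with standard rounding map onto single intrinsics; a 16-bit
// product with Frac == 16 takes the high halves of a 32-bit product
// (createMulH16); any other width goes through a full multi-word long
// multiplication, an explicit add of the rounding bit, and a shift by
// Frac across the word vectors.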
                                        const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  auto *InpTy = cast<VectorType>(Op.X.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    Value *QMul = nullptr;
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    if (QMul != nullptr)

  assert(Width < 32 || Width % 32 == 0);

  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))

    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
      Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");

            ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
            : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

  auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());

  if (Op.RoundAt.has_value()) {
    RoundV[*Op.RoundAt / 32] =
        HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);

  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);

  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    if (Src + 1 < End) {

  WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
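// createMulQ15 computes a rounding Q15 product of halfword vectors, which
// is roughly sat16((X * Y * 2 + 0x8000) >> 16); V6_vmpyhvsrs performs the
// multiply, left-shift, round, and saturate in one instruction.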
auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));

  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),

auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());

  auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
      ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
      : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
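// createAddCarry returns {Sum, CarryOut} for a word-vector addition. On
// HVX v62+ the vaddcarry/vaddcarryo intrinsics produce both results at
// once; otherwise the carry-in predicate is materialized as 0/1 words
// (vandqrt against a per-lane mask) and added separately, with the two
// partial carry-outs OR-ed together.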
                                  Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
  auto VecTy = cast<VectorType>(X->getType());
  if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
    if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
      AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
      if (CarryIn == nullptr)
        CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
      Args.push_back(CarryIn);
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    return {Result, CarryOut};

  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;

    auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");

  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};

auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
    V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);

      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);

  if (HVC.HST.useHVXV69Ops()) {
    auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,

  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;

      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);

  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});

  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
    if (HVC.isZero(WordX[Idx]))
    else if (HVC.isZero(WordY[Idx]))

  Value *Carry = nullptr;
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);

  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");

auto HvxIdioms::run() -> bool {
  bool Changed = false;
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        bool StartOver = !isa<Instruction>(New);
        It->replaceAllUsesWith(New);
        It = StartOver ? B.rbegin()
                       : cast<Instruction>(New)->getReverseIterator();
auto HexagonVectorCombine::run() -> bool {
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

  bool Changed = false;
  if (HST.useHVXOps()) {
    Changed |= AlignVectors(*this).run();
    Changed |= HvxIdioms(*this).run();

    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"

auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  return VectorType::get(ByteTy, ElemCount, false);

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  return VectorType::get(BoolTy, ElemCount, false);

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return std::nullopt;

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  return getSizeOf(Val->getType(), Kind);

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  auto *NcTy = const_cast<Type *>(Ty);
    return DL.getTypeStoreSize(NcTy).getFixedValue();
    return DL.getTypeAllocSize(NcTy).getFixedValue();

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))

auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
  auto VecTy = cast<VectorType>(Ty);
  Type *ElemTy = VecTy->getElementType();
                           ConstantInt::get(ElemTy, Val));

auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(V)) {

                                 int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);

  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);

  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

  return vresize(Builder, P2Insert, DstLen, Undef);
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});

    return vralignb(Builder, Lo, Hi, Sub);

  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});

                            Intrinsic::hexagon_S2_valignrb);

  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;
  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);

  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);

  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);

  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);

  if (FromSTy == ToSTy)

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);

      Mask, VectorType::get(FromITy, FromCount, false), "sxt");
      Ext, VectorType::get(ToITy, ToCount, false), "cst");
      Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");

  if (ScalarTy == getBoolTy())

  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
  return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");

  if (ScalarTy == getByteTy())
  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");

  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");

                                   unsigned Start, unsigned Length) const
  return getElementRange(Builder, Val, Val, Start, Length);

  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);

  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);

  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  for (int i = 0; i != Len; ++i) {

  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;

auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)

    assert(HST.isTypeForHVX(SrcTy, true));
    if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)

    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;

  for (int i = 0, e = Args.size(); i != e; ++i) {
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {

  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";

  assert(HST.isTypeForHVX(CallTy, true));
  return getCast(Builder, Call, RetTy);

auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               unsigned ToWidth) const
  auto *VecTy = cast<VectorType>(Vec->getType());
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;

  unsigned Length = length(VecTy);

  auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
    if (Begin + 1 == End)

    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);

  splitInHalf(0, NumResults, splitInHalf);

auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
  assert(ToType->getElementType()->isIntegerTy());

  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);

  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
        Last, getConstSplat(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);

  while (Inputs.size() > 1) {
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
    Inputs.resize(Inputs.size() / 2);

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
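// calculatePointerDifference first asks ScalarEvolution for a constant
// Ptr0 - Ptr1; failing that, it compares two GEPs with the same base
// pointer and source element type, bounding the index difference with
// known-bits reasoning when it is not a literal constant, and scales the
// result by the element size.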
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
    -> std::optional<int> {
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());

      I->eraseFromParent();

#define CallBuilder(B, F) \
    if (auto *I = dyn_cast<Instruction>(V)) \
      B_.ToErase.push_back(I); \

  auto Simplify = [this](Value *V) {
  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  if (auto *Diff = dyn_cast<ConstantInt>(
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
    return std::nullopt;

  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
    return std::nullopt;

  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
    return std::nullopt;

  return (Diff0 + Diff1) * Scale;

auto HexagonVectorCombine::getNumSignificantBits(const Value *V,

auto HexagonVectorCombine::getKnownBits(const Value *V,

auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
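// isSafeToMoveBeforeInBB scans the instructions between In and the target
// position (in whichever direction the move goes) and rejects the move if
// any instruction along the way might not return or might synchronize, or
// if a memory access there may alias In's location; the locations of
// masked load/store intrinsics are recovered from their pointer argument.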
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  const T &IgnoreInsts) const
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
      case Intrinsic::masked_store:

  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))

  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
      if (!CB->hasFnAttr(Attribute::NoSync))
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))

auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();

  std::iota(SMask.begin(), SMask.end(), Start);

class HexagonVectorCombineLegacy : public FunctionPass {
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);

char HexagonVectorCombineLegacy::ID = 0;
                      "Hexagon Vector Combine", false, false)

  return new HexagonVectorCombineLegacy();
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live value
Mark the given Function as meaning that it cannot be changed in any way mark any values that are used as this function s parameters or by its return values(according to Uses) live as well. void DeadArgumentEliminationPass
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
static bool isUndef(ArrayRef< int > Mask)
#define CallBuilder(B, F)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::const_iterator const_iterator
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator_range< iterator > children()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & back() const
Common base class shared among various IRBuilders.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
const char * getOpcodeName() const
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
PassRegistry - This class manages the registration and initialization of the pass subsystem as application startup begins.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
SCEV - This class represents an analyzed expression in the program.
ScalarEvolution - The main scalar evolution driver.
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
SmallVector - This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StoreInst - An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
TargetLibraryInfo - Provides information about what library functions are available for the current target.
TargetMachine - Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo.
TargetPassConfig - Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Type - The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
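A small illustration of the Type queries above (hypothetical helper, not from this file):
#include "llvm/IR/Type.h"

// Sketch: scalar bit width of an integer or integer-vector type, else 0.
unsigned scalarBits(llvm::Type *Ty) {
  if (!Ty->isIntOrIntVectorTy())
    return 0;
  // For a vector this is the element width; for a scalar, the type width.
  return Ty->getScalarSizeInBits();
}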
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
self_iterator getIterator()
raw_ostream - This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
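A minimal sketch of getDeclaration with an overloaded intrinsic (hypothetical names; assumes a Module M, an IRBuilder B, and an i32 value X):
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: declare llvm.ctlz.i32 and call it; the i1 false argument says
// a zero input is well defined (not poison).
Value *emitCtlz(Module &M, IRBuilder<> &B, Value *X) {
  Function *Ctlz =
      Intrinsic::getDeclaration(&M, Intrinsic::ctlz, {B.getInt32Ty()});
  return B.CreateCall(Ctlz, {X, B.getFalse()});
}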
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (LShr or AShr).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
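A minimal sketch combining the matchers above (hypothetical helper, not from this file):
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: recognize "X + C" or "X >> C" for a constant (possibly splatted)
// C, binding X and C on success.
bool matchAddOrShrByConst(Value *V, Value *&X, const APInt *&C) {
  return match(V, m_CombineOr(m_Add(m_Value(X), m_APInt(C)),
                              m_Shr(m_Value(X), m_APInt(C))));
}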
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
FunctionPass * createHexagonVectorCombineLegacyPass()
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
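These MathExtras helpers compose naturally; a minimal sketch (hypothetical helper, not from this file):
#include "llvm/Support/MathExtras.h"
#include <cassert>

// Sketch: round a byte count up to a power of two and return its log2.
unsigned log2Ceil(uint64_t Bytes) {
  if (Bytes == 0)
    return 0;                               // PowerOf2Ceil(0) is 0; handle separately
  uint64_t P = llvm::PowerOf2Ceil(Bytes);   // e.g. 24 -> 32
  assert(llvm::isPowerOf2_64(P) && "expected a power of two");
  return llvm::Log2_64(P);                  // e.g. 32 -> 5
}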
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
void initializeHexagonVectorCombineLegacyPass(PassRegistry &)
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
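A one-line illustration of alignTo (the 128-byte boundary is just an example; 128 bytes happens to be the HVX vector length in 128B mode):
#include "llvm/Support/Alignment.h"

// Sketch: pad a byte count to a 128-byte boundary, e.g. 200 -> 256.
uint64_t paddedSize(uint64_t Bytes) {
  return llvm::alignTo(Bytes, llvm::Align(128));
}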
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
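A minimal sketch of computeKnownBits (hypothetical helper; uses the overload that returns a KnownBits value):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"

// Sketch: a provable lower bound on the trailing zero bits of V, e.g. a
// conservative alignment estimate for an address computation.
unsigned knownTrailingZeros(const llvm::Value *V, const llvm::DataLayout &DL) {
  llvm::KnownBits Known = llvm::computeKnownBits(V, DL);
  return Known.countMinTrailingZeros();
}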
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to C.erase(remove_if(C, pred), C.end()).
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
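The range helpers above replace explicit begin/end iterator pairs; a minimal sketch (hypothetical use):
#include "llvm/ADT/STLExtras.h"
#include <vector>

// Sketch: drop zeros, then sort, but only when no element is negative.
void tidy(std::vector<int> &V) {
  if (llvm::none_of(V, [](int X) { return X < 0; })) {
    llvm::erase_if(V, [](int X) { return X == 0; });
    llvm::sort(V.begin(), V.end());
  }
}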
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instruction I depend on values not reachable through the def-use graph.
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Align - This struct is a compact representation of a valid (non-zero power of two) alignment.
EVT - Extended Value Type: capable of holding value types which are not native to any processor, as well as the types an MVT can represent.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.