34#include "llvm/IR/IntrinsicsHexagon.h"
57#define DEBUG_TYPE "hexagon-vc"
72class HexagonVectorCombine {
77 :
F(F_),
DL(
F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
87 Type *getByteTy(
int ElemCount = 0)
const;
90 Type *getBoolTy(
int ElemCount = 0)
const;
94 std::optional<APInt> getIntValue(
const Value *Val)
const;
100 bool isTrue(
const Value *Val)
const;
102 bool isFalse(
const Value *Val)
const;
111 int getSizeOf(
const Value *Val, SizeKind Kind = Store)
const;
112 int getSizeOf(
const Type *Ty, SizeKind Kind = Store)
const;
113 int getTypeAlignment(
Type *Ty)
const;
114 size_t length(
Value *Val)
const;
115 size_t length(
Type *Ty)
const;
124 int Length,
int Where)
const;
148 unsigned ToWidth)
const;
152 std::optional<int> calculatePointerDifference(
Value *Ptr0,
Value *Ptr1)
const;
154 unsigned getNumSignificantBits(
const Value *V,
161 template <
typename T = std::vector<Instruction *>>
164 const T &IgnoreInsts = {})
const;
167 [[maybe_unused]]
bool isByteVecTy(
Type *Ty)
const;
180 int Start,
int Length)
const;
199 AlignVectors(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
204 using InstList = std::vector<Instruction *>;
208 AddrInfo(
const AddrInfo &) =
default;
211 : Inst(
I),
Addr(
A), ValTy(
T), HaveAlign(
H),
212 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
213 AddrInfo &operator=(
const AddrInfo &) =
default;
224 using AddrList = std::vector<AddrInfo>;
228 return A->comesBefore(
B);
231 using DepList = std::set<Instruction *, InstrLess>;
234 MoveGroup(
const AddrInfo &AI,
Instruction *
B,
bool Hvx,
bool Load)
235 :
Base(
B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
236 MoveGroup() =
default;
244 using MoveList = std::vector<MoveGroup>;
264 Segment(
Value *Val,
int Begin,
int Len)
265 : Val(Val), Start(Begin), Size(Len) {}
266 Segment(
const Segment &Seg) =
default;
267 Segment &operator=(
const Segment &Seg) =
default;
274 Block(
Value *Val,
int Len,
int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
275 Block(
Value *Val,
int Off,
int Len,
int Pos)
276 : Seg(Val, Off, Len), Pos(Pos) {}
277 Block(
const Block &Blk) =
default;
278 Block &operator=(
const Block &Blk) =
default;
284 ByteSpan section(
int Start,
int Length)
const;
285 ByteSpan &shift(
int Offset);
289 Block &operator[](
int i) {
return Blocks[i]; }
290 const Block &operator[](
int i)
const {
return Blocks[i]; }
292 std::vector<Block>
Blocks;
294 using iterator =
decltype(
Blocks)::iterator;
295 iterator begin() {
return Blocks.begin(); }
296 iterator end() {
return Blocks.end(); }
302 Align getAlignFromValue(
const Value *V)
const;
303 std::optional<AddrInfo> getAddrInfo(
Instruction &In)
const;
304 bool isHvx(
const AddrInfo &AI)
const;
306 [[maybe_unused]]
bool isSectorTy(
Type *Ty)
const;
314 const InstMap &CloneMap = InstMap())
const;
317 const InstMap &CloneMap = InstMap())
const;
334 Value *Predicate,
int Alignment,
337 Value *Predicate,
int Alignment,
341 bool createAddressGroups();
342 MoveList createLoadGroups(
const AddrList &Group)
const;
343 MoveList createStoreGroups(
const AddrList &Group)
const;
344 bool moveTogether(MoveGroup &Move)
const;
345 template <
typename T> InstMap cloneBefore(
Instruction *To,
T &&Insts)
const;
347 void realignLoadGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
348 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
349 void realignStoreGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
350 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
351 bool realignGroup(
const MoveGroup &Move)
const;
354 int Alignment)
const;
361 std::map<Instruction *, AddrList> AddrGroups;
362 const HexagonVectorCombine &HVC;
367 OS <<
"Inst: " << AI.Inst <<
" " << *AI.Inst <<
'\n';
368 OS <<
"Addr: " << *AI.Addr <<
'\n';
369 OS <<
"Type: " << *AI.ValTy <<
'\n';
370 OS <<
"HaveAlign: " << AI.HaveAlign.value() <<
'\n';
371 OS <<
"NeedAlign: " << AI.NeedAlign.value() <<
'\n';
372 OS <<
"Offset: " << AI.Offset;
378 OS <<
"IsLoad:" << (MG.IsLoad ?
"yes" :
"no");
379 OS <<
", IsHvx:" << (MG.IsHvx ?
"yes" :
"no") <<
'\n';
382 OS <<
" " << *
I <<
'\n';
385 OS <<
" " << *
I <<
'\n';
387 for (
auto [K, V] : MG.Clones) {
389 K->printAsOperand(
OS,
false);
390 OS <<
"\t-> " << *V <<
'\n';
397 const AlignVectors::ByteSpan::Block &
B) {
398 OS <<
" @" <<
B.Pos <<
" [" <<
B.Seg.Start <<
',' <<
B.Seg.Size <<
"] ";
399 if (
B.Seg.Val ==
reinterpret_cast<const Value *
>(&
B)) {
400 OS <<
"(self:" <<
B.Seg.Val <<
')';
401 }
else if (
B.Seg.Val !=
nullptr) {
411 OS <<
"ByteSpan[size=" << BS.size() <<
", extent=" << BS.extent() <<
'\n';
412 for (
const AlignVectors::ByteSpan::Block &
B : BS)
420 HvxIdioms(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {
421 auto *Int32Ty = HVC.getIntTy(32);
422 HvxI32Ty = HVC.getHvxTy(Int32Ty,
false);
423 HvxP32Ty = HVC.getHvxTy(Int32Ty,
true);
429 enum Signedness { Positive, Signed, Unsigned };
444 std::optional<unsigned> RoundAt;
449 -> std::pair<unsigned, Signedness>;
450 auto canonSgn(SValue
X, SValue
Y)
const -> std::pair<SValue, SValue>;
452 auto matchFxpMul(
Instruction &In)
const -> std::optional<FxpOp>;
456 const FxpOp &
Op)
const ->
Value *;
458 bool Rounding)
const ->
Value *;
460 bool Rounding)
const ->
Value *;
463 Value *CarryIn =
nullptr)
const
464 -> std::pair<Value *, Value *>;
469 -> std::pair<Value *, Value *>;
478 const HexagonVectorCombine &HVC;
484 const HvxIdioms::FxpOp &
Op) {
485 static const char *SgnNames[] = {
"Positive",
"Signed",
"Unsigned"};
487 if (
Op.RoundAt.has_value()) {
488 if (
Op.Frac != 0 && *
Op.RoundAt ==
Op.Frac - 1) {
491 OS <<
" + 1<<" << *
Op.RoundAt;
494 OS <<
"\n X:(" << SgnNames[
Op.X.Sgn] <<
") " << *
Op.X.Val <<
"\n"
495 <<
" Y:(" << SgnNames[
Op.Y.Sgn] <<
") " << *
Op.Y.Val;
503template <
typename T>
T *getIfUnordered(
T *MaybeT) {
504 return MaybeT && MaybeT->isUnordered() ? MaybeT :
nullptr;
507 return dyn_cast<T>(In);
510 return getIfUnordered(dyn_cast<LoadInst>(In));
513 return getIfUnordered(dyn_cast<StoreInst>(In));
516#if !defined(_MSC_VER) || _MSC_VER >= 1926
520template <
typename Pred,
typename... Ts>
521void erase_if(std::map<Ts...> &map, Pred p)
523template <
typename Pred,
typename T,
typename U>
524void erase_if(std::map<T, U> &map, Pred p)
527 for (
auto i = map.begin(), e = map.end(); i != e;) {
536template <
typename Pred,
typename T>
void erase_if(
T &&container, Pred p) {
574auto AlignVectors::ByteSpan::extent()
const ->
int {
579 for (
int i = 1, e =
size(); i !=
e; ++i) {
580 Min = std::min(Min,
Blocks[i].Pos);
586auto AlignVectors::ByteSpan::section(
int Start,
int Length)
const -> ByteSpan {
588 for (
const ByteSpan::Block &
B :
Blocks) {
589 int L = std::max(
B.Pos, Start);
590 int R = std::min(
B.Pos +
B.Seg.Size, Start +
Length);
593 int Off =
L >
B.Pos ?
L -
B.Pos : 0;
594 Section.Blocks.emplace_back(
B.Seg.Val,
B.Seg.Start + Off, R - L, L);
600auto AlignVectors::ByteSpan::shift(
int Offset) -> ByteSpan & {
608 for (
int i = 0, e =
Blocks.size(); i != e; ++i)
609 Values[i] =
Blocks[i].Seg.Val;
613auto AlignVectors::getAlignFromValue(
const Value *V)
const ->
Align {
614 const auto *
C = dyn_cast<ConstantInt>(V);
615 assert(
C &&
"Alignment must be a compile-time constant integer");
616 return C->getAlignValue();
619auto AlignVectors::getAddrInfo(
Instruction &In)
const
620 -> std::optional<AddrInfo> {
621 if (
auto *L = isCandidate<LoadInst>(&In))
622 return AddrInfo(HVC, L,
L->getPointerOperand(),
L->getType(),
624 if (
auto *S = isCandidate<StoreInst>(&In))
625 return AddrInfo(HVC, S, S->getPointerOperand(),
626 S->getValueOperand()->getType(), S->getAlign());
627 if (
auto *
II = isCandidate<IntrinsicInst>(&In)) {
630 case Intrinsic::masked_load:
631 return AddrInfo(HVC,
II,
II->getArgOperand(0),
II->getType(),
632 getAlignFromValue(
II->getArgOperand(1)));
633 case Intrinsic::masked_store:
634 return AddrInfo(HVC,
II,
II->getArgOperand(1),
635 II->getArgOperand(0)->getType(),
636 getAlignFromValue(
II->getArgOperand(2)));
642auto AlignVectors::isHvx(
const AddrInfo &AI)
const ->
bool {
643 return HVC.HST.isTypeForHVX(AI.ValTy);
646auto AlignVectors::getPayload(
Value *Val)
const ->
Value * {
647 if (
auto *In = dyn_cast<Instruction>(Val)) {
649 if (
auto *
II = dyn_cast<IntrinsicInst>(In))
650 ID =
II->getIntrinsicID();
651 if (isa<StoreInst>(In) ||
ID == Intrinsic::masked_store)
652 return In->getOperand(0);
657auto AlignVectors::getMask(
Value *Val)
const ->
Value * {
658 if (
auto *
II = dyn_cast<IntrinsicInst>(Val)) {
659 switch (
II->getIntrinsicID()) {
660 case Intrinsic::masked_load:
661 return II->getArgOperand(2);
662 case Intrinsic::masked_store:
663 return II->getArgOperand(3);
667 Type *ValTy = getPayload(Val)->getType();
668 if (
auto *VecTy = dyn_cast<VectorType>(ValTy))
669 return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
670 return HVC.getFullValue(HVC.getBoolTy());
673auto AlignVectors::getPassThrough(
Value *Val)
const ->
Value * {
674 if (
auto *
II = dyn_cast<IntrinsicInst>(Val)) {
675 if (
II->getIntrinsicID() == Intrinsic::masked_load)
676 return II->getArgOperand(3);
682 Type *ValTy,
int Adjust,
683 const InstMap &CloneMap)
const
685 if (
auto *
I = dyn_cast<Instruction>(
Ptr))
688 return Builder.CreatePtrAdd(
Ptr, HVC.getConstInt(Adjust),
"gep");
692 Type *ValTy,
int Alignment,
693 const InstMap &CloneMap)
const
696 if (
auto *
I = dyn_cast<Instruction>(V)) {
697 for (
auto [Old, New] : CloneMap)
698 I->replaceUsesOfWith(Old, New);
703 Value *AsInt = Builder.CreatePtrToInt(
Ptr, HVC.getIntTy(),
"pti");
704 Value *
Mask = HVC.getConstInt(-Alignment);
705 Value *
And = Builder.CreateAnd(remap(AsInt),
Mask,
"and");
706 return Builder.CreateIntToPtr(
714 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
718 "Expectning scalar predicate");
719 if (HVC.isFalse(Predicate))
721 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
722 Value *
Load = createPredicatedLoad(Builder, ValTy,
Ptr, Predicate,
723 Alignment, MDSources);
724 return Builder.CreateSelect(Mask, Load, PassThru);
728 assert(!HVC.isUndef(Mask));
729 if (HVC.isZero(Mask))
731 if (HVC.isTrue(Mask))
732 return createSimpleLoad(Builder, ValTy,
Ptr, Alignment, MDSources);
735 Mask, PassThru,
"mld");
745 Builder.CreateAlignedLoad(ValTy,
Ptr,
Align(Alignment),
"ald");
755 assert(HVC.HST.isTypeForHVX(ValTy) &&
756 "Predicates 'scalar' vector loads not yet supported");
758 assert(!
Predicate->getType()->isVectorTy() &&
"Expectning scalar predicate");
759 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
760 if (HVC.isFalse(Predicate))
762 if (HVC.isTrue(Predicate))
763 return createSimpleLoad(Builder, ValTy,
Ptr, Alignment, MDSources);
765 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
767 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
775 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
778 "Expectning scalar predicate"));
780 if (HVC.isFalse(Predicate))
782 if (HVC.isTrue(Predicate))
787 if (HVC.isTrue(Mask)) {
789 return createPredicatedStore(Builder, Val,
Ptr, Predicate, Alignment,
793 return createSimpleStore(Builder, Val,
Ptr, Alignment, MDSources);
799 Builder.CreateMaskedStore(Val,
Ptr,
Align(Alignment), Mask);
806 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(),
Ptr,
807 Predicate, Alignment, MDSources);
808 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
809 return createPredicatedStore(Builder, Mux,
Ptr, Predicate, Alignment,
827 assert(HVC.HST.isTypeForHVX(Val->getType()) &&
828 "Predicates 'scalar' vector stores not yet supported");
830 if (HVC.isFalse(Predicate))
832 if (HVC.isTrue(Predicate))
833 return createSimpleStore(Builder, Val,
Ptr, Alignment, MDSources);
835 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
836 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
838 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai,
nullptr,
847 "Base and In should be in the same block");
848 assert(
Base->comesBefore(In) &&
"Base should come before In");
851 std::deque<Instruction *> WorkQ = {
In};
852 while (!WorkQ.empty()) {
858 if (
auto *
I = dyn_cast<Instruction>(
Op)) {
859 if (
I->getParent() == Parent &&
Base->comesBefore(
I))
867auto AlignVectors::createAddressGroups() ->
bool {
872 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
873 for (AddrInfo &W : WorkStack) {
874 if (
auto D = HVC.calculatePointerDifference(AI.Addr,
W.Addr))
875 return std::make_pair(
W.Inst, *
D);
877 return std::make_pair(
nullptr, 0);
880 auto traverseBlock = [&](
DomTreeNode *DomN,
auto Visit) ->
void {
883 auto AI = this->getAddrInfo(
I);
886 auto F = findBaseAndOffset(*AI);
889 AI->Offset =
F.second;
892 WorkStack.push_back(*AI);
893 GroupInst = AI->Inst;
895 AddrGroups[GroupInst].push_back(*AI);
901 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &
Block)
902 WorkStack.pop_back();
905 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
906 assert(WorkStack.empty());
911 erase_if(AddrGroups, [](
auto &
G) {
return G.second.size() == 1; });
915 G.second, [&](
auto &
I) { return HVC.HST.isTypeForHVX(I.ValTy); });
918 return !AddrGroups.empty();
921auto AlignVectors::createLoadGroups(
const AddrList &Group)
const -> MoveList {
929 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
930 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
934 if (Move.IsHvx != isHvx(Info))
938 if (
Base->getParent() !=
Info.Inst->getParent())
941 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator()))
945 return HVC.isSafeToMoveBeforeInBB(*
I,
Base->getIterator()) &&
946 HVC.isSafeToClone(*
I);
948 DepList Deps = getUpwardDeps(
Info.Inst,
Base);
952 Move.Main.push_back(
Info.Inst);
959 for (
const AddrInfo &Info : Group) {
960 if (!
Info.Inst->mayReadFromMemory())
962 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
963 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
true);
967 erase_if(LoadGroups, [](
const MoveGroup &
G) {
return G.Main.size() <= 1; });
970 if (!HVC.HST.useHVXV62Ops())
971 erase_if(LoadGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
976auto AlignVectors::createStoreGroups(
const AddrList &Group)
const -> MoveList {
984 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
985 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
991 "Not handling stores with return values");
993 if (Move.IsHvx != isHvx(Info))
999 if (
Base->getParent() !=
Info.Inst->getParent())
1001 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator(), Move.Main))
1003 Move.Main.push_back(
Info.Inst);
1007 MoveList StoreGroups;
1009 for (
auto I = Group.rbegin(), E = Group.rend();
I != E; ++
I) {
1010 const AddrInfo &
Info = *
I;
1011 if (!
Info.Inst->mayWriteToMemory())
1013 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1014 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
false);
1018 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.Main.size() <= 1; });
1021 if (!HVC.HST.useHVXV62Ops())
1022 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1027 if (!VADoFullStores) {
1028 erase_if(StoreGroups, [
this](
const MoveGroup &
G) {
1030 auto MaybeInfo = this->getAddrInfo(*S);
1031 assert(MaybeInfo.has_value());
1032 return HVC.HST.isHVXVectorType(
1033 EVT::getEVT(MaybeInfo->ValTy, false));
1041auto AlignVectors::moveTogether(MoveGroup &Move)
const ->
bool {
1043 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
1049 Move.Clones = cloneBefore(Where, Move.Deps);
1054 M->moveAfter(Where);
1055 for (
auto [Old, New] : Move.Clones)
1056 M->replaceUsesOfWith(Old, New);
1060 for (
int i = 0, e = Move.Deps.size(); i != e; ++i)
1061 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1066 assert(Move.Deps.empty());
1070 M->moveBefore(Where);
1075 return Move.Main.size() + Move.Deps.size() > 1;
1078template <
typename T>
1079auto AlignVectors::cloneBefore(
Instruction *To,
T &&Insts)
const -> InstMap {
1083 assert(HVC.isSafeToClone(*
I));
1085 C->setName(
Twine(
"c.") +
I->getName() +
".");
1086 C->insertBefore(To);
1088 for (
auto [Old, New] : Map)
1089 C->replaceUsesOfWith(Old, New);
1090 Map.insert(std::make_pair(
I,
C));
1096 const ByteSpan &VSpan,
int ScLen,
1101 Type *SecTy = HVC.getByteTy(ScLen);
1102 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1103 bool DoAlign = !HVC.isZero(AlignVal);
1105 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1108 auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
1135 for (
int Index = 0;
Index != NumSectors; ++
Index)
1136 ASpan.Blocks.emplace_back(
nullptr, ScLen, Index * ScLen);
1137 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1138 ASpan.Blocks[
Index].Seg.Val =
1139 reinterpret_cast<Value *
>(&ASpan.Blocks[
Index]);
1151 assert(
A->getParent() ==
B->getParent());
1152 return A->comesBefore(
B);
1154 auto earliestUser = [&](
const auto &
Uses) {
1157 auto *
I = dyn_cast<Instruction>(
U.getUser());
1158 assert(
I !=
nullptr &&
"Load used in a non-instruction?");
1162 if (
I->getParent() == BaseBlock) {
1163 if (!isa<PHINode>(
I))
1172 for (
const ByteSpan::Block &
B : VSpan) {
1173 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size);
1174 for (
const ByteSpan::Block &S : ASection) {
1175 EarliestUser[S.Seg.Val] = std::min(
1176 EarliestUser[S.Seg.Val], earliestUser(
B.Seg.Val->uses()), isEarlier);
1181 dbgs() <<
"ASpan:\n" << ASpan <<
'\n';
1182 dbgs() <<
"Earliest users of ASpan:\n";
1183 for (
auto &[Val,
User] : EarliestUser) {
1184 dbgs() << Val <<
"\n ->" << *
User <<
'\n';
1188 auto createLoad = [&](
IRBuilderBase &Builder,
const ByteSpan &VSpan,
1189 int Index,
bool MakePred) {
1191 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1193 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1197 int Start = (
Index - DoAlign) * ScLen;
1198 int Width = (1 + DoAlign) * ScLen;
1199 return this->createLoad(Builder, SecTy,
Ptr, Predicate, ScLen, True, Undef,
1200 VSpan.section(Start, Width).values());
1205 assert(
In->getParent() == To->getParent());
1206 DepList Deps = getUpwardDeps(In, To);
1209 InstMap
Map = cloneBefore(In, Deps);
1210 for (
auto [Old, New] : Map)
1211 In->replaceUsesOfWith(Old, New);
1216 for (
int Index = 0;
Index != NumSectors + 1; ++
Index) {
1224 DoAlign &&
Index > 0 ? EarliestUser[&ASpan[
Index - 1]] :
nullptr;
1226 Index < NumSectors ? EarliestUser[&ASpan[
Index]] :
nullptr;
1227 if (
auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1230 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1237 if (
auto *Load = dyn_cast<Instruction>(Loads[Index])) {
1238 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1239 moveBefore(Load, &*BasePos);
1241 LLVM_DEBUG(
dbgs() <<
"Loads[" << Index <<
"]:" << *Loads[Index] <<
'\n');
1247 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1248 ASpan[
Index].Seg.Val =
nullptr;
1249 if (
auto *Where = EarliestUser[&ASpan[Index]]) {
1255 assert(NextLoad !=
nullptr);
1256 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1258 ASpan[
Index].Seg.Val = Val;
1263 for (
const ByteSpan::Block &
B : VSpan) {
1264 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size).shift(-
B.Pos);
1271 std::vector<ByteSpan::Block *> ABlocks;
1272 for (ByteSpan::Block &S : ASection) {
1273 if (S.Seg.Val !=
nullptr)
1274 ABlocks.push_back(&S);
1277 [&](
const ByteSpan::Block *
A,
const ByteSpan::Block *
B) {
1278 return isEarlier(cast<Instruction>(
A->Seg.Val),
1279 cast<Instruction>(
B->Seg.Val));
1281 for (ByteSpan::Block *S : ABlocks) {
1284 Instruction *SegI = cast<Instruction>(S->Seg.Val);
1286 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1288 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1296 Type *ValTy = getPayload(
B.Seg.Val)->getType();
1299 getPassThrough(
B.Seg.Val),
"sel");
1300 B.Seg.Val->replaceAllUsesWith(Sel);
1305 const ByteSpan &VSpan,
int ScLen,
1310 Type *SecTy = HVC.getByteTy(ScLen);
1311 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1312 bool DoAlign = !HVC.isZero(AlignVal);
1315 ByteSpan ASpanV, ASpanM;
1323 auto *VecTy = VectorType::get(Ty, 1,
false);
1329 for (
int Index = (DoAlign ? -1 : 0);
Index != NumSectors + DoAlign; ++
Index) {
1333 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1338 for (ByteSpan::Block &S : VSection) {
1339 Value *Pay = getPayload(S.Seg.Val);
1340 Value *
Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1341 Pay->
getType(), HVC.getByteTy());
1342 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1343 S.Seg.Start, S.Seg.Size, S.Pos);
1344 AccumM = Builder.
CreateOr(AccumM, PartM);
1346 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1347 S.Seg.Start, S.Seg.Size, S.Pos);
1352 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1353 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1357 dbgs() <<
"ASpanV before vlalign:\n" << ASpanV <<
'\n';
1358 dbgs() <<
"ASpanM before vlalign:\n" << ASpanM <<
'\n';
1363 for (
int Index = 1;
Index != NumSectors + 2; ++
Index) {
1364 Value *PrevV = ASpanV[
Index - 1].Seg.Val, *ThisV = ASpanV[
Index].Seg.Val;
1365 Value *PrevM = ASpanM[
Index - 1].Seg.Val, *ThisM = ASpanM[
Index].Seg.Val;
1367 ASpanV[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1368 ASpanM[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1373 dbgs() <<
"ASpanV after vlalign:\n" << ASpanV <<
'\n';
1374 dbgs() <<
"ASpanM after vlalign:\n" << ASpanM <<
'\n';
1377 auto createStore = [&](
IRBuilderBase &Builder,
const ByteSpan &ASpanV,
1378 const ByteSpan &ASpanM,
int Index,
bool MakePred) {
1381 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1384 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1386 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1390 int Start = (
Index - DoAlign) * ScLen;
1391 int Width = (1 + DoAlign) * ScLen;
1392 this->createStore(Builder, Val,
Ptr, Predicate, ScLen,
1393 HVC.vlsb(Builder, Mask),
1394 VSpan.section(Start, Width).values());
1397 for (
int Index = 0;
Index != NumSectors + DoAlign; ++
Index) {
1398 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1402auto AlignVectors::realignGroup(
const MoveGroup &Move)
const ->
bool {
1411 auto getMaxOf = [](
auto Range,
auto GetValue) {
1413 return GetValue(
A) < GetValue(
B);
1417 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1432 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1435 BaseInfos, std::back_inserter(MoveInfos),
1436 [&TestSet](
const AddrInfo &AI) {
return TestSet.count(AI.Inst); });
1439 const AddrInfo &WithMaxAlign =
1440 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.HaveAlign; });
1441 Align MaxGiven = WithMaxAlign.HaveAlign;
1444 const AddrInfo &WithMinOffset =
1445 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return -AI.Offset; });
1447 const AddrInfo &WithMaxNeeded =
1448 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.NeedAlign; });
1449 Align MinNeeded = WithMaxNeeded.NeedAlign;
1463 Value *AlignAddr =
nullptr;
1464 Value *AlignVal =
nullptr;
1466 if (MinNeeded <= MaxGiven) {
1467 int Start = WithMinOffset.Offset;
1468 int OffAtMax = WithMaxAlign.Offset;
1475 int Adjust = -
alignTo(OffAtMax - Start, MinNeeded.value());
1476 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1477 WithMaxAlign.ValTy, Adjust, Move.Clones);
1478 int Diff = Start - (OffAtMax + Adjust);
1479 AlignVal = HVC.getConstInt(Diff);
1481 assert(
static_cast<decltype(MinNeeded.
value())
>(Diff) < MinNeeded.value());
1491 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1492 MinNeeded.value(), Move.Clones);
1494 Builder.
CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(),
"pti");
1495 if (
auto *
I = dyn_cast<Instruction>(AlignVal)) {
1496 for (
auto [Old, New] : Move.Clones)
1497 I->replaceUsesOfWith(Old, New);
1502 for (
const AddrInfo &AI : MoveInfos) {
1503 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1504 AI.Offset - WithMinOffset.Offset);
1510 int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1511 : std::max<int>(MinNeeded.value(), 4);
1512 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1513 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1516 dbgs() <<
"ScLen: " << ScLen <<
"\n";
1517 dbgs() <<
"AlignVal:" << *AlignVal <<
"\n";
1518 dbgs() <<
"AlignAddr:" << *AlignAddr <<
"\n";
1519 dbgs() <<
"VSpan:\n" << VSpan <<
'\n';
1523 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1525 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1527 for (
auto *Inst : Move.Main)
1528 Inst->eraseFromParent();
1534 int Alignment)
const ->
Value * {
1535 auto *AlignTy = AlignVal->
getType();
1537 AlignVal, ConstantInt::get(AlignTy, Alignment - 1),
"and");
1538 Value *
Zero = ConstantInt::get(AlignTy, 0);
1542auto AlignVectors::isSectorTy(
Type *Ty)
const ->
bool {
1543 if (!HVC.isByteVecTy(Ty))
1545 int Size = HVC.getSizeOf(Ty);
1546 if (HVC.HST.isTypeForHVX(Ty))
1547 return Size ==
static_cast<int>(HVC.HST.getVectorLength());
1551auto AlignVectors::run() ->
bool {
1552 LLVM_DEBUG(
dbgs() <<
"Running HVC::AlignVectors on " << HVC.F.getName()
1554 if (!createAddressGroups())
1558 dbgs() <<
"Address groups(" << AddrGroups.size() <<
"):\n";
1559 for (
auto &[In, AL] : AddrGroups) {
1560 for (
const AddrInfo &AI : AL)
1561 dbgs() <<
"---\n" << AI <<
'\n';
1565 bool Changed =
false;
1566 MoveList LoadGroups, StoreGroups;
1568 for (
auto &
G : AddrGroups) {
1574 dbgs() <<
"\nLoad groups(" << LoadGroups.size() <<
"):\n";
1575 for (
const MoveGroup &
G : LoadGroups)
1576 dbgs() <<
G <<
"\n";
1577 dbgs() <<
"Store groups(" << StoreGroups.size() <<
"):\n";
1578 for (
const MoveGroup &
G : StoreGroups)
1579 dbgs() <<
G <<
"\n";
1583 unsigned CountLimit = VAGroupCountLimit;
1584 if (CountLimit == 0)
1587 if (LoadGroups.size() > CountLimit) {
1588 LoadGroups.resize(CountLimit);
1589 StoreGroups.clear();
1591 unsigned StoreLimit = CountLimit - LoadGroups.size();
1592 if (StoreGroups.size() > StoreLimit)
1593 StoreGroups.resize(StoreLimit);
1596 for (
auto &M : LoadGroups)
1597 Changed |= moveTogether(M);
1598 for (
auto &M : StoreGroups)
1599 Changed |= moveTogether(M);
1603 for (
auto &M : LoadGroups)
1604 Changed |= realignGroup(M);
1605 for (
auto &M : StoreGroups)
1606 Changed |= realignGroup(M);
1616 -> std::pair<unsigned, Signedness> {
1617 unsigned Bits = HVC.getNumSignificantBits(V, In);
1623 KnownBits Known = HVC.getKnownBits(V, In);
1624 Signedness Sign =
Signed;
1625 unsigned NumToTest = 0;
1629 NumToTest =
Bits - 1;
1642 return {
Bits, Sign};
1645auto HvxIdioms::canonSgn(SValue
X, SValue
Y)
const
1646 -> std::pair<SValue, SValue> {
1659auto HvxIdioms::matchFxpMul(
Instruction &In)
const -> std::optional<FxpOp> {
1660 using namespace PatternMatch;
1661 auto *Ty =
In.getType();
1664 return std::nullopt;
1666 unsigned Width = cast<IntegerType>(Ty->
getScalarType())->getBitWidth();
1673 auto m_Shr = [](
auto &&
V,
auto &&S) {
1677 const APInt *Qn =
nullptr;
1685 if (
Op.Frac > Width)
1686 return std::nullopt;
1689 const APInt *
C =
nullptr;
1693 return std::nullopt;
1701 Op.Opcode = Instruction::Mul;
1703 Op.X.Sgn = getNumSignificantBits(
Op.X.Val, &In).second;
1704 Op.Y.Sgn = getNumSignificantBits(
Op.Y.Val, &In).second;
1705 Op.ResTy = cast<VectorType>(Ty);
1709 return std::nullopt;
1712auto HvxIdioms::processFxpMul(
Instruction &In,
const FxpOp &
Op)
const
1714 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
1716 auto *VecTy = dyn_cast<VectorType>(
Op.X.Val->getType());
1717 if (VecTy ==
nullptr)
1719 auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1720 unsigned ElemWidth = ElemTy->getBitWidth();
1723 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1733 if (ElemWidth <= 32 &&
Op.Frac == 0)
1736 auto [BitsX, SignX] = getNumSignificantBits(
Op.X.Val, &In);
1737 auto [BitsY, SignY] = getNumSignificantBits(
Op.Y.Val, &In);
1745 auto roundUpWidth = [](
unsigned Width) ->
unsigned {
1751 if (Width > 32 && Width % 32 != 0) {
1758 BitsX = roundUpWidth(BitsX);
1759 BitsY = roundUpWidth(BitsY);
1764 unsigned Width = std::max(BitsX, BitsY);
1766 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1767 if (Width < ElemWidth) {
1770 }
else if (Width > ElemWidth) {
1777 assert(
X->getType() ==
Y->getType() &&
X->getType() == ResizeTy);
1779 unsigned VecLen = HVC.length(ResizeTy);
1780 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1784 ChopOp.ResTy = VectorType::get(
Op.ResTy->getElementType(), ChopLen,
false);
1786 for (
unsigned V = 0;
V != VecLen / ChopLen; ++
V) {
1787 ChopOp.X.Val = HVC.subvector(Builder,
X, V * ChopLen, ChopLen);
1788 ChopOp.Y.Val = HVC.subvector(Builder,
Y, V * ChopLen, ChopLen);
1789 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1805 const FxpOp &
Op)
const ->
Value * {
1806 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
1807 auto *InpTy = cast<VectorType>(
Op.X.Val->getType());
1808 unsigned Width = InpTy->getScalarSizeInBits();
1811 if (!
Op.RoundAt || *
Op.RoundAt ==
Op.Frac - 1) {
1814 Value *QMul =
nullptr;
1816 QMul = createMulQ15(Builder,
Op.X,
Op.Y, Rounding);
1817 }
else if (Width == 32) {
1818 QMul = createMulQ31(Builder,
Op.X,
Op.Y, Rounding);
1820 if (QMul !=
nullptr)
1826 assert(Width < 32 || Width % 32 == 0);
1836 assert(
Op.Frac != 0 &&
"Unshifted mul should have been skipped");
1837 if (
Op.Frac == 16) {
1839 if (
Value *MulH = createMulH16(Builder,
Op.X,
Op.Y))
1843 Value *Prod32 = createMul16(Builder,
Op.X,
Op.Y);
1845 Value *RoundVal = HVC.getConstSplat(Prod32->
getType(), 1 << *
Op.RoundAt);
1846 Prod32 = Builder.
CreateAdd(Prod32, RoundVal,
"add");
1851 ? Builder.
CreateAShr(Prod32, ShiftAmt,
"asr")
1852 : Builder.
CreateLShr(Prod32, ShiftAmt,
"lsr");
1853 return Builder.
CreateTrunc(Shifted, InpTy,
"trn");
1860 auto WordX = HVC.splitVectorElements(Builder,
Op.X.Val, 32);
1861 auto WordY = HVC.splitVectorElements(Builder,
Op.Y.Val, 32);
1862 auto WordP = createMulLong(Builder, WordX,
Op.X.Sgn, WordY,
Op.Y.Sgn);
1864 auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
1867 if (
Op.RoundAt.has_value()) {
1870 RoundV[*
Op.RoundAt / 32] =
1871 HVC.getConstSplat(HvxWordTy, 1 << (*
Op.RoundAt % 32));
1872 WordP = createAddLong(Builder, WordP, RoundV);
1878 unsigned SkipWords =
Op.Frac / 32;
1879 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy,
Op.Frac % 32);
1881 for (
int Dst = 0,
End = WordP.size() - SkipWords; Dst !=
End; ++Dst) {
1882 int Src = Dst + SkipWords;
1884 if (Src + 1 <
End) {
1895 WordP.resize(WordP.size() - SkipWords);
1897 return HVC.joinVectorElements(Builder, WordP,
Op.ResTy);
1900auto HvxIdioms::createMulQ15(
IRBuilderBase &Builder, SValue
X, SValue
Y,
1901 bool Rounding)
const ->
Value * {
1902 assert(
X.Val->getType() ==
Y.Val->getType());
1903 assert(
X.Val->getType()->getScalarType() == HVC.getIntTy(16));
1910 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
1911 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs,
X.Val->getType(),
1915auto HvxIdioms::createMulQ31(
IRBuilderBase &Builder, SValue
X, SValue
Y,
1916 bool Rounding)
const ->
Value * {
1917 Type *InpTy =
X.Val->getType();
1918 assert(InpTy ==
Y.Val->getType());
1925 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
1927 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
1928 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
1930 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {
X.Val,
Y.Val});
1931 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
1932 {V1,
X.Val,
Y.Val});
1936 Value *CarryIn)
const
1937 -> std::pair<Value *, Value *> {
1938 assert(
X->getType() ==
Y->getType());
1939 auto VecTy = cast<VectorType>(
X->getType());
1940 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
1943 if (CarryIn ==
nullptr && HVC.HST.useHVXV66Ops()) {
1944 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
1946 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
1947 if (CarryIn ==
nullptr)
1948 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
1949 Args.push_back(CarryIn);
1951 Value *
Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
1955 return {
Result, CarryOut};
1962 if (CarryIn !=
nullptr) {
1963 unsigned Width = VecTy->getScalarSizeInBits();
1966 for (
unsigned i = 0, e = 32 / Width; i !=
e; ++i)
1967 Mask = (Mask << Width) | 1;
1969 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
1971 HVC.createHvxIntrinsic(Builder, V6_vandqrt,
nullptr,
1972 {CarryIn, HVC.getConstInt(Mask)});
1973 Result1 = Builder.
CreateAdd(
X, ValueIn,
"add");
1979 return {Result2, Builder.
CreateOr(CarryOut1, CarryOut2,
"orb")};
1982auto HvxIdioms::createMul16(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
1985 std::tie(
X,
Y) = canonSgn(
X,
Y);
1988 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
1991 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
1993 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
1998 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {
Y.Val,
X.Val});
2000 return HVC.vshuff(Builder, HVC.sublo(Builder,
P), HVC.subhi(Builder,
P));
2003auto HvxIdioms::createMulH16(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
2005 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16),
false);
2007 if (HVC.HST.useHVXV69Ops()) {
2009 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2010 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2015 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16),
true);
2018 unsigned Len = HVC.length(HvxP16Ty) / 2;
2021 for (
int i = 0; i !=
static_cast<int>(
Len); ++i)
2022 PickOdd[i] = 2 * i + 1;
2025 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd,
"shf");
2028auto HvxIdioms::createMul32(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
2029 -> std::pair<Value *, Value *> {
2030 assert(
X.Val->getType() ==
Y.Val->getType());
2031 assert(
X.Val->getType() == HvxI32Ty);
2034 std::tie(
X,
Y) = canonSgn(
X,
Y);
2037 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2039 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2041 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2044 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts,
nullptr,
2045 {
X.Val,
Y.Val}, {HvxI32Ty});
2054 assert(WordX.size() == WordY.size());
2055 unsigned Idx = 0,
Length = WordX.size();
2059 if (HVC.isZero(WordX[
Idx]))
2061 else if (HVC.isZero(WordY[
Idx]))
2068 Value *Carry =
nullptr;
2070 std::tie(Sum[
Idx], Carry) =
2071 createAddCarry(Builder, WordX[
Idx], WordY[
Idx], Carry);
2085 for (
int i = 0, e = WordX.size(); i != e; ++i) {
2086 for (
int j = 0, f = WordY.size(); j != f; ++j) {
2088 Signedness SX = (i + 1 ==
e) ? SgnX :
Unsigned;
2090 auto [
Lo,
Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[
j],
SY});
2091 Products[i +
j + 0].push_back(
Lo);
2092 Products[i +
j + 1].push_back(
Hi);
2106 for (
int i = 0, e = Products.size(); i !=
e; ++i) {
2107 while (Products[i].
size() > 1) {
2108 Value *Carry =
nullptr;
2109 for (
int j = i;
j !=
e; ++
j) {
2110 auto &ProdJ = Products[
j];
2111 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2112 pop_back_or_zero(ProdJ), Carry);
2113 ProdJ.insert(ProdJ.begin(), Sum);
2120 for (
auto &
P : Products) {
2121 assert(
P.size() == 1 &&
"Should have been added together");
2128auto HvxIdioms::run() ->
bool {
2129 bool Changed =
false;
2132 for (
auto It =
B.rbegin(); It !=
B.rend(); ++It) {
2133 if (
auto Fxm = matchFxpMul(*It)) {
2134 Value *
New = processFxpMul(*It, *Fxm);
2139 bool StartOver = !isa<Instruction>(New);
2140 It->replaceAllUsesWith(New);
2142 It = StartOver ?
B.rbegin()
2143 : cast<Instruction>(New)->getReverseIterator();
2154auto HexagonVectorCombine::run() ->
bool {
2156 dbgs() <<
"Module before HexagonVectorCombine\n" << *
F.getParent();
2158 bool Changed =
false;
2159 if (HST.useHVXOps()) {
2161 Changed |= AlignVectors(*this).run();
2163 Changed |= HvxIdioms(*this).run();
2167 dbgs() <<
"Module " << (Changed ?
"(modified)" :
"(unchanged)")
2168 <<
" after HexagonVectorCombine\n"
2174auto HexagonVectorCombine::getIntTy(
unsigned Width)
const ->
IntegerType * {
2178auto HexagonVectorCombine::getByteTy(
int ElemCount)
const ->
Type * {
2183 return VectorType::get(ByteTy, ElemCount,
false);
2186auto HexagonVectorCombine::getBoolTy(
int ElemCount)
const ->
Type * {
2191 return VectorType::get(BoolTy, ElemCount,
false);
2194auto HexagonVectorCombine::getConstInt(
int Val,
unsigned Width)
const
2199auto HexagonVectorCombine::isZero(
const Value *Val)
const ->
bool {
2200 if (
auto *
C = dyn_cast<Constant>(Val))
2201 return C->isZeroValue();
2205auto HexagonVectorCombine::getIntValue(
const Value *Val)
const
2206 -> std::optional<APInt> {
2207 if (
auto *CI = dyn_cast<ConstantInt>(Val))
2208 return CI->getValue();
2209 return std::nullopt;
2212auto HexagonVectorCombine::isUndef(
const Value *Val)
const ->
bool {
2213 return isa<UndefValue>(Val);
2216auto HexagonVectorCombine::isTrue(
const Value *Val)
const ->
bool {
2220auto HexagonVectorCombine::isFalse(
const Value *Val)
const ->
bool {
2224auto HexagonVectorCombine::getHvxTy(
Type *ElemTy,
bool Pair)
const
2230 "Invalid HVX element type");
2231 unsigned HwLen = HST.getVectorLength();
2233 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
2237auto HexagonVectorCombine::getSizeOf(
const Value *Val, SizeKind Kind)
const
2239 return getSizeOf(Val->
getType(), Kind);
2242auto HexagonVectorCombine::getSizeOf(
const Type *Ty, SizeKind Kind)
const
2244 auto *NcTy =
const_cast<Type *
>(Ty);
2247 return DL.getTypeStoreSize(NcTy).getFixedValue();
2249 return DL.getTypeAllocSize(NcTy).getFixedValue();
2254auto HexagonVectorCombine::getTypeAlignment(
Type *Ty)
const ->
int {
2257 if (HST.isTypeForHVX(Ty))
2258 return HST.getVectorLength();
2259 return DL.getABITypeAlign(Ty).value();
2262auto HexagonVectorCombine::length(
Value *Val)
const ->
size_t {
2263 return length(Val->
getType());
2266auto HexagonVectorCombine::length(
Type *Ty)
const ->
size_t {
2267 auto *VecTy = dyn_cast<VectorType>(Ty);
2268 assert(VecTy &&
"Must be a vector type");
2269 return VecTy->getElementCount().getFixedValue();
2272auto HexagonVectorCombine::getNullValue(
Type *Ty)
const ->
Constant * {
2275 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2280auto HexagonVectorCombine::getFullValue(
Type *Ty)
const ->
Constant * {
2283 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2288auto HexagonVectorCombine::getConstSplat(
Type *Ty,
int Val)
const
2291 auto VecTy = cast<VectorType>(Ty);
2292 Type *ElemTy = VecTy->getElementType();
2295 ConstantInt::get(ElemTy, Val));
2299auto HexagonVectorCombine::simplify(
Value *V)
const ->
Value * {
2300 if (
auto *In = dyn_cast<Instruction>(V)) {
2310 int Where)
const ->
Value * {
2311 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
2312 int SrcLen = getSizeOf(Src);
2313 int DstLen = getSizeOf(Dst);
2319 Value *P2Src = vresize(Builder, Src, P2Len, Undef);
2320 Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
2323 for (
int i = 0; i != P2Len; ++i) {
2327 (Where <= i && i < Where +
Length) ? P2Len + Start + (i - Where) : i;
2331 return vresize(Builder, P2Insert, DstLen, Undef);
2336 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
2339 int VecLen = getSizeOf(
Hi);
2340 if (
auto IntAmt = getIntValue(Amt))
2341 return getElementRange(Builder,
Lo,
Hi, VecLen - IntAmt->getSExtValue(),
2344 if (HST.isTypeForHVX(
Hi->getType())) {
2345 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2346 "Expecting an exact HVX type");
2347 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
2348 Hi->getType(), {Hi, Lo, Amt});
2361 return vralignb(Builder,
Lo,
Hi, Sub);
2368 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
2371 int VecLen = getSizeOf(
Lo);
2372 if (
auto IntAmt = getIntValue(Amt))
2373 return getElementRange(Builder,
Lo,
Hi, IntAmt->getSExtValue(), VecLen);
2375 if (HST.isTypeForHVX(
Lo->getType())) {
2376 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2377 "Expecting an exact HVX type");
2378 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
2379 Lo->getType(), {Hi, Lo, Amt});
2406 std::vector<Value *> Work[2];
2407 int ThisW = 0, OtherW = 1;
2409 Work[ThisW].
assign(Vecs.begin(), Vecs.end());
2410 while (Work[ThisW].
size() > 1) {
2411 auto *Ty = cast<VectorType>(Work[ThisW].front()->
getType());
2412 SMask.
resize(length(Ty) * 2);
2413 std::iota(SMask.
begin(), SMask.
end(), 0);
2415 Work[OtherW].clear();
2416 if (Work[ThisW].
size() % 2 != 0)
2418 for (
int i = 0, e = Work[ThisW].
size(); i <
e; i += 2) {
2420 Work[ThisW][i], Work[ThisW][i + 1], SMask,
"shf");
2421 Work[OtherW].push_back(Joined);
2429 SMask.
resize(Vecs.size() * length(Vecs.front()->getType()));
2430 std::iota(SMask.
begin(), SMask.
end(), 0);
2438 auto *ValTy = cast<VectorType>(Val->
getType());
2439 assert(ValTy->getElementType() == Pad->getType());
2441 int CurSize = length(ValTy);
2442 if (CurSize == NewSize)
2445 if (CurSize > NewSize)
2446 return getElementRange(Builder, Val, Val, 0, NewSize);
2449 std::iota(SMask.
begin(), SMask.
begin() + CurSize, 0);
2450 std::fill(SMask.
begin() + CurSize, SMask.
end(), CurSize);
2464 if (FromSTy == ToSTy)
2467 int FromSize = getSizeOf(FromSTy);
2468 int ToSize = getSizeOf(ToSTy);
2469 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
2471 auto *MaskTy = cast<VectorType>(
Mask->getType());
2472 int FromCount = length(MaskTy);
2473 int ToCount = (FromCount * FromSize) / ToSize;
2474 assert((FromCount * FromSize) % ToSize == 0);
2476 auto *FromITy =
getIntTy(FromSize * 8);
2477 auto *ToITy =
getIntTy(ToSize * 8);
2482 Mask, VectorType::get(FromITy, FromCount,
false),
"sxt");
2484 Ext, VectorType::get(ToITy, ToCount,
false),
"cst");
2486 Cast, VectorType::get(getBoolTy(), ToCount,
false),
"trn");
2493 if (ScalarTy == getBoolTy())
2496 Value *Bytes = vbytes(Builder, Val);
2497 if (
auto *VecTy = dyn_cast<VectorType>(Bytes->
getType()))
2498 return Builder.
CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)),
"trn");
2501 return Builder.
CreateTrunc(Bytes, getBoolTy(),
"trn");
2508 if (ScalarTy == getByteTy())
2511 if (ScalarTy != getBoolTy())
2512 return Builder.
CreateBitCast(Val, getByteTy(getSizeOf(Val)),
"cst");
2514 if (
auto *VecTy = dyn_cast<VectorType>(Val->
getType()))
2515 return Builder.
CreateSExt(Val, VectorType::get(getByteTy(), VecTy),
"sxt");
2516 return Builder.
CreateSExt(Val, getByteTy(),
"sxt");
2520 unsigned Start,
unsigned Length)
const
2523 return getElementRange(Builder, Val, Val, Start,
Length);
2528 size_t Len = length(Val);
2529 assert(Len % 2 == 0 &&
"Length should be even");
2530 return subvector(Builder, Val, 0, Len / 2);
2535 size_t Len = length(Val);
2536 assert(Len % 2 == 0 &&
"Length should be even");
2537 return subvector(Builder, Val, Len / 2, Len / 2);
2542 assert(Val0->getType() == Val1->getType());
2543 int Len = length(Val0);
2546 for (
int i = 0; i !=
Len; ++i) {
2555 assert(Val0->getType() == Val1->getType());
2556 int Len = length(Val0);
2559 for (
int i = 0; i !=
Len; ++i) {
2560 Mask[2 * i + 0] = i;
2566auto HexagonVectorCombine::createHvxIntrinsic(
IRBuilderBase &Builder,
2574 Type *SrcTy = Val->getType();
2575 if (SrcTy == DestTy)
2580 assert(HST.isTypeForHVX(SrcTy,
true));
2583 if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
2587 unsigned HwLen = HST.getVectorLength();
2588 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
2589 : Intrinsic::hexagon_V6_pred_typecast_128B;
2599 for (
int i = 0, e =
Args.size(); i != e; ++i) {
2601 Type *
T = IntrTy->getParamType(i);
2602 if (
A->getType() !=
T) {
2608 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ?
"cup" :
"";
2619 assert(HST.isTypeForHVX(CallTy,
true));
2620 return getCast(Builder, Call,
RetTy);
2623auto HexagonVectorCombine::splitVectorElements(
IRBuilderBase &Builder,
2625 unsigned ToWidth)
const
2639 auto *VecTy = cast<VectorType>(Vec->getType());
2640 assert(VecTy->getElementType()->isIntegerTy());
2641 unsigned FromWidth = VecTy->getScalarSizeInBits();
2643 assert(ToWidth <= FromWidth &&
"Breaking up into wider elements?");
2644 unsigned NumResults = FromWidth / ToWidth;
2648 unsigned Length = length(VecTy);
2652 auto splitInHalf = [&](
unsigned Begin,
unsigned End,
auto splitFunc) ->
void {
2656 if (Begin + 1 ==
End)
2662 auto *VTy = VectorType::get(
getIntTy(Width / 2), 2 *
Length,
false);
2665 Value *Res =
vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
2667 unsigned Half = (Begin +
End) / 2;
2668 Results[Begin] = sublo(Builder, Res);
2669 Results[Half] = subhi(Builder, Res);
2671 splitFunc(Begin, Half, splitFunc);
2672 splitFunc(Half,
End, splitFunc);
2675 splitInHalf(0, NumResults, splitInHalf);
2679auto HexagonVectorCombine::joinVectorElements(
IRBuilderBase &Builder,
2683 assert(ToType->getElementType()->isIntegerTy());
2694 unsigned ToWidth = ToType->getScalarSizeInBits();
2695 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
2696 assert(Width <= ToWidth);
2698 unsigned Length = length(Inputs.front()->getType());
2700 unsigned NeedInputs = ToWidth / Width;
2701 if (Inputs.size() != NeedInputs) {
2706 Last, getConstSplat(
Last->getType(), Width - 1),
"asr");
2707 Inputs.resize(NeedInputs, Sign);
2710 while (Inputs.size() > 1) {
2713 for (
int i = 0, e = Inputs.size(); i < e; i += 2) {
2714 Value *Res =
vshuff(Builder, Inputs[i], Inputs[i + 1]);
2717 Inputs.resize(Inputs.size() / 2);
2720 assert(Inputs.front()->getType() == ToType);
2721 return Inputs.front();
2724auto HexagonVectorCombine::calculatePointerDifference(
Value *Ptr0,
2726 -> std::optional<int> {
2728 const SCEV *Scev0 = SE.getSCEV(Ptr0);
2729 const SCEV *Scev1 = SE.getSCEV(Ptr1);
2730 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
2731 if (
auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
2733 if (
V.isSignedIntN(8 *
sizeof(
int)))
2734 return static_cast<int>(
V.getSExtValue());
2741 I->eraseFromParent();
2746#define CallBuilder(B, F) \
2749 if (auto *I = dyn_cast<Instruction>(V)) \
2750 B_.ToErase.push_back(I); \
2754 auto Simplify = [
this](
Value *
V) {
2760 auto StripBitCast = [](
Value *
V) {
2761 while (
auto *
C = dyn_cast<BitCastInst>(V))
2762 V =
C->getOperand(0);
2766 Ptr0 = StripBitCast(Ptr0);
2767 Ptr1 = StripBitCast(Ptr1);
2768 if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
2769 return std::nullopt;
2771 auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
2772 auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
2773 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
2774 return std::nullopt;
2775 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
2776 return std::nullopt;
2778 Builder
B(Gep0->getParent());
2779 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
2782 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
2783 return std::nullopt;
2785 Value *Idx0 = Gep0->getOperand(1);
2786 Value *Idx1 = Gep1->getOperand(1);
2789 if (
auto *Diff = dyn_cast<ConstantInt>(
2791 return Diff->getSExtValue() * Scale;
2793 KnownBits Known0 = getKnownBits(Idx0, Gep0);
2794 KnownBits Known1 = getKnownBits(Idx1, Gep1);
2797 return std::nullopt;
2804 if (
auto *
C = dyn_cast<ConstantInt>(SubU)) {
2805 Diff0 =
C->getSExtValue();
2807 return std::nullopt;
2815 if (
auto *
C = dyn_cast<ConstantInt>(SubK)) {
2816 Diff1 =
C->getSExtValue();
2818 return std::nullopt;
2821 return (Diff0 + Diff1) * Scale;
2826auto HexagonVectorCombine::getNumSignificantBits(
const Value *V,
2832auto HexagonVectorCombine::getKnownBits(
const Value *V,
2838auto HexagonVectorCombine::isSafeToClone(
const Instruction &In)
const ->
bool {
2839 if (
In.mayHaveSideEffects() ||
In.isAtomic() ||
In.isVolatile() ||
2840 In.isFenceLike() ||
In.mayReadOrWriteMemory()) {
2843 if (isa<CallBase>(In) || isa<AllocaInst>(In))
2848template <
typename T>
2849auto HexagonVectorCombine::isSafeToMoveBeforeInBB(
const Instruction &In,
2851 const T &IgnoreInsts)
const
2854 [
this](
const Instruction &
I) -> std::optional<MemoryLocation> {
2855 if (
const auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
2856 switch (
II->getIntrinsicID()) {
2857 case Intrinsic::masked_load:
2859 case Intrinsic::masked_store:
2870 if (isa<PHINode>(In) || (To !=
Block.end() && isa<PHINode>(*To)))
2875 bool MayWrite =
In.mayWriteToMemory();
2876 auto MaybeLoc = getLocOrNone(In);
2878 auto From =
In.getIterator();
2881 bool MoveUp = (To !=
Block.end() && To->comesBefore(&In));
2883 MoveUp ? std::make_pair(To,
From) :
std::make_pair(
std::next(
From), To);
2884 for (
auto It =
Range.first; It !=
Range.second; ++It) {
2889 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
2890 if (
II->getIntrinsicID() == Intrinsic::assume)
2896 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
2897 if (!CB->hasFnAttr(Attribute::WillReturn))
2899 if (!CB->hasFnAttr(Attribute::NoSync))
2902 if (
I.mayReadOrWriteMemory()) {
2903 auto MaybeLocI = getLocOrNone(
I);
2904 if (MayWrite ||
I.mayWriteToMemory()) {
2905 if (!MaybeLoc || !MaybeLocI)
2907 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2915auto HexagonVectorCombine::isByteVecTy(
Type *Ty)
const ->
bool {
2916 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2917 return VecTy->getElementType() == getByteTy();
2926 std::iota(SMask.
begin(), SMask.
end(), Start);
2938class HexagonVectorCombineLegacy :
public FunctionPass {
2960 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2962 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
2963 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2964 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2966 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
2968 HexagonVectorCombine HVC(
F, AA, AC, DT, SE, TLI, TM);
2974char HexagonVectorCombineLegacy::ID = 0;
2977 "Hexagon Vector Combine",
false,
false)
2988 return new HexagonVectorCombineLegacy();
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live value
Mark the given Function as meaning that it cannot be changed in any way mark any values that are used as this function s parameters or by its return values(according to Uses) live as well. void DeadArgumentEliminationPass
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
static bool isUndef(ArrayRef< int > Mask)
#define CallBuilder(B, F)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::const_iterator const_iterator
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator_range< iterator > children()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & back() const
Common base class shared among various IRBuilders.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
const char * getOpcodeName() const
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
PassRegistry - This class manages the registration and initialization of the pass subsystem as application startup begins and ends.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
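A short sketch composing the Type predicates above; this is only a plausible shape for a byte-vector check (like the pass's isByteVecTy in spirit), not its actual implementation:

// True for vectors of i8, e.g. <64 x i8>.
bool isByteVec(Type *Ty) {
  return Ty->isVectorTy() &&
         Ty->getScalarType()->isIntegerTy() &&
         Ty->getScalarSizeInBits() == 8;
}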
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
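Sketch of a declaration lookup (M is an assumed Module*; llvm.ctlz is overloaded on its operand type, so that type is passed in Tys):

Function *CtlzDecl = Intrinsic::getOrInsertDeclaration(
    M, Intrinsic::ctlz, {Type::getInt32Ty(M->getContext())});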
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
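A sketch of the matchers listed above (the helper name is invented for illustration): accept either right-shift form with a constant or splat-constant shift amount, binding the operand and the amount.

using namespace llvm::PatternMatch;

// Equivalent in effect to m_Shr(m_Value(X), m_APInt(ShAmt)).
bool matchRightShiftByConst(Value *V, Value *&X, const APInt *&ShAmt) {
  return match(V, m_CombineOr(m_LShr(m_Value(X), m_APInt(ShAmt)),
                              m_AShr(m_Value(X), m_APInt(ShAmt))));
}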
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createHexagonVectorCombineLegacyPass()
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
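A small sketch of these range helpers from STLExtras.h (the function is illustrative, not part of the pass):

// all_of over a range of instructions, no begin()/end() boilerplate.
bool onlyLoads(ArrayRef<Instruction *> Insts) {
  return all_of(Insts, [](Instruction *I) { return isa<LoadInst>(I); });
}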
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
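Typical usage, sketched under assumptions (In and NewV are assumed to be in scope; isInstructionTriviallyDead comes from llvm/Transforms/Utils/Local.h):

// Replace In with NewV, then delete In and any operands that the
// replacement left trivially dead.
In->replaceAllUsesWith(NewV);
if (isInstructionTriviallyDead(In))
  RecursivelyDeleteTriviallyDeadInstructions(In);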
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
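Worked example of the bit-math helpers above, from llvm/Support/MathExtras.h:

uint64_t Sector = PowerOf2Ceil(48);  // 64
bool Pow2 = isPowerOf2_64(Sector);   // true
unsigned Shift = Log2_64(Sector);    // 6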
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
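A sketch of a simplify-then-replace step (In is an assumed Instruction*; DL, TLI, DT, and AC are assumed analysis objects in scope):

// If InstructionSimplify can fold In to an existing value, use it.
if (Value *V = simplifyInstruction(In, SimplifyQuery(DL, &TLI, &DT, &AC)))
  In->replaceAllUsesWith(V);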
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void initializeHexagonVectorCombineLegacyPass(PassRegistry &)
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
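For example, from llvm/Support/Alignment.h:

uint64_t Padded = alignTo(13, Align(8)); // rounds 13 up to 16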
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to: C.erase(remove_if(C, pred), C.end());
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
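A sketch combining the two value-tracking queries above (V, DL, AC, DT, and the context instruction CtxI are assumed to be in scope; KnownBits is from llvm/Support/KnownBits.h):

// Per-element known bits, plus an upper bound on signed bit width.
KnownBits Known(V->getType()->getScalarSizeInBits());
computeKnownBits(V, Known, DL, 0, &AC, CtxI, &DT);
unsigned SigBits = ComputeMaxSignificantBits(V, DL, 0, &AC, CtxI, &DT);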
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instruction I depend on values not reachable through the def-use graph.
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
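A sketch of the EVT/MVT queries above (Ty is an assumed Type*; EVT lives in llvm/CodeGen/ValueTypes.h), of the kind a pass might use when asking whether a vector type maps onto a legal machine type:

// HandleUnknown=true yields MVT::Other instead of asserting.
EVT VT = EVT::getEVT(Ty, /*HandleUnknown=*/true);
if (VT.isSimple()) {
  MVT SimpleVT = VT.getSimpleVT();
  TypeSize Bits = VT.getSizeInBits();
  (void)SimpleVT;
  (void)Bits;
}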