23#include "llvm/IR/IntrinsicsHexagon.h"
33#define DEBUG_TYPE "hex-shuff-vec"
36 "shuffvec-max-search-count",
37 cl::desc(
"Maximum number of instructions traversed along def chain."),
43 cl::desc(
"Maximum number of shuffles to be relocated."),
58 static int NumRelocated;
64 HexagonOptShuffleVector(
const HexagonTargetMachine *TM)
65 : FunctionPass(ID), TM(TM) {
69 StringRef getPassName()
const override {
70 return "Hexagon Optimize Vector Shuffles";
75 void getAnalysisUsage(AnalysisUsage &AU)
const override {
76 FunctionPass::getAnalysisUsage(AU);
80 using ValueVector = SmallVector<Value *, 8>;
81 const HexagonTargetMachine *TM =
nullptr;
82 const HexagonSubtarget *HST =
nullptr;
83 SmallPtrSet<Instruction *, 8> Visited;
85 SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>;
86 ShuffUseList ShuffUses;
89 bool visitBlock(BasicBlock *
B);
90 bool findNewShuffLoc(Instruction *
I, ArrayRef<int> &ShuffMask,
92 bool isValidIntrinsic(IntrinsicInst *
I);
93 bool relocateShuffVec(Instruction *
I, ArrayRef<int> &M,
Value *NewLoc,
94 std::list<Instruction *> &WorkList);
95 bool getUseList(Instruction *
I, ValueVector &UseList);
96 bool analyzeHiLoUse(Instruction *HI, Instruction *LO,
97 ArrayRef<int> &ShuffMask,
Value *&NewLoc,
98 ShuffUseList &CurShuffUses);
99 bool isHILo(
Value *V,
bool IsHI);
100 bool hasDefWithSameShuffMask(
Value *V, SmallVector<Instruction *, 2> &ImmUse,
101 ArrayRef<int> &ShuffMask,
102 ShuffUseList &CurShuffUses);
103 void FindHiLoUse(ValueVector &UseList, Instruction *&HI, Instruction *&LO);
104 bool isConcatMask(ArrayRef<int> &Mask, Instruction *ShuffInst);
105 bool isValidUseInstr(ValueVector &UseList, Instruction *&UI);
106 bool areAllOperandsValid(Instruction *
I, Instruction *UI,
107 ArrayRef<int> &ShuffMask,
108 ShuffUseList &CurShuffUses);
109 Value *getOperand(Instruction *
I,
unsigned i);
111 static std::pair<Value *, Value *> stripCasts(
Value *V);
112 static bool isConstantVectorSplat(
Value *V);
118int HexagonOptShuffleVector::NumRelocated = 0;
120char HexagonOptShuffleVector::ID = 0;
123 "Hexagon Optimize Shuffle Vector",
false,
false)
130 Type *ShuffTy = ShuffInst->getType();
132 for (
int i = 0; i < NumElts; i++) {
140 switch (
I->getIntrinsicID()) {
143 case Intrinsic::hexagon_V6_vaddubh_128B:
144 case Intrinsic::hexagon_V6_vadduhw_128B:
145 case Intrinsic::hexagon_V6_vaddhw_128B:
146 case Intrinsic::hexagon_V6_vaddh_dv_128B:
147 case Intrinsic::hexagon_V6_vsububh_128B:
148 case Intrinsic::hexagon_V6_vsubuhw_128B:
149 case Intrinsic::hexagon_V6_vsubhw_128B:
150 case Intrinsic::hexagon_V6_vsubh_dv_128B:
151 case Intrinsic::hexagon_V6_vmpyubv_128B:
152 case Intrinsic::hexagon_V6_vmpybv_128B:
153 case Intrinsic::hexagon_V6_vmpyuhv_128B:
154 case Intrinsic::hexagon_V6_vmpyhv_128B:
155 case Intrinsic::hexagon_V6_vmpybusv_128B:
156 case Intrinsic::hexagon_V6_vmpyhus_128B:
157 case Intrinsic::hexagon_V6_vavgb_128B:
158 case Intrinsic::hexagon_V6_vavgub_128B:
159 case Intrinsic::hexagon_V6_vavgh_128B:
160 case Intrinsic::hexagon_V6_vavguh_128B:
161 case Intrinsic::hexagon_V6_vavgw_128B:
162 case Intrinsic::hexagon_V6_vavguw_128B:
163 case Intrinsic::hexagon_V6_hi_128B:
164 case Intrinsic::hexagon_V6_lo_128B:
165 case Intrinsic::sadd_sat:
166 case Intrinsic::uadd_sat:
168 case Intrinsic::hexagon_vadd_su:
169 case Intrinsic::hexagon_vadd_uu:
170 case Intrinsic::hexagon_vadd_ss:
171 case Intrinsic::hexagon_vadd_us:
172 case Intrinsic::hexagon_vsub_su:
173 case Intrinsic::hexagon_vsub_uu:
174 case Intrinsic::hexagon_vsub_ss:
175 case Intrinsic::hexagon_vsub_us:
176 case Intrinsic::hexagon_vmpy_su:
177 case Intrinsic::hexagon_vmpy_uu:
178 case Intrinsic::hexagon_vmpy_ss:
179 case Intrinsic::hexagon_vmpy_us:
180 case Intrinsic::hexagon_vavgu:
181 case Intrinsic::hexagon_vavgs:
182 case Intrinsic::hexagon_vmpy_ub_b:
183 case Intrinsic::hexagon_vmpy_ub_ub:
184 case Intrinsic::hexagon_vmpy_uh_uh:
185 case Intrinsic::hexagon_vmpy_h_h:
191bool HexagonOptShuffleVector::getUseList(Instruction *
I,
ValueVector &UseList) {
192 for (
auto UI =
I->user_begin(), UE =
I->user_end(); UI != UE;) {
197 if (!getUseList(
C, UseList))
206bool HexagonOptShuffleVector::isHILo(
Value *V,
bool IsHI) {
215 if ((
II->getIntrinsicID() == Intrinsic::hexagon_V6_hi_128B && IsHI) ||
216 (
II->getIntrinsicID() == Intrinsic::hexagon_V6_lo_128B && !IsHI))
221Value *HexagonOptShuffleVector::getOperand(Instruction *
I,
unsigned i) {
224 return C->getOperand(0);
229HexagonOptShuffleVector::getArgOperands(User *U) {
232 return U->operands();
237std::pair<Value *, Value *> HexagonOptShuffleVector::stripCasts(
Value *V) {
238 Value *LastCast =
nullptr;
241 V =
C->getOperand(0);
243 return std::make_pair(V, LastCast);
246bool HexagonOptShuffleVector::isConstantVectorSplat(
Value *V) {
248 return CV->getSplatValue();
250 return CV->isSplat();
257bool HexagonOptShuffleVector::analyzeHiLoUse(Instruction *HI, Instruction *LO,
258 ArrayRef<int> &ShuffMask,
260 ShuffUseList &CurShuffUses) {
262 getUseList(HI, HiUseList);
263 getUseList(LO, LoUseList);
268 if (HiUseList.
size() != 1 || LoUseList.
size() != 1)
273 if (!HiUse || !LoUse)
276 bool IsUseIntrinsic =
false;
283 if (!HiUseII || !LoUseII ||
285 !isValidIntrinsic(HiUseII))
287 IsUseIntrinsic =
true;
296 if (HiUse == LoUse) {
310 ArrayRef<int>
M1, M2;
314 return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses);
322 int HiOpNum = -1, LoOpNum = -1;
324 Value *
V = getOperand(HiUse, i);
331 Value *
V = getOperand(LoUse, i);
340 if (HiOpNum < 0 || HiOpNum != LoOpNum ||
341 LoUseOperands.
size() != HiUseOperands.
size())
344 unsigned NumOperands = HiUseOperands.
size();
345 for (
unsigned i = 0; i < NumOperands; i++) {
346 if (HiUseOperands[i] == LoUseOperands[i])
350 if (!isHILo(HiUseOperands[i],
true) || !isHILo(LoUseOperands[i],
false))
355 if (!DefHiUse || DefHiUse != DefLoUse)
357 SmallVector<Instruction *, 2> ImmUseList;
366 if (!hasDefWithSameShuffMask(DefHiUse, ImmUseList, ShuffMask, CurShuffUses))
371 return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses);
374bool HexagonOptShuffleVector::hasDefWithSameShuffMask(
375 Value *V, SmallVector<Instruction *, 2> &ImmUses, ArrayRef<int> &ShuffMask,
376 ShuffUseList &CurShuffUses) {
383 V = stripCasts(V).first;
390 M.equals(ShuffMask)) {
391 CurShuffUses[
I] = ImmUses;
399 if (!
I->isBinaryOp() && (!
II || !isValidIntrinsic(
II)))
402 for (
Value *OpV : getArgOperands(
I)) {
403 std::pair<Value *, Value *>
P = stripCasts(OpV);
406 SmallVector<Instruction *, 2> ImmUseList;
414 if (isConstantVectorSplat(OpV))
421 Found &= hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses);
426void HexagonOptShuffleVector::FindHiLoUse(
ValueVector &UseList,
427 Instruction *&HI, Instruction *&LO) {
429 for (
unsigned i = 0; i < UseList.
size(); i++) {
436 if (IntID == Intrinsic::hexagon_V6_hi_128B)
438 if (IntID == Intrinsic::hexagon_V6_lo_128B)
445bool HexagonOptShuffleVector::isValidUseInstr(
ValueVector &UseList,
448 if (UseList.
size() != 1)
456 if (!
II || !isValidIntrinsic(
II))
469bool HexagonOptShuffleVector::areAllOperandsValid(Instruction *
I,
471 ArrayRef<int> &ShuffMask,
472 ShuffUseList &CurShuffUses) {
473 bool AllOperandsOK =
true;
474 for (
Value *OpV : getArgOperands(Use)) {
475 bool HasOneUse = OpV->hasOneUse();
476 std::pair<Value *, Value *>
P = stripCasts(OpV);
479 SmallVector<Instruction *, 2> ImmUseList;
487 if (isConstantVectorSplat(OpV))
496 hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses);
498 return AllOperandsOK;
502bool HexagonOptShuffleVector::findNewShuffLoc(Instruction *
I,
503 ArrayRef<int> &ShuffMask,
507 if (!getUseList(
I, UseList))
511 SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>;
512 ShuffUseList CurShuffUses;
515 FindHiLoUse(UseList, HI, LO);
516 if (UseList.
size() == 2 && HI && LO) {
521 dbgs() <<
"\tFollowing the Hi/LO pair :\n";
522 dbgs() <<
"\t\tHI - ";
524 dbgs() <<
"\t\tLO - ";
527 if (!analyzeHiLoUse(HI, LO, ShuffMask, NewLoc, CurShuffUses))
529 for (
auto &it : CurShuffUses)
530 ShuffUses[it.first] = it.second;
534 if (!isValidUseInstr(UseList, UI))
536 assert(UI &&
"Expected a valid use, but found none!!");
544 if (!areAllOperandsValid(
I, UI, ShuffMask, CurShuffUses)) {
548 for (
auto &it : CurShuffUses)
549 ShuffUses[it.first] = it.second;
552 findNewShuffLoc(UI, ShuffMask, NewLoc);
558bool HexagonOptShuffleVector::relocateShuffVec(
559 Instruction *
I, ArrayRef<int> &M,
Value *NewLoc,
560 std::list<Instruction *> &WorkList) {
564 std::map<Instruction *, Value *> InstrMap;
565 bool CanReplace =
true;
566 unsigned ShuffInstCount = ShuffUses.size();
567 for (
auto &it : ShuffUses) {
570 Value *ShuffleOP =
nullptr;
574 if (JTy->getElementCount() != ShuffTy->getElementCount())
581 if (ShuffInstCount == 1 &&
582 NewShuffTy->getElementType() > ShuffTy->getElementType())
584 InstrMap[J] = ShuffleOP;
590 for (
auto IM : InstrMap) {
592 assert(ShuffUses.count(J));
593 SmallVector<Instruction *, 2>
Uses = ShuffUses[J];
594 if (
Uses.size() > 0) {
596 U->replaceUsesOfWith(IM.first, IM.second);
601 IM.first->replaceAllUsesWith(IM.second);
616 Use &TheUse = UI.getUse();
619 if (J && TheUse.
getUser() != NewShuffV)
622 WorkList.push_back(NewInst);
628bool HexagonOptShuffleVector::visitBlock(BasicBlock *
B) {
631 std::list<Instruction *> WorkList;
638 WorkList.push_back(&
I);
644 while (!WorkList.empty()) {
650 dbgs() <<
"Reached maximum limit!! \n";
651 dbgs() <<
"Can't process any more shuffles.... \n";
658 WorkList.pop_front();
660 Value *NewLoc =
nullptr;
674 ShuffUses[
I] = SmallVector<Instruction *, 2>();
676 if (!Visited.insert(
I).second) {
681 LLVM_DEBUG(
dbgs() <<
"\t\tSKIPPING - Not a vector shuffle ...\n");
684 if (!findNewShuffLoc(
I, M, NewLoc) || !NewLoc) {
689 Changed |= relocateShuffVec(
I, M, NewLoc, WorkList);
697bool HexagonOptShuffleVector::runOnFunction(Function &
F) {
712 return new HexagonOptShuffleVector(&TM);
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< int > MaxDefSearchCount("shuffvec-max-search-count", cl::desc("Maximum number of instructions traversed along def chain."), cl::Hidden, cl::init(15))
A command line argument to limit the search space along def chain.
static cl::opt< int > ShuffVecLimit("shuff-vec-max", cl::desc("Maximum number of shuffles to be relocated."), cl::Hidden, cl::init(-1))
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
SmallVector< Value *, 8 > ValueVector
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Legacy analysis pass which computes a DominatorTree.
FunctionPass class - This class is used to implement most global optimizations.
bool useHVX128BOps() const
const HexagonSubtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
void push_back(const T &Elt)
The instances of the Type class are immutable: once they are created, they are never changed.
User * getUser() const
Returns the User that contains this Use.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
unsigned getNumOperands() const
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionPass * createHexagonOptShuffleVector(const HexagonTargetMachine &)
unsigned M1(unsigned Val)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
void initializeHexagonOptShuffleVectorPass(PassRegistry &)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.