71#define DEBUG_TYPE "mve-laneinterleave"
75 cl::desc(
"Enable interleave MVE vector operation lowering"));
100char MVELaneInterleaving::ID = 0;
106 return new MVELaneInterleaving();
123 for (
auto *E : Exts) {
124 if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) {
129 for (
auto *
T : Truncs) {
130 if (
T->hasOneUse() && !isa<StoreInst>(*
T->user_begin())) {
138 for (
auto *E : Exts) {
139 if (!E->hasOneUse() ||
140 cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) {
152 if (!isa<Instruction>(Start->getOperand(0)))
156 std::vector<Instruction *> Worklist;
157 Worklist.push_back(Start);
158 Worklist.push_back(cast<Instruction>(Start->getOperand(0)));
166 while (!Worklist.empty()) {
170 switch (
I->getOpcode()) {
172 case Instruction::Trunc:
173 case Instruction::FPTrunc:
180 case Instruction::SExt:
181 case Instruction::ZExt:
182 case Instruction::FPExt:
185 for (
auto *
Use :
I->users())
186 Worklist.push_back(cast<Instruction>(
Use));
190 case Instruction::Call: {
195 if (
II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
202 switch (
II->getIntrinsicID()) {
204 case Intrinsic::smin:
205 case Intrinsic::smax:
206 case Intrinsic::umin:
207 case Intrinsic::umax:
208 case Intrinsic::sadd_sat:
209 case Intrinsic::ssub_sat:
210 case Intrinsic::uadd_sat:
211 case Intrinsic::usub_sat:
212 case Intrinsic::minnum:
213 case Intrinsic::maxnum:
214 case Intrinsic::fabs:
216 case Intrinsic::ceil:
217 case Intrinsic::floor:
218 case Intrinsic::rint:
219 case Intrinsic::round:
220 case Intrinsic::trunc:
228 case Instruction::Add:
229 case Instruction::Sub:
230 case Instruction::Mul:
231 case Instruction::AShr:
232 case Instruction::LShr:
233 case Instruction::Shl:
234 case Instruction::ICmp:
235 case Instruction::FCmp:
236 case Instruction::FAdd:
237 case Instruction::FMul:
238 case Instruction::Select:
242 for (
Use &
Op :
I->operands()) {
243 if (!isa<FixedVectorType>(
Op->getType()))
245 if (isa<Instruction>(
Op))
246 Worklist.push_back(cast<Instruction>(&
Op));
251 for (
auto *
Use :
I->users())
252 Worklist.push_back(cast<Instruction>(
Use));
255 case Instruction::ShuffleVector:
257 if (cast<ShuffleVectorInst>(
I)->isZeroEltSplat())
271 dbgs() <<
"Found group:\n Exts:\n";
273 dbgs() <<
" " << *
I <<
"\n";
276 dbgs() <<
" " << *
I <<
"\n";
277 dbgs() <<
" OtherLeafs:\n";
278 for (
auto *
I : OtherLeafs)
279 dbgs() <<
" " << *
I->get() <<
" of " << *
I->getUser() <<
"\n";
280 dbgs() <<
" Truncs:\n";
281 for (
auto *
I : Truncs)
282 dbgs() <<
" " << *
I <<
"\n";
283 dbgs() <<
" Reducts:\n";
284 for (
auto *
I : Reducts)
285 dbgs() <<
" " << *
I <<
"\n";
289 "Expected some truncs or reductions");
293 auto *VT = !Truncs.
empty()
294 ? cast<FixedVectorType>(Truncs[0]->
getType())
295 : cast<FixedVectorType>(Exts[0]->getOperand(0)->
getType());
299 unsigned NumElts = VT->getNumElements();
300 unsigned BaseElts = VT->getScalarSizeInBits() == 16
302 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
303 if (BaseElts == 0 || NumElts % BaseElts != 0) {
307 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
308 VT->getScalarSizeInBits() * 2) {
313 if (
I->getOperand(0)->getType() != VT) {
318 if (
I->getType() != VT) {
327 return I->getOpcode() == Instruction::Mul ||
328 I->getOpcode() == Instruction::Select ||
329 I->getOpcode() == Instruction::ICmp;
342 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
343 for (
unsigned i = 0; i < BaseElts / 2; i++)
345 for (
unsigned i = 0; i < BaseElts / 2; i++)
348 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
349 for (
unsigned i = 0; i < BaseElts / 2; i++) {
359 bool FPext = isa<FPExtInst>(
I);
360 bool Sext = isa<SExtInst>(
I);
364 I->replaceAllUsesWith(Ext);
368 for (
Use *
I : OtherLeafs) {
372 I->getUser()->setOperand(
I->getOperandNo(), Shuffle);
381 I->replaceAllUsesWith(Shuf);
382 cast<Instruction>(Shuf)->setOperand(0,
I);
393 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I))
394 return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
398bool MVELaneInterleaving::runOnFunction(
Function &
F) {
401 auto &TPC = getAnalysis<TargetPassConfig>();
404 if (!
ST->hasMVEIntegerOps())
407 bool Changed =
false;
411 if (((
I.getType()->isVectorTy() &&
412 (isa<TruncInst>(
I) || isa<FPTruncInst>(
I))) ||
Expand Atomic instructions
static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)
static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)
cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))
static bool isAddReduction(Instruction &I)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Pass * createMVELaneInterleavingPass()
void initializeMVELaneInterleavingPass(PassRegistry &)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.