71#define DEBUG_TYPE "mve-laneinterleave"
75 cl::desc(
"Enable interleave MVE vector operation lowering"));
89 StringRef getPassName()
const override {
return "MVE lane interleaving"; }
91 void getAnalysisUsage(AnalysisUsage &AU)
const override {
94 FunctionPass::getAnalysisUsage(AU);
100char MVELaneInterleaving::ID = 0;
106 return new MVELaneInterleaving();
123 for (
auto *
E : Exts) {
129 for (
auto *
T : Truncs) {
138 for (
auto *
E : Exts) {
139 if (!
E->hasOneUse() ||
156 std::vector<Instruction *> Worklist;
157 Worklist.push_back(Start);
166 while (!Worklist.empty()) {
170 switch (
I->getOpcode()) {
172 case Instruction::Trunc:
173 case Instruction::FPTrunc:
180 case Instruction::SExt:
181 case Instruction::ZExt:
182 case Instruction::FPExt:
185 for (
auto *
Use :
I->users())
190 case Instruction::Call: {
195 if (
II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
202 switch (
II->getIntrinsicID()) {
204 case Intrinsic::smin:
205 case Intrinsic::smax:
206 case Intrinsic::umin:
207 case Intrinsic::umax:
208 case Intrinsic::sadd_sat:
209 case Intrinsic::ssub_sat:
210 case Intrinsic::uadd_sat:
211 case Intrinsic::usub_sat:
212 case Intrinsic::minnum:
213 case Intrinsic::maxnum:
214 case Intrinsic::fabs:
216 case Intrinsic::ceil:
217 case Intrinsic::floor:
218 case Intrinsic::rint:
219 case Intrinsic::round:
220 case Intrinsic::trunc:
228 case Instruction::Add:
229 case Instruction::Sub:
230 case Instruction::Mul:
231 case Instruction::AShr:
232 case Instruction::LShr:
233 case Instruction::Shl:
234 case Instruction::ICmp:
235 case Instruction::FCmp:
236 case Instruction::FAdd:
237 case Instruction::FMul:
238 case Instruction::Select:
242 for (
Use &
Op :
I->operands()) {
251 for (
auto *
Use :
I->users())
255 case Instruction::ShuffleVector:
271 dbgs() <<
"Found group:\n Exts:\n";
273 dbgs() <<
" " << *
I <<
"\n";
276 dbgs() <<
" " << *
I <<
"\n";
277 dbgs() <<
" OtherLeafs:\n";
278 for (
auto *
I : OtherLeafs)
279 dbgs() <<
" " << *
I->get() <<
" of " << *
I->getUser() <<
"\n";
280 dbgs() <<
" Truncs:\n";
281 for (
auto *
I : Truncs)
282 dbgs() <<
" " << *
I <<
"\n";
283 dbgs() <<
" Reducts:\n";
284 for (
auto *
I : Reducts)
285 dbgs() <<
" " << *
I <<
"\n";
289 "Expected some truncs or reductions");
293 auto *VT = !Truncs.
empty()
299 unsigned NumElts = VT->getNumElements();
300 unsigned BaseElts = VT->getScalarSizeInBits() == 16
302 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
303 if (BaseElts == 0 || NumElts % BaseElts != 0) {
307 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
308 VT->getScalarSizeInBits() * 2) {
313 if (
I->getOperand(0)->getType() != VT) {
318 if (
I->getType() != VT) {
327 return I->getOpcode() == Instruction::Mul ||
328 I->getOpcode() == Instruction::Select ||
329 I->getOpcode() == Instruction::ICmp;
342 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
343 for (
unsigned i = 0; i < BaseElts / 2; i++)
345 for (
unsigned i = 0; i < BaseElts / 2; i++)
348 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
349 for (
unsigned i = 0; i < BaseElts / 2; i++) {
357 Builder.SetInsertPoint(
I);
358 Value *Shuffle = Builder.CreateShuffleVector(
I->getOperand(0), LeafMask);
361 Value *Ext = FPext ? Builder.CreateFPExt(Shuffle,
I->getType())
362 : Sext ? Builder.CreateSExt(Shuffle,
I->getType())
363 : Builder.CreateZExt(Shuffle,
I->getType());
364 I->replaceAllUsesWith(Ext);
368 for (
Use *
I : OtherLeafs) {
371 Value *Shuffle = Builder.CreateShuffleVector(
I->get(), LeafMask);
372 I->getUser()->setOperand(
I->getOperandNo(), Shuffle);
379 Builder.SetInsertPoint(
I->getParent(), ++
I->getIterator());
380 Value *Shuf = Builder.CreateShuffleVector(
I, TruncMask);
381 I->replaceAllUsesWith(Shuf);
394 return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
398bool MVELaneInterleaving::runOnFunction(
Function &
F) {
401 auto &TPC = getAnalysis<TargetPassConfig>();
402 auto &
TM = TPC.getTM<TargetMachine>();
403 auto *
ST = &
TM.getSubtarget<ARMSubtarget>(
F);
404 if (!
ST->hasMVEIntegerOps())
409 SmallPtrSet<Instruction *, 16> Visited;
411 if (((
I.getType()->isVectorTy() &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)
static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)
static cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))
static bool isAddReduction(Instruction &I)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Pass * createMVELaneInterleavingPass()
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
void initializeMVELaneInterleavingPass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.