Go to the documentation of this file.
65 #include "llvm/IR/IntrinsicsARM.h"
77 #define DEBUG_TYPE "mve-laneinterleave"
81 cl::desc(
"Enable interleave MVE vector operation lowering"));
95 StringRef getPassName()
const override {
return "MVE lane interleaving"; }
112 return new MVELaneInterleaving();
129 for (
auto *
E : Exts) {
130 if (isa<FPExtInst>(
E) || !isa<LoadInst>(
E->getOperand(0))) {
135 for (
auto *
T : Truncs) {
136 if (
T->hasOneUse() && !isa<StoreInst>(*
T->user_begin())) {
144 for (
auto *
E : Exts) {
145 if (!
E->hasOneUse() ||
157 auto *VT = cast<FixedVectorType>(Start->getType());
159 if (!isa<Instruction>(Start->getOperand(0)))
163 std::vector<Instruction *> Worklist;
164 Worklist.push_back(Start);
165 Worklist.push_back(cast<Instruction>(Start->getOperand(0)));
172 while (!Worklist.empty()) {
176 switch (
I->getOpcode()) {
178 case Instruction::Trunc:
179 case Instruction::FPTrunc:
187 case Instruction::SExt:
188 case Instruction::ZExt:
189 case Instruction::FPExt:
192 for (
auto *
Use :
I->users())
193 Worklist.push_back(cast<Instruction>(
Use));
208 case Intrinsic::sadd_sat:
209 case Intrinsic::ssub_sat:
210 case Intrinsic::uadd_sat:
211 case Intrinsic::usub_sat:
214 case Intrinsic::fabs:
218 case Intrinsic::rint:
229 case Instruction::Sub:
231 case Instruction::AShr:
232 case Instruction::LShr:
233 case Instruction::Shl:
234 case Instruction::ICmp:
235 case Instruction::FCmp:
236 case Instruction::FAdd:
237 case Instruction::FMul:
243 for (
Use &
Op :
I->operands()) {
244 if (!isa<FixedVectorType>(
Op->getType()))
246 if (isa<Instruction>(
Op))
247 Worklist.push_back(cast<Instruction>(&
Op));
252 for (
auto *
Use :
I->users())
253 Worklist.push_back(cast<Instruction>(
Use));
256 case Instruction::ShuffleVector:
258 if (cast<ShuffleVectorInst>(
I)->isZeroEltSplat())
272 dbgs() <<
"Found group:\n Exts:";
274 dbgs() <<
" " << *
I <<
"\n";
277 dbgs() <<
" " << *
I <<
"\n";
278 dbgs() <<
" OtherLeafs:";
279 for (
auto *
I : OtherLeafs)
280 dbgs() <<
" " << *
I->get() <<
" of " << *
I->getUser() <<
"\n";
282 for (
auto *
I : Truncs)
283 dbgs() <<
" " << *
I <<
"\n";
289 unsigned NumElts = VT->getNumElements();
290 unsigned BaseElts = VT->getScalarSizeInBits() == 16
292 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
293 if (BaseElts == 0 || NumElts % BaseElts != 0) {
297 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
298 VT->getScalarSizeInBits() * 2) {
303 if (
I->getOperand(0)->getType() != VT) {
308 if (
I->getType() != VT) {
324 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
325 for (
unsigned i = 0;
i < BaseElts / 2;
i++)
326 LeafMask.push_back(
Base +
i * 2);
327 for (
unsigned i = 0;
i < BaseElts / 2;
i++)
328 LeafMask.push_back(
Base +
i * 2 + 1);
330 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
331 for (
unsigned i = 0;
i < BaseElts / 2;
i++) {
332 TruncMask.push_back(
Base +
i);
333 TruncMask.push_back(
Base +
i + BaseElts / 2);
340 Value *Shuffle =
Builder.CreateShuffleVector(
I->getOperand(0), LeafMask);
341 bool FPext = isa<FPExtInst>(
I);
342 bool Sext = isa<SExtInst>(
I);
344 : Sext ?
Builder.CreateSExt(Shuffle,
I->getType())
345 :
Builder.CreateZExt(Shuffle,
I->getType());
346 I->replaceAllUsesWith(
Ext);
350 for (
Use *
I : OtherLeafs) {
352 Builder.SetInsertPoint(cast<Instruction>(
I->getUser()));
353 Value *Shuffle =
Builder.CreateShuffleVector(
I->get(), LeafMask);
354 I->getUser()->setOperand(
I->getOperandNo(), Shuffle);
361 Builder.SetInsertPoint(
I->getParent(), ++
I->getIterator());
363 I->replaceAllUsesWith(Shuf);
364 cast<Instruction>(Shuf)->setOperand(0,
I);
375 auto &TPC = getAnalysis<TargetPassConfig>();
378 if (!
ST->hasMVEIntegerOps())
381 bool Changed =
false;
385 if (
I.getType()->isVectorTy() &&
386 (isa<TruncInst>(
I) || isa<FPTruncInst>(
I)) && !Visited.
count(&
I))
INITIALIZE_PASS(MVELaneInterleaving, DEBUG_TYPE, "MVE lane interleaving", false, false) Pass *llvm
This is an optimization pass for GlobalISel generic memory operations.
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static uint64_t round(uint64_t Acc, uint64_t Input)
Represent the analysis usage information of a pass.
bool empty() const
Determine if the SetVector is empty or not.
cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Target-Independent Code Generator Pass Configuration Options.
inst_range instructions(Function *F)
initializer< Ty > init(const Ty &Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool insert(const value_type &X)
Insert a new element into the SetVector.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
void setPreservesCFG()
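This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.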
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Pass * createMVELaneInterleavingPass()
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
static bool runOnFunction(Function &F, bool PostInlining)
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
void initializeMVELaneInterleavingPass(PassRegistry &)
A wrapper class for inspecting calls to intrinsic functions.
Pass interface - Implemented by all 'passes'.
static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
A SetVector that performs no allocations if smaller than a certain size.
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequired()
APFloat abs(APFloat X)
Returns the absolute value of the argument.
LLVM Value Representation.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.