76#define DEBUG_TYPE "interleaved-access"
79 "lower-interleaved-accesses",
80 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
85class InterleavedAccessImpl {
86 friend class InterleavedAccess;
89 InterleavedAccessImpl() =
default;
91 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
99 unsigned MaxFactor = 0
u;
102 bool lowerInterleavedLoad(
LoadInst *LI,
106 bool lowerInterleavedStore(
StoreInst *SI,
137 InterleavedAccessImpl Impl;
162 InterleavedAccessImpl Impl(DT, TLI);
163 bool Changed = Impl.runOnFunction(
F);
173char InterleavedAccess::ID = 0;
175bool InterleavedAccess::runOnFunction(
Function &
F) {
176 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
180 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
182 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
184 Impl.TLI = TM.getSubtargetImpl(
F)->getTargetLowering();
185 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
187 return Impl.runOnFunction(
F);
191 "Lower interleaved memory accesses to target specific intrinsics",
false,
199 return new InterleavedAccess();
208 unsigned &Index,
unsigned MaxFactor,
209 unsigned NumLoadElements) {
214 for (Factor = 2; Factor <= MaxFactor; Factor++) {
216 if (Mask.size() * Factor > NumLoadElements)
237 unsigned MaxFactor) {
243 for (Factor = 2; Factor <= MaxFactor; Factor++) {
251bool InterleavedAccessImpl::lowerInterleavedLoad(
268 auto *Extract = dyn_cast<ExtractElementInst>(
User);
269 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
273 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
274 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
275 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
276 return SVI && isa<UndefValue>(SVI->getOperand(1));
278 for (
auto *SVI : BI->users())
279 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
283 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
284 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
290 if (Shuffles.
empty() && BinOpShuffles.
empty())
293 unsigned Factor,
Index;
295 unsigned NumLoadElements =
296 cast<FixedVectorType>(LI->
getType())->getNumElements();
297 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
306 Type *VecTy = FirstSVI->getType();
310 for (
auto *Shuffle : Shuffles) {
311 if (Shuffle->getType() != VecTy)
314 Shuffle->getShuffleMask(), Factor, Index))
317 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
320 for (
auto *Shuffle : BinOpShuffles) {
321 if (Shuffle->getType() != VecTy)
324 Shuffle->getShuffleMask(), Factor, Index))
327 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
329 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
331 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
337 if (!tryReplaceExtracts(Extracts, Shuffles))
340 bool BinOpShuffleChanged =
341 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
343 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
346 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
348 return !Extracts.
empty() || BinOpShuffleChanged;
357bool InterleavedAccessImpl::replaceBinOpShuffles(
360 for (
auto *SVI : BinOpShuffles) {
365 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
371 Mask, SVI->
getName(), insertPos);
377 SVI->replaceAllUsesWith(NewBI);
379 <<
"\n With : " << *NewSVI1 <<
"\n And : "
380 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
382 if (NewSVI1->getOperand(0) == LI)
384 if (NewSVI2->getOperand(0) == LI)
388 return !BinOpShuffles.empty();
391bool InterleavedAccessImpl::tryReplaceExtracts(
396 if (Extracts.
empty())
403 for (
auto *Extract : Extracts) {
405 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
406 auto Index = IndexOperand->getSExtValue();
411 for (
auto *Shuffle : Shuffles) {
421 Shuffle->getShuffleMask(Indices);
422 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
423 if (Indices[
I] == Index) {
424 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
425 "Vector operations do not match");
426 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
431 if (ReplacementMap.
count(Extract))
437 if (!ReplacementMap.
count(Extract))
443 for (
auto &Replacement : ReplacementMap) {
444 auto *Extract = Replacement.first;
445 auto *
Vector = Replacement.second.first;
446 auto Index = Replacement.second.second;
447 Builder.SetInsertPoint(Extract);
448 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector, Index));
449 Extract->eraseFromParent();
455bool InterleavedAccessImpl::lowerInterleavedStore(
460 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
461 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
469 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
472 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
481bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
488 LLVM_DEBUG(
dbgs() <<
"IA: Found a deinterleave intrinsic: " << *DI <<
"\n");
491 if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeadInsts))
500bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
502 if (!
II->hasOneUse())
507 if (!SI || !
SI->isSimple())
514 if (!TLI->lowerInterleaveIntrinsicToStore(
II, SI, InterleaveDeadInsts))
521 InterleaveDeadInsts.
end());
525bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
528 bool Changed =
false;
531 if (
auto *LI = dyn_cast<LoadInst>(&
I))
532 Changed |= lowerInterleavedLoad(LI, DeadInsts);
534 if (
auto *SI = dyn_cast<StoreInst>(&
I))
535 Changed |= lowerInterleavedStore(SI, DeadInsts);
537 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
540 if (
II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
541 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
542 else if (
II->getIntrinsicID() == Intrinsic::vector_interleave2)
543 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
547 for (
auto *
I : DeadInsts)
548 I->eraseFromParent();
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
expand Expand reduction intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
bool isInterleave(unsigned Factor)
Return true if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...