77#define DEBUG_TYPE "interleaved-access"
80 "lower-interleaved-accesses",
81 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
86class InterleavedAccessImpl {
87 friend class InterleavedAccess;
90 InterleavedAccessImpl() =
default;
92 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
100 unsigned MaxFactor = 0
u;
103 bool lowerInterleavedLoad(
LoadInst *LI,
107 bool lowerInterleavedStore(
StoreInst *SI,
138 InterleavedAccessImpl Impl;
163 InterleavedAccessImpl Impl(DT, TLI);
164 bool Changed = Impl.runOnFunction(
F);
174char InterleavedAccess::ID = 0;
176bool InterleavedAccess::runOnFunction(
Function &
F) {
177 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
181 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
183 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
185 Impl.TLI = TM.getSubtargetImpl(
F)->getTargetLowering();
186 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
188 return Impl.runOnFunction(
F);
192 "Lower interleaved memory accesses to target specific intrinsics",
false,
200 return new InterleavedAccess();
209 unsigned &
Index,
unsigned MaxFactor,
210 unsigned NumLoadElements) {
215 for (Factor = 2; Factor <= MaxFactor; Factor++) {
217 if (Mask.size() * Factor > NumLoadElements)
238 unsigned MaxFactor) {
244 for (Factor = 2; Factor <= MaxFactor; Factor++) {
252bool InterleavedAccessImpl::lowerInterleavedLoad(
269 auto *Extract = dyn_cast<ExtractElementInst>(
User);
270 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
274 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
275 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
276 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
277 return SVI && isa<UndefValue>(SVI->getOperand(1));
279 for (
auto *SVI : BI->users())
280 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
284 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
285 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
291 if (Shuffles.
empty() && BinOpShuffles.
empty())
294 unsigned Factor,
Index;
296 unsigned NumLoadElements =
297 cast<FixedVectorType>(LI->
getType())->getNumElements();
298 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
307 Type *VecTy = FirstSVI->getType();
311 for (
auto *Shuffle : Shuffles) {
312 if (Shuffle->getType() != VecTy)
315 Shuffle->getShuffleMask(), Factor,
Index))
318 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
321 for (
auto *Shuffle : BinOpShuffles) {
322 if (Shuffle->getType() != VecTy)
325 Shuffle->getShuffleMask(), Factor,
Index))
328 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
330 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
332 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
338 if (!tryReplaceExtracts(Extracts, Shuffles))
341 bool BinOpShuffleChanged =
342 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
344 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
347 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
349 return !Extracts.
empty() || BinOpShuffleChanged;
358bool InterleavedAccessImpl::replaceBinOpShuffles(
361 for (
auto *SVI : BinOpShuffles) {
366 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
372 Mask, SVI->
getName(), insertPos);
378 SVI->replaceAllUsesWith(NewBI);
380 <<
"\n With : " << *NewSVI1 <<
"\n And : "
381 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
383 if (NewSVI1->getOperand(0) == LI)
385 if (NewSVI2->getOperand(0) == LI)
389 return !BinOpShuffles.empty();
392bool InterleavedAccessImpl::tryReplaceExtracts(
397 if (Extracts.
empty())
404 for (
auto *Extract : Extracts) {
406 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
407 auto Index = IndexOperand->getSExtValue();
412 for (
auto *Shuffle : Shuffles) {
415 if (!DT->dominates(Shuffle, Extract))
422 Shuffle->getShuffleMask(Indices);
423 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
424 if (Indices[
I] ==
Index) {
425 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
426 "Vector operations do not match");
427 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
432 if (ReplacementMap.
count(Extract))
438 if (!ReplacementMap.
count(Extract))
444 for (
auto &Replacement : ReplacementMap) {
445 auto *Extract = Replacement.first;
446 auto *
Vector = Replacement.second.first;
447 auto Index = Replacement.second.second;
448 Builder.SetInsertPoint(Extract);
449 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector,
Index));
450 Extract->eraseFromParent();
456bool InterleavedAccessImpl::lowerInterleavedStore(
461 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
462 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
470 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
473 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
482bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
489 LLVM_DEBUG(
dbgs() <<
"IA: Found a deinterleave intrinsic: " << *DI <<
"\n");
492 if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeadInsts))
501bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
503 if (!
II->hasOneUse())
508 if (!SI || !
SI->isSimple())
515 if (!TLI->lowerInterleaveIntrinsicToStore(
II, SI, InterleaveDeadInsts))
522 InterleaveDeadInsts.
end());
526bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
529 bool Changed =
false;
532 if (
auto *LI = dyn_cast<LoadInst>(&
I))
533 Changed |= lowerInterleavedLoad(LI, DeadInsts);
535 if (
auto *SI = dyn_cast<StoreInst>(&
I))
536 Changed |= lowerInterleavedStore(SI, DeadInsts);
538 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
541 if (
II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
542 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
543 else if (
II->getIntrinsicID() == Intrinsic::vector_interleave2)
544 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
548 for (
auto *
I : DeadInsts)
549 I->eraseFromParent();
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
expand Expand reduction intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...