77#define DEBUG_TYPE "interleaved-access"
80 "lower-interleaved-accesses",
81 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
86class InterleavedAccessImpl {
87 friend class InterleavedAccess;
90 InterleavedAccessImpl() =
default;
92 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
100 unsigned MaxFactor = 0
u;
103 bool lowerInterleavedLoad(
LoadInst *LI,
107 bool lowerInterleavedStore(
StoreInst *SI,
138 InterleavedAccessImpl Impl;
163 InterleavedAccessImpl Impl(DT, TLI);
164 bool Changed = Impl.runOnFunction(
F);
174char InterleavedAccess::ID = 0;
176bool InterleavedAccess::runOnFunction(
Function &
F) {
177 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
181 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
183 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
185 Impl.TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
186 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
188 return Impl.runOnFunction(
F);
192 "Lower interleaved memory accesses to target specific intrinsics",
false,
200 return new InterleavedAccess();
209 unsigned &
Index,
unsigned MaxFactor,
210 unsigned NumLoadElements) {
215 for (Factor = 2; Factor <= MaxFactor; Factor++) {
217 if (Mask.size() * Factor > NumLoadElements)
238 unsigned MaxFactor) {
244 for (Factor = 2; Factor <= MaxFactor; Factor++) {
252bool InterleavedAccessImpl::lowerInterleavedLoad(
269 auto *Extract = dyn_cast<ExtractElementInst>(
User);
270 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
274 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
275 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
276 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
277 return SVI && isa<UndefValue>(SVI->getOperand(1));
279 for (
auto *SVI : BI->users())
280 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
284 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
285 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
291 if (Shuffles.
empty() && BinOpShuffles.
empty())
294 unsigned Factor,
Index;
296 unsigned NumLoadElements =
297 cast<FixedVectorType>(LI->
getType())->getNumElements();
298 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
307 Type *VecTy = FirstSVI->getType();
311 for (
auto *Shuffle : Shuffles) {
312 if (Shuffle->getType() != VecTy)
315 Shuffle->getShuffleMask(), Factor,
Index))
318 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
321 for (
auto *Shuffle : BinOpShuffles) {
322 if (Shuffle->getType() != VecTy)
325 Shuffle->getShuffleMask(), Factor,
Index))
328 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
330 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
332 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
338 if (!tryReplaceExtracts(Extracts, Shuffles))
341 bool BinOpShuffleChanged =
342 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
344 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
347 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
349 return !Extracts.
empty() || BinOpShuffleChanged;
358bool InterleavedAccessImpl::replaceBinOpShuffles(
361 for (
auto *SVI : BinOpShuffles) {
366 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
372 Mask, SVI->
getName(), insertPos);
378 SVI->replaceAllUsesWith(NewBI);
380 <<
"\n With : " << *NewSVI1 <<
"\n And : "
381 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
383 if (NewSVI1->getOperand(0) == LI)
385 if (NewSVI2->getOperand(0) == LI)
389 return !BinOpShuffles.empty();
392bool InterleavedAccessImpl::tryReplaceExtracts(
397 if (Extracts.
empty())
404 for (
auto *Extract : Extracts) {
406 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
407 auto Index = IndexOperand->getSExtValue();
412 for (
auto *Shuffle : Shuffles) {
415 if (!DT->dominates(Shuffle, Extract))
422 Shuffle->getShuffleMask(Indices);
423 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
424 if (Indices[
I] ==
Index) {
425 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
426 "Vector operations do not match");
427 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
432 if (ReplacementMap.
count(Extract))
438 if (!ReplacementMap.
count(Extract))
444 for (
auto &Replacement : ReplacementMap) {
445 auto *Extract = Replacement.first;
446 auto *
Vector = Replacement.second.first;
447 auto Index = Replacement.second.second;
448 Builder.SetInsertPoint(Extract);
449 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector,
Index));
450 Extract->eraseFromParent();
456bool InterleavedAccessImpl::lowerInterleavedStore(
461 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
462 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
470 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
473 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
482bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
489 LLVM_DEBUG(
dbgs() <<
"IA: Found a deinterleave intrinsic: " << *DI <<
"\n");
492 if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
501bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
508 if (!SI || !
SI->isSimple())
511 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleave intrinsic: " << *II <<
"\n");
514 if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
523bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
526 bool Changed =
false;
529 if (
auto *LI = dyn_cast<LoadInst>(&
I))
530 Changed |= lowerInterleavedLoad(LI, DeadInsts);
532 if (
auto *SI = dyn_cast<StoreInst>(&
I))
533 Changed |= lowerInterleavedStore(SI, DeadInsts);
535 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
538 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
539 Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
540 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
541 Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
545 for (
auto *
I : DeadInsts)
546 I->eraseFromParent();
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
expand Expand reduction intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name, BasicBlock::iterator InsertBefore)
Represents analyses that only rely on functions' control flow.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...