75#define DEBUG_TYPE "interleaved-access"
78 "lower-interleaved-accesses",
79 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
109 bool lowerInterleavedLoad(
LoadInst *LI,
113 bool lowerInterleavedStore(
StoreInst *SI,
135char InterleavedAccess::ID = 0;
138 "Lower interleaved memory accesses to target specific intrinsics",
false,
146 return new InterleavedAccess();
160 for (; i < Mask.size(); i++)
161 if (Mask[i] >= 0 &&
static_cast<unsigned>(Mask[i]) !=
Index + i * Factor)
164 if (i == Mask.size())
177 unsigned &
Index,
unsigned MaxFactor,
178 unsigned NumLoadElements) {
183 for (Factor = 2; Factor <= MaxFactor; Factor++) {
185 if (Mask.size() * Factor > NumLoadElements)
206 unsigned MaxFactor,
unsigned OpNumElts) {
207 unsigned NumElts = Mask.size();
212 for (Factor = 2; Factor <= MaxFactor; Factor++) {
213 if (NumElts % Factor)
216 unsigned LaneLen = NumElts / Factor;
224 for (;
I < Factor;
I++) {
225 unsigned SavedLaneValue;
226 unsigned SavedNoUndefs = 0;
229 for (J = 0; J < LaneLen - 1; J++) {
231 unsigned Lane = J * Factor +
I;
232 unsigned NextLane = Lane + Factor;
233 int LaneValue = Mask[Lane];
234 int NextLaneValue = Mask[NextLane];
237 if (LaneValue >= 0 && NextLaneValue >= 0 &&
238 LaneValue + 1 != NextLaneValue)
242 if (LaneValue >= 0 && NextLaneValue < 0) {
243 SavedLaneValue = LaneValue;
252 if (SavedNoUndefs > 0 && LaneValue < 0) {
254 if (NextLaneValue >= 0 &&
255 SavedLaneValue + SavedNoUndefs != (
unsigned)NextLaneValue)
267 }
else if (Mask[(LaneLen - 1) * Factor +
I] >= 0) {
269 StartMask = Mask[(LaneLen - 1) * Factor +
I] - J;
270 }
else if (SavedNoUndefs > 0) {
272 StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
279 if (StartMask + LaneLen > OpNumElts*2)
291bool InterleavedAccess::lowerInterleavedLoad(
308 auto *Extract = dyn_cast<ExtractElementInst>(
User);
309 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
313 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
315 [](
auto *U) { return isa<ShuffleVectorInst>(U); })) {
316 for (
auto *SVI : BI->users())
317 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
321 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
322 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
328 if (Shuffles.
empty() && BinOpShuffles.
empty())
331 unsigned Factor,
Index;
333 unsigned NumLoadElements =
334 cast<FixedVectorType>(LI->
getType())->getNumElements();
335 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
344 Type *VecTy = FirstSVI->getType();
348 for (
auto *
Shuffle : Shuffles) {
349 if (
Shuffle->getType() != VecTy)
358 for (
auto *
Shuffle : BinOpShuffles) {
359 if (
Shuffle->getType() != VecTy)
367 if (cast<Instruction>(
Shuffle->getOperand(0))->getOperand(0) == LI)
369 if (cast<Instruction>(
Shuffle->getOperand(0))->getOperand(1) == LI)
375 if (!tryReplaceExtracts(Extracts, Shuffles))
378 bool BinOpShuffleChanged =
379 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
381 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
384 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
386 return !Extracts.
empty() || BinOpShuffleChanged;
395bool InterleavedAccess::replaceBinOpShuffles(
398 for (
auto *SVI : BinOpShuffles) {
403 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
414 SVI->replaceAllUsesWith(NewBI);
416 <<
"\n With : " << *NewSVI1 <<
"\n And : "
417 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
419 if (NewSVI1->getOperand(0) == LI)
421 if (NewSVI2->getOperand(0) == LI)
425 return !BinOpShuffles.empty();
428bool InterleavedAccess::tryReplaceExtracts(
433 if (Extracts.
empty())
440 for (
auto *Extract : Extracts) {
442 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
443 auto Index = IndexOperand->getSExtValue();
448 for (
auto *
Shuffle : Shuffles) {
451 if (!DT->dominates(
Shuffle, Extract))
458 Shuffle->getShuffleMask(Indices);
459 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
460 if (Indices[
I] ==
Index) {
462 "Vector operations do not match");
463 ReplacementMap[Extract] = std::make_pair(
Shuffle,
I);
468 if (ReplacementMap.
count(Extract))
474 if (!ReplacementMap.
count(Extract))
480 for (
auto &Replacement : ReplacementMap) {
481 auto *Extract = Replacement.first;
482 auto *
Vector = Replacement.second.first;
483 auto Index = Replacement.second.second;
484 Builder.SetInsertPoint(Extract);
486 Extract->eraseFromParent();
492bool InterleavedAccess::lowerInterleavedStore(
497 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
498 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
504 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
508 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
511 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
520bool InterleavedAccess::runOnFunction(
Function &
F) {
521 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
525 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
527 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
529 TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
530 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
534 bool Changed =
false;
537 if (
auto *LI = dyn_cast<LoadInst>(&
I))
538 Changed |= lowerInterleavedLoad(LI, DeadInsts);
540 if (
auto *SI = dyn_cast<StoreInst>(&
I))
541 Changed |= lowerInterleavedStore(SI, DeadInsts);
544 for (
auto *
I : DeadInsts)
545 I->eraseFromParent();
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
Lower interleaved memory accesses to target specific intrinsics
static bool isReInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned MaxFactor, unsigned OpNumElts)
Check if the mask can be used in an interleaved store.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
print must be executed print the must be executed context for all instructions
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...