58 #define DEBUG_TYPE "interleaved-access"
61 "lower-interleaved-accesses",
62 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
76 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
94 bool lowerInterleavedLoad(
LoadInst *LI,
112 InterleavedAccess,
"interleaved-access",
113 "Lower interleaved memory accesses to target specific intrinsics",
false,
118 "Lower interleaved memory accesses to target specific
intrinsics",
false,
122 return new InterleavedAccess(TM);
131 for (Index = 0; Index < Factor; Index++) {
136 for (; i < Mask.
size(); i++)
137 if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
140 if (i == Mask.
size())
153 unsigned &Index,
unsigned MaxFactor) {
158 for (Factor = 2; Factor <= MaxFactor; Factor++)
177 unsigned MaxFactor,
unsigned OpNumElts) {
178 unsigned NumElts = Mask.
size();
183 for (Factor = 2; Factor <= MaxFactor; Factor++) {
184 if (NumElts % Factor)
187 unsigned LaneLen = NumElts / Factor;
195 for (; I < Factor; I++) {
196 unsigned SavedLaneValue;
197 unsigned SavedNoUndefs = 0;
200 for (J = 0; J < LaneLen - 1; J++) {
202 unsigned Lane = J * Factor +
I;
203 unsigned NextLane = Lane + Factor;
204 int LaneValue = Mask[Lane];
205 int NextLaneValue = Mask[NextLane];
208 if (LaneValue >= 0 && NextLaneValue >= 0 &&
209 LaneValue + 1 != NextLaneValue)
213 if (LaneValue >= 0 && NextLaneValue < 0) {
214 SavedLaneValue = LaneValue;
223 if (SavedNoUndefs > 0 && LaneValue < 0) {
225 if (NextLaneValue >= 0 &&
226 SavedLaneValue + SavedNoUndefs != (
unsigned)NextLaneValue)
238 }
else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
240 StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
241 }
else if (SavedNoUndefs > 0) {
243 StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
250 if (StartMask + LaneLen > OpNumElts*2)
262 bool InterleavedAccess::lowerInterleavedLoad(
276 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
281 if (!SVI || !isa<UndefValue>(SVI->
getOperand(1)))
287 if (Shuffles.
empty())
290 unsigned Factor, Index;
301 Type *VecTy = Shuffles[0]->getType();
305 for (
unsigned i = 1;
i < Shuffles.
size();
i++) {
306 if (Shuffles[
i]->
getType() != VecTy)
318 if (!tryReplaceExtracts(Extracts, Shuffles))
321 DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
324 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
327 for (
auto SVI : Shuffles)
334 bool InterleavedAccess::tryReplaceExtracts(
340 if (Extracts.
empty())
347 for (
auto *Extract : Extracts) {
350 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
351 auto Index = IndexOperand->getSExtValue();
356 for (
auto *Shuffle : Shuffles) {
360 if (!DT->dominates(Shuffle, Extract))
367 Shuffle->getShuffleMask(Indices);
368 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
369 if (Indices[
I] == Index) {
370 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
371 "Vector operations do not match");
372 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
377 if (ReplacementMap.
count(Extract))
383 if (!ReplacementMap.
count(Extract))
389 for (
auto &Replacement : ReplacementMap) {
390 auto *Extract = Replacement.first;
391 auto *Vector = Replacement.second.first;
392 auto Index = Replacement.second.second;
393 Builder.SetInsertPoint(Extract);
394 Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
395 Extract->eraseFromParent();
401 bool InterleavedAccess::lowerInterleavedStore(
416 DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
419 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
428 bool InterleavedAccess::runOnFunction(
Function &
F) {
434 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
435 TLI =
TM->getSubtargetImpl(F)->getTargetLowering();
436 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
440 bool Changed =
false;
443 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I))
444 Changed |= lowerInterleavedLoad(LI, DeadInsts);
447 Changed |= lowerInterleavedStore(SI, DeadInsts);
450 for (
auto I : DeadInsts)
451 I->eraseFromParent();
Value * getValueOperand()
void push_back(const T &Elt)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
This instruction constructs a fixed permutation of two input vectors.
An instruction for reading from memory.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
StringRef getName() const
Return a constant reference to the value's name.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
LLVM_NODISCARD bool empty() const
interleaved Lower interleaved memory accesses to target specific false
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
An instruction for storing to memory.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
size_t size() const
size - Get the array size.
static bool isReInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned MaxFactor, unsigned OpNumElts)
Check if the mask can be used in an interleaved store.
FunctionPass * createInterleavedAccessPass(const TargetMachine *TM)
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target specific intrinsics.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
initializer< Ty > init(const Ty &Val)
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
The instances of the Type class are immutable: once they are created, they are never changed...
#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis)
Target machine pass initializer for passes with dependencies.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Value * getOperand(unsigned i) const
bool empty() const
empty - Check if the array is empty.
void initializeInterleavedAccessPass(PassRegistry &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Type * getType() const
All values are typed, get the type of this value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
unsigned getVectorNumElements() const
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor)
Check if the mask is a DE-interleave mask for an interleaved load.
interleaved Lower interleaved memory accesses to target specific intrinsics
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
INITIALIZE_TM_PASS_BEGIN(InterleavedAccess,"interleaved-access","Lower interleaved memory accesses to target specific intrinsics", false, false) INITIALIZE_TM_PASS_END(InterleavedAccess
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Primary interface to the complete machine description for the target machine.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
inst_range instructions(Function *F)
Legacy analysis pass which computes a DominatorTree.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
static void getShuffleMask(Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
This file describes how to lower LLVM code to machine code.