43 const unsigned Factor;
87 : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
88 DL(Inst->getModule()->getDataLayout()), Builder(B) {}
92 bool isSupported()
const;
96 bool lowerIntoOptimizedSequence();
100 VectorType *ShuffleVecTy = Shuffles[0]->getType();
101 uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy);
104 if (DL.getTypeSizeInBits(Inst->getType()) < Factor * ShuffleVecSize)
108 if (!Subtarget.hasAVX() || ShuffleVecSize != 256 ||
109 DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4)
115 bool X86InterleavedAccessGroup::decompose(
121 DL.getTypeSizeInBits(VecTy) >=
122 DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
123 "Invalid Inst-size!!!");
125 "Element type mismatched!!!");
127 if (!isa<LoadInst>(VecInst))
130 LoadInst *LI = cast<LoadInst>(VecInst);
137 for (
unsigned i = 0;
i < NumSubVectors;
i++) {
139 Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(
i));
141 Builder.CreateAlignedLoad(NewBasePtr, LI->
getAlignment());
148 void X86InterleavedAccessGroup::transpose_4x4(
151 assert(Matrix.
size() == 4 &&
"Invalid matrix size");
152 TransposedMatrix.
resize(4);
157 Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
158 Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
163 Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
164 Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
169 TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
170 TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
175 TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
176 TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
185 if (!decompose(Inst, Factor, VecTy, DecomposedVectors))
192 transpose_4x4(DecomposedVectors, TransposedVectors);
196 for (
unsigned i = 0;
i < Shuffles.size();
i++)
197 Shuffles[
i]->replaceAllUsesWith(TransposedVectors[Indices[
i]]);
210 "Invalid interleave factor");
211 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
213 "Unmatched number of shufflevectors and indices");
void push_back(const T &Elt)
A parsed version of the target data layout string, along with methods for querying it. ...
An instruction for reading from memory.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Type * getVectorElementType() const
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
size_t size() const
size - Get the array size.
The instances of the Type class are immutable: once they are created, they are never changed...
bool isVectorTy() const
True if this is an instance of VectorType.
Value * getPointerOperand()
bool empty() const
empty - Check if the array is empty.
This class holds necessary information to represent an interleaved access group and supports utilitie...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower interleaved load(s) into target specific instructions/intrinsics.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Type * getType() const
All values are typed, get the type of this value.
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Class to represent vector types.
bool isSupported() const
Returns true if this interleaved access group can be lowered into x86-specific instructions/intrinsic...
unsigned getAlignment() const
Return the alignment of the access that is being performed.
X86InterleavedAccessGroup(Instruction *I, ArrayRef< ShuffleVectorInst * > Shuffs, ArrayRef< unsigned > Ind, const unsigned F, const X86Subtarget &STarget, IRBuilder<> &B)
In order to form an interleaved access group X86InterleavedAccessGroup requires a wide-load instructi...
bool lowerIntoOptimizedSequence()
Lowers this interleaved access group into X86-specific instructions/intrinsics.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.