//===-- X86InterleavedAccess.cpp (excerpts) -------------------------------===//
//
// X86 implementation of the interleaved-access optimization: interleaved
// load/store groups are lowered into X86-specific shuffle sequences.
//
//===----------------------------------------------------------------------===//

/// This class holds the state needed to lower one interleaved access group:
/// the wide load/store instruction, its associated shuffles, the interleave
/// indices and factor, and the target subtarget.
class X86InterleavedAccessGroup {
  /// Reference to the wide-load/store instruction of the interleaved access
  /// group.
  Instruction *const Inst;

  /// Reference to the shuffle(s), consumer(s) of the (load) 'Inst'.
  ArrayRef<ShuffleVectorInst *> Shuffles;

  /// Reference to the starting index of each user-shuffle.
  ArrayRef<unsigned> Indices;

  /// The interleave factor, i.e. the number of interleaved members.
  const unsigned Factor;

  /// Reference to the underlying target.
  const X86Subtarget &Subtarget;

  const DataLayout &DL;
  IRBuilder<> &Builder;

  /// Breaks down the vector \p Inst into \p NumSubVectors sub-vectors of type
  /// \p T and populates \p DecomposedVectors with them.
  void decompose(Instruction *Inst, unsigned NumSubVectors, FixedVectorType *T,
                 SmallVectorImpl<Instruction *> &DecomposedVectors);

  /// Shuffle-based building blocks used by lowerIntoOptimizedSequence().
  void transpose_4x4(ArrayRef<Instruction *> InputVectors,
                     SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride4(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void interleave8bitStride4VF8(ArrayRef<Instruction *> InputVectors,
                                SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void deinterleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                               SmallVectorImpl<Value *> &TransposedMatrix,
                               unsigned NumSubVecElems);

public:
  /// Forming an interleaved access group requires the wide load/store \p I,
  /// its consuming/feeding shuffles \p Shuffs, their starting indices \p Ind,
  /// the interleave factor \p F and the subtarget \p STarget.
  explicit X86InterleavedAccessGroup(Instruction *I,
                                     ArrayRef<ShuffleVectorInst *> Shuffs,
                                     ArrayRef<unsigned> Ind, const unsigned F,
                                     const X86Subtarget &STarget,
                                     IRBuilder<> &B)
      : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
        DL(Inst->getDataLayout()), Builder(B) {}

  /// Returns true if this interleaved access group can be lowered into
  /// x86-specific instructions/intrinsics, false otherwise.
  bool isSupported() const;

  /// Lowers this interleaved access group into X86-specific
  /// instructions/intrinsics.
  bool lowerIntoOptimizedSequence();
};
 
bool X86InterleavedAccessGroup::isSupported() const {
  VectorType *ShuffleVecTy = Shuffles[0]->getType();
  Type *ShuffleEltTy = ShuffleVecTy->getElementType();
  unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
  unsigned WideInstSize;

  // Lowering is currently supported only on AVX, for interleave factors of
  // 3 and 4.
  if (!Subtarget.hasAVX() || (Factor != 4 && Factor != 3))
    return false;

  // The width of the whole access is taken from the load itself for a load
  // group, and from the wide re-interleaving shuffle for a store group.
  if (isa<LoadInst>(Inst))
    WideInstSize = DL.getTypeSizeInBits(Inst->getType());
  else
    WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType());

  // Stride 4 on 64-bit elements: a 1024-bit access (four v4i64 members).
  if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4)
    return true;

  // Stride-4 stores of 8-bit elements at several total widths.
  if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 &&
      (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 ||
       WideInstSize == 2048))
    return true;

  // Stride-3 accesses of 8-bit elements at several total widths.
  if (ShuffleElemSize == 8 && Factor == 3 &&
      (WideInstSize == 384 || WideInstSize == 768 || WideInstSize == 1536))
    return true;

  return false;
}
 
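// For orientation, the combinations accepted above can be tabulated. This is a
// standalone illustrative summary, not part of the original sources; the type
// and variable names below are made up for the sketch.
struct SupportedInterleave {
  unsigned EltBits;  // element size of each member vector
  unsigned WideBits; // total size of the interleaved access
  unsigned Factor;   // interleave factor (stride)
  bool StoreOnly;    // true if only the store path is handled
};

constexpr SupportedInterleave SupportedOnAVX[] = {
    {64, 1024, 4, false},                   // 4 x v4i64, load or store
    {8, 256, 4, true},  {8, 512, 4, true},  // stride-4 byte stores
    {8, 1024, 4, true}, {8, 2048, 4, true},
    {8, 384, 3, false}, {8, 768, 3, false}, // stride-3 byte loads/stores
    {8, 1536, 3, false},
};
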
void X86InterleavedAccessGroup::decompose(
    Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy,
    SmallVectorImpl<Instruction *> &DecomposedVectors) {
  assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
         "Expected Load or Shuffle");

  Type *VecWidth = VecInst->getType();
  assert(VecWidth->isVectorTy() &&
         DL.getTypeSizeInBits(VecWidth) >=
             DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
         "Invalid Inst-size!!!");

  // For the store path the decomposed value is a wide shufflevector: split it
  // into NumSubVectors sequential sub-vectors.
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
    Value *Op0 = SVI->getOperand(0);
    Value *Op1 = SVI->getOperand(1);

    // Generate N(= NumSubVectors) shuffles of T(= 128-bit) type.
    for (unsigned i = 0; i < NumSubVectors; ++i) {
      // ... push a sequential-mask shuffle of (Op0, Op1) onto
      //     DecomposedVectors (see createSequentialMask) ...
    }
    return;
  }

  // For the load path the wide load is split into NumLoads narrower loads.
  FixedVectorType *VecBaseTy;
  unsigned int NumLoads = NumSubVectors;
  unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
  if (VecLength == 768 || VecLength == 1536) {
    // The stride-3 wide cases (768/1536 bits) are loaded as 128-bit pieces.
    NumLoads = NumSubVectors * (VecLength / 384);
    // ...
  } else {
    VecBaseTy = SubVecTy;
    // ...
  }

  assert(VecBaseTy->getPrimitiveSizeInBits().isKnownMultipleOf(8) &&
         "VecBaseTy's size must be a multiple of 8");

  // The first load keeps the original load's alignment; subsequent loads use
  // the alignment implied by stepping one sub-vector at a time (the
  // computation of FirstAlignment/SubsequentAlignment is elided).
  Align Alignment = FirstAlignment;
  for (unsigned i = 0; i < NumLoads; i++) {
    // ... GEP to the i-th sub-vector, emit an aligned load from it and push
    //     the load onto DecomposedVectors ...
    Alignment = SubsequentAlignment;
  }
}

// Concatenation shuffle mask covering up to 64 byte elements; prefixes of it
// are used to concatenate two sub-vectors into a wider one.
static constexpr int Concat[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
 
 
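// A minimal standalone sketch of what decompose() produces for the shuffle
// (store) case: the wide value is split into Factor sequential sub-vectors of
// NumSubVecElems elements each. Plain std::vector stands in for LLVM vector
// values here, and splitWide() is a made-up name for the sketch.
#include <vector>

std::vector<std::vector<int>> splitWide(const std::vector<int> &Wide,
                                        unsigned Factor,
                                        unsigned NumSubVecElems) {
  std::vector<std::vector<int>> Subs(Factor);
  for (unsigned I = 0; I < Factor; ++I)
    for (unsigned J = 0; J < NumSubVecElems; ++J)
      Subs[I].push_back(Wide[I * NumSubVecElems + J]); // sequential chunks
  return Subs;
}
// splitWide({0,...,15}, 4, 4)
//   -> {0,1,2,3}, {4,5,6,7}, {8,9,10,11}, {12,13,14,15}
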
// genShuffleBland builds a blended shuffle mask over two shuffle groups; it is
// only intended for 256-bit and wider vector types.
static void genShuffleBland(MVT VT, ArrayRef<int> Mask,
                            SmallVectorImpl<int> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  // ...
}
 
 
static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
                             ArrayRef<Value *> Vec, ArrayRef<int> VPShuf,
                             unsigned VecElems, unsigned Stride,
                             IRBuilder<> &Builder) {
  // For a single 128-bit vector the per-lane shuffle is all that is needed.
  if (VecElems == 16) {
    for (unsigned i = 0; i < Stride; i++)
      TransposedMatrix[i] = Builder.CreateShuffleVector(Vec[i], VPShuf);
    return;
  }

  SmallVector<int, 32> OptimizeShuf;
  Value *Temp[8];

  // Blend pairs of 128-bit pieces, lane by lane.
  for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
    genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16,
                    (i + 1) / Stride * 16);
    Temp[i / 2] = Builder.CreateShuffleVector(
        Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf);
    OptimizeShuf.clear();
  }

  if (VecElems == 32) {
    std::copy(Temp, Temp + Stride, TransposedMatrix.begin());
  } else
    // For 64-element vectors, concatenate the blended halves.
    for (unsigned i = 0; i < Stride; i++)
      TransposedMatrix[i] =
          Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
}
 
 
void X86InterleavedAccessGroup::interleave8bitStride4VF8(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  // Interleave four 8-element byte vectors into two 16-element halves: a
  // byte-level unpack of (Matrix[0], Matrix[1]) and (Matrix[2], Matrix[3]),
  // followed by a word-level unpack of the two intermediate results (masks
  // built with createUnpackShuffleMask / narrowShuffleMaskElts).
  TransposedMatrix.resize(2);

  SmallVector<int, 16> MaskLow;
  for (unsigned i = 0; i < 8; ++i) {
    MaskLow.push_back(i);
    MaskLow.push_back(i + 8);
  }

  // ... the word-level masks and the CreateShuffleVector calls that fill
  //     TransposedMatrix[0] and TransposedMatrix[1] are elided ...
}
 
void X86InterleavedAccessGroup::interleave8bitStride4(
    ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned NumOfElm) {
  // Interleave four byte vectors with unpack-low/high style shuffles. Vectors
  // wider than 128 bits additionally need their 128-bit pieces reordered
  // across lanes (reorderSubVector).
  MVT VT = MVT::getVectorVT(MVT::i8, NumOfElm);
  TransposedMatrix.resize(4);

  // ... unpack masks and the intermediate shuffle rounds are elided ...

  Value *VecOut[4];
  for (int i = 0; i < 4; i++) {
    // ... VecOut[i] = the i-th interleaved result (per 128-bit lane) ...
  }

  // For a single 128-bit vector the results are already in the right order.
  if (VT == MVT::v16i8) {
    std::copy(VecOut, VecOut + 4, TransposedMatrix.begin());
    return;
  }

  // ... otherwise reorderSubVector() rearranges the pieces across lanes into
  //     TransposedMatrix ...
}
 
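// A minimal scalar sketch of the transformation the two functions above
// implement with shuffles: four separate member vectors c, m, y, k are woven
// into one interleaved sequence c0 m0 y0 k0 c1 m1 y1 k1 ... . interleave4()
// is a made-up helper name for this sketch.
#include <array>
#include <vector>

std::vector<int> interleave4(const std::array<std::vector<int>, 4> &In) {
  std::vector<int> Out;
  for (size_t I = 0; I < In[0].size(); ++I)
    for (size_t Member = 0; Member < 4; ++Member)
      Out.push_back(In[Member][I]); // one element of each member per index
  return Out;
}
// interleave4({{{10, 11}, {20, 21}, {30, 31}, {40, 41}}})
//   -> {10, 20, 30, 40, 11, 21, 31, 41}
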
// createShuffleStride returns a strided shuffle mask: within each 128-bit lane
// it visits every Stride-th element, wrapping around the lane.
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<int> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
 
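// Worked example of the mask createShuffleStride builds, re-computed with the
// same formula in standalone form (stride 3 on a single 128-bit lane of 16
// byte elements). strideMask() is a made-up name for this sketch.
#include <vector>

std::vector<int> strideMask(int VF, int LaneCount, int Stride) {
  std::vector<int> Mask;
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
  return Mask;
}
// strideMask(16, 1, 3)
//   -> {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13}
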
 
// setGroupSize splits the VF elements of one lane into three nearly equal
// groups and records their sizes in SizeInfo.
static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF;
  }
}
 
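// Worked example of the sizes setGroupSize computes, restated standalone
// (groupSizes is a made-up name): 16 elements split into three interleave
// groups come out as {6, 5, 5}.
#include <cmath>
#include <vector>

std::vector<int> groupSizes(int VF) {
  std::vector<int> Sizes;
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = (int)std::ceil((VF - FirstGroupElement) / 3.0);
    Sizes.push_back(GroupSize);
    FirstGroupElement = (GroupSize * 3 + FirstGroupElement) % VF;
  }
  return Sizes;
}
// groupSizes(16) -> {6, 5, 5};  groupSizes(8) -> {3, 3, 2}
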
 
// DecodePALIGNRMask builds the shuffle mask corresponding to a palignr of the
// elements inside each 128-bit lane.
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<int> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i + Offset falls past the end of this lane, the element actually
      // comes from the other source (or wraps around for the unary form).
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
 
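// Worked example of the rotation mask above for a single 128-bit lane of 16
// byte elements and Imm = 5, restated in standalone form (palignrMask is a
// made-up name; for byte elements the offset equals Imm). The unary form is a
// plain rotation of the lane; the binary form spills into the second source.
#include <vector>

std::vector<int> palignrMask(unsigned NumLaneElts, unsigned Imm, bool Unary) {
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumLaneElts; ++i) {
    unsigned Base = i + Imm;
    if (Base >= NumLaneElts && Unary)
      Base %= NumLaneElts; // wrap within the single source
    // otherwise Base >= NumLaneElts already selects from the second source
    Mask.push_back(Base);
  }
  return Mask;
}
// palignrMask(16, 5, /*Unary=*/true)
//   -> {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4}
// palignrMask(16, 5, /*Unary=*/false)
//   -> {5, 6, ..., 15, 16, 17, 18, 19, 20}   (indices >= 16 select V2)
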
 
// concatSubVector packs the decomposed 128-bit loads in InVec into (up to)
// three wider vectors in Vec, concatenating pieces with the Concat mask.
static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec,
                            unsigned VecElems, IRBuilder<> &Builder) {
  if (VecElems == 16) {
    for (int i = 0; i < 3; i++)
      Vec[i] = InVec[i];
    return;
  }

  for (unsigned j = 0; j < VecElems / 32; j++)
    for (int i = 0; i < 3; i++)
      Vec[i + j * 3] = Builder.CreateShuffleVector(
          InVec[j * 6 + i], InVec[j * 6 + i + 3], ArrayRef(Concat, 32));

  if (VecElems == 32)
    return;

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat);
}
 
 
void X86InterleavedAccessGroup::deinterleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  TransposedMatrix.resize(3);

  // The grouping mask (createShuffleStride), the group sizes (setGroupSize)
  // and the palignr-style rotation masks (DecodePALIGNRMask) are built first;
  // their construction is elided here.
  Value *Vec[6], *TempVector[3];

  for (int i = 0; i < 2; i++) {
    // ... decode the two cross-vector rotation masks ...
  }

  // Pack the decomposed loads into (up to) three wide vectors, then group the
  // elements of each member inside every vector and rotate across the three
  // vectors in two rounds to finish the de-interleave.
  concatSubVector(Vec, InVec, VecElems, Builder);

  for (int i = 0; i < 3; i++) {
    // ... Vec[i] = grouping shuffle of Vec[i] ...
  }

  for (int i = 0; i < 3; i++) {
    // ... TempVector[i] = rotation of a pair of the Vec[] values ...
  }

  for (int i = 0; i < 3; i++) {
    // ... Vec[i] = rotation of a pair of the TempVector[] values ...
  }

  // ... TransposedMatrix[0] and TempVec come from two final single-source
  //     shuffles (elided) ...
  TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec;
  TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2];
}
 
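// A minimal scalar sketch of the overall effect of deinterleave8bitStride3: an
// interleaved byte sequence r0 g0 b0 r1 g1 b1 ... is separated into its three
// member vectors. deinterleave3() is a made-up helper for illustration; the
// real lowering achieves this with the shuffle rounds sketched above.
#include <array>
#include <vector>

std::array<std::vector<int>, 3> deinterleave3(const std::vector<int> &Wide) {
  std::array<std::vector<int>, 3> Members;
  for (size_t I = 0; I < Wide.size(); ++I)
    Members[I % 3].push_back(Wide[I]); // element I belongs to member I % 3
  return Members;
}
// deinterleave3({r0, g0, b0, r1, g1, b1})
//   -> { {r0, r1}, {g0, g1}, {b0, b1} }
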
// group2Shuffle converts the strided grouping back into a contiguous ordering.
// For VF = 16 with group sizes {6, 5, 5} the resulting mask is
// {0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask,
                          SmallVectorImpl<int> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the start index of each group.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // Emit one element from each group in turn.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
 
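// Worked example of the re-grouping mask above, restated standalone
// (regroupMask is a made-up name): with 16 elements per lane and group sizes
// {6, 5, 5}, the groups start at offsets 0, 6 and 11, and the output emits one
// element from each group in turn.
#include <vector>

std::vector<int> regroupMask(int VF, const std::vector<int> &GroupSize) {
  int IndexGroup[3] = {0, 0, 0};
  for (int i = 0, Index = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % VF] = Index; // start offset of this group
    Index += GroupSize[i];
  }
  std::vector<int> Out;
  for (int i = 0; i < VF; i++)
    Out.push_back(IndexGroup[i % 3]++);
  return Out;
}
// regroupMask(16, {6, 5, 5})
//   -> {0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5}
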
 
void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // The inverse of deinterleave8bitStride3: palignr-style rotations bring the
  // three member vectors into a form from which a per-vector re-grouping
  // shuffle (group2Shuffle) produces the interleaved output.
  TransposedMatrix.resize(3);

  // The rotation and re-grouping masks are built first (setGroupSize,
  // DecodePALIGNRMask, group2Shuffle); their construction is elided here.
  Value *Vec[3], *TempVector[3];

  for (int i = 0; i < 3; i++) {
    // ...
  }

  for (int i = 0; i < 3; i++) {
    // ...
  }

  for (int i = 0; i < 3; i++) {
    // ...
  }

  // ... the results of the final shuffles populate TransposedMatrix ...
}
 
void X86InterleavedAccessGroup::transpose_4x4(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  TransposedMatrix.resize(4);

  // The 4x4 transpose is done in two rounds of shuffles on pairs of rows.
  // dst = src1[0,1],src2[0,1]
  static constexpr int IntMask1[] = {0, 1, 4, 5};
  // ... IntrVec1Low = shuffle(Matrix[0], Matrix[1], IntMask1),
  //     IntrVec2Low = shuffle(Matrix[2], Matrix[3], IntMask1) ...

  // dst = src1[2,3],src2[2,3]
  static constexpr int IntMask2[] = {2, 3, 6, 7};
  // ... IntrVec1High / IntrVec2High are built the same way with IntMask2 ...

  // dst = src1[0],src2[0],src1[2],src2[2]
  static constexpr int IntMask3[] = {0, 4, 2, 6};
  // ...

  // dst = src1[1],src2[1],src1[3],src2[3]
  static constexpr int IntMask4[] = {1, 5, 3, 7};
  // ... IntMask3/IntMask4 combine the low and high intermediates into the
  //     four rows of TransposedMatrix ...
}
 
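// A standalone sketch of how the masks above index their operands: a
// shufflevector mask entry below the source width selects from the first
// operand, entries at or above it select from the second. applyMask4() is a
// made-up helper mirroring that rule for 4-element sources.
#include <array>

std::array<int, 4> applyMask4(const std::array<int, 4> &V1,
                              const std::array<int, 4> &V2,
                              const std::array<int, 4> &Mask) {
  std::array<int, 4> Out{};
  for (int I = 0; I < 4; ++I)
    Out[I] = Mask[I] < 4 ? V1[Mask[I]] : V2[Mask[I] - 4];
  return Out;
}
// With IntMask1 = {0, 1, 4, 5}: applyMask4(A, B, IntMask1)
//   -> {A[0], A[1], B[0], B[1]}
// With IntMask3 = {0, 4, 2, 6}: applyMask4(C, D, IntMask3)
//   -> {C[0], D[0], C[2], D[2]}
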
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
  SmallVector<Instruction *, 4> DecomposedVectors;
  SmallVector<Value *, 4> TransposedVectors;
  auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType());

  if (isa<LoadInst>(Inst)) {
    auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType());
    unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
    switch (NumSubVecElems) {
    default:
      return false;
    case 4:
    case 8:
    case 16:
    case 32:
    case 64:
      if (ShuffleTy->getNumElements() != NumSubVecElems)
        return false;
      break;
    }

    // Try to generate target-sized register(/instruction).
    decompose(Inst, Factor, ShuffleTy, DecomposedVectors);

    // Perform a matrix transposition (de-interleave) on the decomposed
    // vectors using target-specific shuffle sequences.
    if (NumSubVecElems == 4)
      transpose_4x4(DecomposedVectors, TransposedVectors);
    else
      deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
                              NumSubVecElems);

    // Now replace the unoptimized-interleaved-vectors with the
    // transposed-interleaved vectors.
    for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
      Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);

    return true;
  }

  // Store path: decompose the wide re-interleaving shuffle, transpose the
  // pieces into interleaved order, then concatenate and store them.
  Type *ShuffleEltTy = ShuffleTy->getElementType();
  unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor;

  decompose(Shuffles[0], Factor,
            FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
            DecomposedVectors);

  switch (NumSubVecElems) {
  case 4:
    transpose_4x4(DecomposedVectors, TransposedVectors);
    break;
  case 8:
    interleave8bitStride4VF8(DecomposedVectors, TransposedVectors);
    break;
  case 16:
  case 32:
  case 64:
    if (Factor == 4)
      interleave8bitStride4(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    if (Factor == 3)
      interleave8bitStride3(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    break;
  default:
    return false;
  }

  // ... concatenate TransposedVectors (concatenateVectors) and emit a single
  //     wide store in place of the original interleaved store ...
  return true;
}
 
bool X86TargetLowering::lowerInterleavedLoad(
    Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(!Shuffles.empty() && "Empty shufflevector input");
  assert(Shuffles.size() == Indices.size() &&
         "Unmatched number of shufflevectors and indices");

  auto *LI = dyn_cast<LoadInst>(Load);
  if (!LI)
    return false;
  assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");

  // Create an interleaved access group.
  IRBuilder<> Builder(LI);
  X86InterleavedAccessGroup Grp(LI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}
 
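// A hedged, self-contained sketch of the IR shape that the InterleavedAccess
// pass hands to the hook above: one wide load whose users are Factor
// shufflevectors with strided masks. A <48 x i8> load with factor 3 is chosen
// because 384-bit stride-3 byte accesses are in isSupported()'s list; the
// function and value names here are invented for the demo.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("interleaved-demo", Ctx);
  IRBuilder<> B(Ctx);

  auto *I8 = Type::getInt8Ty(Ctx);
  auto *WideTy = FixedVectorType::get(I8, 48); // 3 interleaved v16i8 members
  auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx),
                                 {PointerType::get(Ctx, 0)}, false);
  Function *Fn = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", Fn);
  B.SetInsertPoint(BB);

  Value *Ptr = Fn->getArg(0);
  Value *Wide = B.CreateAlignedLoad(WideTy, Ptr, Align(16), "wide");

  // De-interleaving shuffles: member K takes elements K, K+3, K+6, ...
  for (int K = 0; K < 3; ++K) {
    SmallVector<int, 16> Mask;
    for (int I = 0; I < 16; ++I)
      Mask.push_back(K + 3 * I);
    B.CreateShuffleVector(Wide, Mask, "member" + Twine(K));
  }

  B.CreateRetVoid();
  verifyModule(M, &errs());
  M.print(outs(), nullptr);
  return 0;
}
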
 
bool X86TargetLowering::lowerInterleavedStore(Instruction *Store, Value *Mask,
                                              ShuffleVectorInst *SVI,
                                              unsigned Factor,
                                              const APInt &GapMask) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(cast<FixedVectorType>(SVI->getType())->getNumElements() % Factor ==
             0 &&
         "Invalid interleaved store");

  auto *SI = dyn_cast<StoreInst>(Store);
  if (!SI)
    return false;
  assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on store");

  // Holds the indices of SVI that correspond to the starting index of each
  // interleaved member.
  SmallVector<unsigned, 4> Indices;
  // ... collect the Factor starting indices from SVI's shuffle mask ...

  ArrayRef<ShuffleVectorInst *> Shuffles = ArrayRef(SVI);

  // Create an interleaved access group.
  IRBuilder<> Builder(SI);
  X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}
 
 