LLVM 20.0.0git
VectorUtils.h
1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/LoopAccessAnalysis.h"
19#include "llvm/IR/Module.h"
20#include "llvm/IR/VFABIDemangler.h"
21#include "llvm/Support/CheckedArithmetic.h"
23
24namespace llvm {
25class TargetLibraryInfo;
26
27/// The Vector Function Database.
28///
29/// Helper class used to find the vector functions associated to a
30/// scalar CallInst.
31class VFDatabase {
32 /// The Module of the CallInst CI.
33 const Module *M;
34 /// The CallInst instance being queried for scalar to vector mappings.
35 const CallInst &CI;
36 /// List of vector function descriptors associated to the call
37 /// instruction.
38 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
39
40 /// Retrieve the scalar-to-vector mappings associated to the rule of
41 /// a vector Function ABI.
42 static void getVFABIMappings(const CallInst &CI,
43 SmallVectorImpl<VFInfo> &Mappings) {
44 if (!CI.getCalledFunction())
45 return;
46
47 const StringRef ScalarName = CI.getCalledFunction()->getName();
48
49 SmallVector<std::string, 8> ListOfStrings;
50 // The check for the vector-function-abi-variant attribute is done when
51 // retrieving the vector variant names here.
52 VFABI::getVectorVariantNames(CI, ListOfStrings);
53 if (ListOfStrings.empty())
54 return;
55 for (const auto &MangledName : ListOfStrings) {
56 const std::optional<VFInfo> Shape =
57 VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
58 // A match is found via scalar and vector names, and also by
59 // ensuring that the variant described in the attribute has a
60 // corresponding definition or declaration of the vector
61 // function in the Module M.
62 if (Shape && (Shape->ScalarName == ScalarName)) {
63 assert(CI.getModule()->getFunction(Shape->VectorName) &&
64 "Vector function is missing.");
65 Mappings.push_back(*Shape);
66 }
67 }
68 }
69
70public:
71 /// Retrieve all the VFInfo instances associated to the CallInst CI.
72 static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
73 SmallVector<VFInfo, 8> Ret;
74
75 // Get mappings from the Vector Function ABI variants.
76 getVFABIMappings(CI, Ret);
77
78 // Other non-VFABI variants should be retrieved here.
79
80 return Ret;
81 }
82
83 static bool hasMaskedVariant(const CallInst &CI,
84 std::optional<ElementCount> VF = std::nullopt) {
85 // Check whether we have at least one masked vector version of a scalar
86 // function. If no VF is specified then we check for any masked variant,
87 // otherwise we look for one that matches the supplied VF.
88 auto Mappings = VFDatabase::getMappings(CI);
89 for (VFInfo Info : Mappings)
90 if (!VF || Info.Shape.VF == *VF)
91 if (Info.isMasked())
92 return true;
93
94 return false;
95 }
96
97 /// Constructor, requires a CallInst instance.
98 VFDatabase(CallInst &CI)
99 : M(CI.getModule()), CI(CI),
100 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
101
102 /// \defgroup VFDatabase query interface.
103 ///
104 /// @{
105 /// Retrieve the Function with VFShape \p Shape.
106 Function *getVectorizedFunction(const VFShape &Shape) const {
107 if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
108 return CI.getCalledFunction();
109
110 for (const auto &Info : ScalarToVectorMappings)
111 if (Info.Shape == Shape)
112 return M->getFunction(Info.VectorName);
113
114 return nullptr;
115 }
116 /// @}
117};
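// Editor's example (an illustrative sketch, not part of the upstream header):
// one way a transform might use the VFDatabase above to find a vector variant
// of a scalar call for a given vectorization factor. The helper name
// exampleFindVectorVariant is hypothetical.
inline Function *exampleFindVectorVariant(CallInst &CI, ElementCount VF) {
  VFDatabase DB(CI);
  for (const VFInfo &Info : VFDatabase::getMappings(CI))
    if (Info.Shape.VF == VF)
      // Returns the declaration or definition of the mapped vector function
      // in the module, or nullptr if no mapping with this shape exists.
      return DB.getVectorizedFunction(Info.Shape);
  return nullptr;
}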
118
119template <typename T> class ArrayRef;
120class DemandedBits;
121template <typename InstTy> class InterleaveGroup;
122class IRBuilderBase;
123class Loop;
124class TargetTransformInfo;
125class Value;
126
127namespace Intrinsic {
128typedef unsigned ID;
129}
130
131/// Identify if the intrinsic is trivially vectorizable.
132/// This method returns true if the intrinsic's argument types are all scalars
133/// for the scalar form of the intrinsic and all vectors (or scalars handled by
134/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
135///
136/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.
137bool isTriviallyVectorizable(Intrinsic::ID ID);
138
139/// Identify if the intrinsic is trivially scalarizable.
140/// This method returns true following the same predicates of
141/// isTriviallyVectorizable.
142///
143/// Note: There are intrinsics where implementing vectorization for the
144/// intrinsic is redundant, but we want to implement scalarization of the
145/// vector. To prevent the requirement that an intrinsic also implements
146/// vectorization we provide this separate function.
147bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI);
148
149/// Identifies if the vector form of the intrinsic has a scalar operand.
150/// \p TTI is used to consider target specific intrinsics; if no target specific
151/// intrinsics will be considered then it is appropriate to pass in nullptr.
152bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx,
153 const TargetTransformInfo *TTI);
154
155/// Identifies if the vector form of the intrinsic is overloaded on the type of
156/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
157/// \p TTI is used to consider target specific intrinsics; if no target specific
158/// intrinsics will be considered then it is appropriate to pass in nullptr.
160 const TargetTransformInfo *TTI);
161
162/// Identifies if the vector form of the intrinsic that returns a struct is
163/// overloaded at the struct element index \p RetIdx. \p TTI is used to
164/// consider target specific intrinsics; if no target specific intrinsics
165/// will be considered then it is appropriate to pass in nullptr.
166bool isVectorIntrinsicWithStructReturnOverloadAtField(
167 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);
168
169/// Returns intrinsic ID for call.
170/// For the input call instruction it finds the mapped intrinsic and returns
171/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
172Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
173 const TargetLibraryInfo *TLI);
174
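// Editor's example (an illustrative sketch, not part of the upstream header):
// how the queries above are typically combined when widening an intrinsic
// call: the call is widened lane-wise only if it is trivially vectorizable,
// and operands flagged as scalar keep their scalar form. The helper name
// exampleOperandMustStayScalar is hypothetical.
inline bool exampleOperandMustStayScalar(Intrinsic::ID ID, unsigned OpIdx,
                                         const TargetTransformInfo *TTI) {
  return isTriviallyVectorizable(ID) &&
         isVectorIntrinsicWithScalarOpAtArg(ID, OpIdx, TTI);
}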
175/// Given a vector and an element number, see if the scalar value is
176/// already around as a register, for example if it were inserted then extracted
177/// from the vector.
178Value *findScalarElement(Value *V, unsigned EltNo);
179
180/// If all non-negative \p Mask elements are the same value, return that value.
181/// If all elements are negative (undefined) or \p Mask contains different
182/// non-negative values, return -1.
183int getSplatIndex(ArrayRef<int> Mask);
184
185/// Get splat value if the input is a splat vector or return nullptr.
186/// The value may be extracted from a splat constants vector or from
187/// a sequence of instructions that broadcast a single value into a vector.
188Value *getSplatValue(const Value *V);
189
190/// Return true if each element of the vector value \p V is poisoned or equal to
191/// every other non-poisoned element. If an index element is specified, either
192/// every element of the vector is poisoned or the element at that index is not
193/// poisoned and equal to every other non-poisoned element.
194/// This may be more powerful than the related getSplatValue() because it is
195/// not limited by finding a scalar source value to a splatted vector.
196bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
197
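// Editor's example (an illustrative sketch, not part of the upstream header):
// the two splat queries answer different questions and are often tried in
// this order. The helper name exampleIsBroadcast is hypothetical.
inline bool exampleIsBroadcast(const Value *V, Value *&SplatVal) {
  // getSplatValue only succeeds when an actual scalar source can be named
  // (a constant splat or an insertelement/shufflevector broadcast idiom).
  if ((SplatVal = getSplatValue(V)))
    return true;
  // isSplatValue can still prove that all lanes are equal without producing
  // the scalar itself.
  return isSplatValue(V);
}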
198/// Transform a shuffle mask's output demanded element mask into demanded
199/// element masks for the 2 operands, returns false if the mask isn't valid.
200/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
201/// \p AllowUndefElts permits "-1" indices to be treated as undef.
202bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
203 const APInt &DemandedElts, APInt &DemandedLHS,
204 APInt &DemandedRHS, bool AllowUndefElts = false);
205
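// Editor's example (an illustrative sketch, not part of the upstream header):
// splitting the demand on a shuffle's result into demand on its two inputs,
// e.g. to prove that one operand is not needed at all. The helper name
// exampleOnlyLHSDemanded is hypothetical.
inline bool exampleOnlyLHSDemanded(int SrcWidth, ArrayRef<int> Mask,
                                   const APInt &DemandedElts) {
  APInt DemandedLHS, DemandedRHS;
  if (!getShuffleDemandedElts(SrcWidth, Mask, DemandedElts, DemandedLHS,
                              DemandedRHS))
    return false; // The mask was not valid for SrcWidth-wide operands.
  return !DemandedLHS.isZero() && DemandedRHS.isZero();
}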
206/// Replace each shuffle mask index with the scaled sequential indices for an
207/// equivalent mask of narrowed elements. Mask elements that are less than 0
208/// (sentinel values) are repeated in the output mask.
209///
210/// Example with Scale = 4:
211/// <4 x i32> <3, 2, 0, -1> -->
212/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
213///
214/// This is the reverse process of widening shuffle mask elements, but it always
215/// succeeds because the indexes can always be multiplied (scaled up) to map to
216/// narrower vector elements.
217void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
218 SmallVectorImpl<int> &ScaledMask);
219
220/// Try to transform a shuffle mask by replacing elements with the scaled index
221/// for an equivalent mask of widened elements. If all mask elements that would
222/// map to a wider element of the new mask are the same negative number
223/// (sentinel value), that element of the new mask is the same value. If any
224/// element in a given slice is negative and some other element in that slice is
225/// not the same value, return false (partial matches with sentinel values are
226/// not allowed).
227///
228/// Example with Scale = 4:
229/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
230/// <4 x i32> <3, 2, 0, -1>
231///
232/// This is the reverse process of narrowing shuffle mask elements if it
233/// succeeds. This transform is not always possible because indexes may not
234/// divide evenly (scale down) to map to wider vector elements.
235bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
236 SmallVectorImpl<int> &ScaledMask);
237
238/// A variant of the previous method which is specialized for Scale=2, and
239/// treats -1 as undef and allows widening when a wider element is partially
240/// undef in the narrow form of the mask. This transformation discards
241/// information about which bytes in the original shuffle were undef.
242bool widenShuffleMaskElts(ArrayRef<int> M, SmallVectorImpl<int> &NewMask);
243
244/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target
245/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.
246/// This will assert unless NumDstElts is a multiple of Mask.size (or
247/// vice-versa). Returns false on failure, and ScaledMask will be in an
248/// undefined state.
249bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
250 SmallVectorImpl<int> &ScaledMask);
251
252/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
253/// to get the shuffle mask with widest possible elements.
254void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
255 SmallVectorImpl<int> &ScaledMask);
256
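// Editor's example (an illustrative sketch, not part of the upstream header):
// round-tripping a mask through the scaling helpers above. Narrowing always
// succeeds, whereas widening may fail and must be checked. The helper name
// exampleRoundTripMask is hypothetical.
inline void exampleRoundTripMask() {
  int Mask[] = {3, 2, 0, -1}; // A shuffle of <4 x i32> elements.
  SmallVector<int, 16> Narrow, Wide;
  // Rewrite the mask in terms of <16 x i8> elements (Scale = 4).
  narrowShuffleMaskElts(4, Mask, Narrow);
  // Recover the original mask; this succeeds because every group of four
  // narrow indices is either consecutive or entirely undef.
  bool Widened = widenShuffleMaskElts(4, Narrow, Wide);
  assert(Widened && Wide.size() == 4 && "round trip should succeed");
  (void)Widened;
}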
257/// Splits and processes a shuffle mask depending on the number of input and
258/// output registers. The function does 2 main things: 1) splits the
259/// source/destination vectors into real registers; 2) does the mask analysis to
260/// identify which real registers are permuted. Then the function processes the
261/// resulting registers mask using the provided action items. If no input register
262/// is defined, \p NoInputAction action is used. If only 1 input register is
263/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
264/// process 2 or more input registers and masks.
265/// \param Mask Original shuffle mask.
266/// \param NumOfSrcRegs Number of source registers.
267/// \param NumOfDestRegs Number of destination registers.
268/// \param NumOfUsedRegs Number of actually used destination registers.
269void processShuffleMasks(
270 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
271 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
272 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
273 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
274 ManyInputsAction);
275
276/// Compute the demanded elements mask of horizontal binary operations. A
277/// horizontal operation combines two adjacent elements in a vector operand.
278/// This function returns a mask for the elements that correspond to the first
279/// operand of this horizontal combination. For example, for two vectors
280/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the
281/// elements X1, X3, Y1, and Y3. To get the other operands, simply shift the
282/// result of this function to the left by 1.
283///
284/// \param VectorBitWidth the total bit width of the vector
285/// \param DemandedElts the demanded elements mask for the operation
286/// \param DemandedLHS the demanded elements mask for the left operand
287/// \param DemandedRHS the demanded elements mask for the right operand
288void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
289 const APInt &DemandedElts,
290 APInt &DemandedLHS,
291 APInt &DemandedRHS);
292
293/// Compute a map of integer instructions to their minimum legal type
294/// size.
295///
296/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
297/// type (e.g. i32) whenever arithmetic is performed on them.
298///
299/// For targets with native i8 or i16 operations, usually InstCombine can shrink
300/// the arithmetic type down again. However InstCombine refuses to create
301/// illegal types, so for targets without i8 or i16 registers, the lengthening
302/// and shrinking remains.
303///
304/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
305/// their scalar equivalents do not, so during vectorization it is important to
306/// remove these lengthens and truncates when deciding the profitability of
307/// vectorization.
308///
309/// This function analyzes the given range of instructions and determines the
310/// minimum type size each can be converted to. It attempts to remove or
311/// minimize type size changes across each def-use chain, so for example in the
312/// following code:
313///
314/// %1 = load i8, i8*
315/// %2 = add i8 %1, 2
316/// %3 = load i16, i16*
317/// %4 = zext i8 %2 to i32
318/// %5 = zext i16 %3 to i32
319/// %6 = add i32 %4, %5
320/// %7 = trunc i32 %6 to i16
321///
322/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
323/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
324///
325/// If the optional TargetTransformInfo is provided, this function tries harder
326/// to do less work by only looking at illegal types.
327MapVector<Instruction*, uint64_t>
328computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
329 DemandedBits &DB,
330 const TargetTransformInfo *TTI=nullptr);
331
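// Editor's example (an illustrative sketch, not part of the upstream header):
// a typical query of the resulting map, assuming Blocks and DB describe the
// loop being vectorized. The helper name exampleMinBitWidthFor is
// hypothetical.
inline uint64_t exampleMinBitWidthFor(Instruction *I,
                                      ArrayRef<BasicBlock *> Blocks,
                                      DemandedBits &DB) {
  MapVector<Instruction *, uint64_t> MinBWs =
      computeMinimumValueSizes(Blocks, DB);
  // Instructions absent from the map cannot be shrunk, so report their
  // original scalar width in that case.
  auto It = MinBWs.find(I);
  return It != MinBWs.end() ? It->second
                            : I->getType()->getScalarSizeInBits();
}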
332/// Compute the union of two access-group lists.
333///
334/// If the list contains just one access group, it is returned directly. If the
335/// list is empty, returns nullptr.
336MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
337
338/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
339/// are both in. If either instruction does not access memory at all, it is
340/// considered to be in every list.
341///
342/// If the list contains just one access group, it is returned directly. If the
343/// list is empty, returns nullptr.
344MDNode *intersectAccessGroups(const Instruction *Inst1,
345 const Instruction *Inst2);
346
347/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
348/// MD_nontemporal, MD_access_group, MD_mmra].
349/// For K in Kinds, we get the MDNode for K from each of the
350/// elements of VL, compute their "intersection" (i.e., the most generic
351/// metadata value that covers all of the individual values), and set I's
352/// metadata for K equal to the intersection value.
353///
354/// This function always sets a (possibly null) value for each K in Kinds.
355Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
356
357/// Create a mask that filters the members of an interleave group where there
358/// are gaps.
359///
360/// For example, the mask for \p Group with interleave-factor 3
361/// and \p VF 4, that has only its first member present is:
362///
363/// <1,0,0,1,0,0,1,0,0,1,0,0>
364///
365/// Note: The result is a mask of 0's and 1's, as opposed to the other
366/// create[*]Mask() utilities which create a shuffle mask (mask that
367/// consists of indices).
368Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
369 const InterleaveGroup<Instruction> &Group);
370
371/// Create a mask with replicated elements.
372///
373/// This function creates a shuffle mask for replicating each of the \p VF
374/// elements in a vector \p ReplicationFactor times. It can be used to
375/// transform a mask of \p VF elements into a mask of
376/// \p VF * \p ReplicationFactor elements used by a predicated
377/// interleaved-group of loads/stores whose Interleaved-factor ==
378/// \p ReplicationFactor.
379///
380/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
381///
382/// <0,0,0,1,1,1,2,2,2,3,3,3>
383llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
384 unsigned VF);
385
386/// Create an interleave shuffle mask.
387///
388/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
389/// vectorization factor \p VF into a single wide vector. The mask is of the
390/// form:
391///
392/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
393///
394/// For example, the mask for VF = 4 and NumVecs = 2 is:
395///
396/// <0, 4, 1, 5, 2, 6, 3, 7>.
397llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
398
399/// Create a stride shuffle mask.
400///
401/// This function creates a shuffle mask whose elements begin at \p Start and
402/// are incremented by \p Stride. The mask can be used to deinterleave an
403/// interleaved vector into separate vectors of vectorization factor \p VF. The
404/// mask is of the form:
405///
406/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
407///
408/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
409///
410/// <0, 2, 4, 6>
411llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
412 unsigned VF);
413
414/// Create a sequential shuffle mask.
415///
416/// This function creates a shuffle mask whose elements are sequential and begin
417/// at \p Start. The mask contains \p NumInts integers and is padded with \p
418/// NumUndefs undef values. The mask is of the form:
419///
420/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
421///
422/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
423///
424/// <0, 1, 2, 3, undef, undef, undef, undef>
425llvm::SmallVector<int, 16>
426createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
427
428/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
429/// mask assuming both operands are identical. This assumes that the unary
430/// shuffle will use elements from operand 0 (operand 1 will be unused).
431llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
432 unsigned NumElts);
433
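// Editor's example (an illustrative sketch, not part of the upstream header):
// the interleave and stride masks above are inverses of each other for a
// group of NumVecs vectors of VF elements. The helper name
// exampleInterleaveMasks is hypothetical.
inline void exampleInterleaveMasks() {
  // <0, 4, 1, 5, 2, 6, 3, 7>: interleave two vectors of four elements.
  SmallVector<int, 16> Interleave = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  // <0, 2, 4, 6> and <1, 3, 5, 7>: deinterleave the wide vector again.
  SmallVector<int, 16> Evens = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  SmallVector<int, 16> Odds = createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);
  (void)Interleave;
  (void)Evens;
  (void)Odds;
}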
434/// Concatenate a list of vectors.
435///
436/// This function generates code that concatenates the vectors in \p Vecs into a
437/// single large vector. The number of vectors should be greater than one, and
438/// their element types should be the same. The number of elements in the
439/// vectors should also be the same; however, if the last vector has fewer
440/// elements, it will be padded with undefs.
441Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
442
443/// Given a mask vector of i1, return true if all of the elements of this
444/// predicate mask are known to be false or undef. That is, return true if all
445/// lanes can be assumed inactive.
446bool maskIsAllZeroOrUndef(Value *Mask);
447
448/// Given a mask vector of i1, return true if all of the elements of this
449/// predicate mask are known to be true or undef. That is, return true if all
450/// lanes can be assumed active.
451bool maskIsAllOneOrUndef(Value *Mask);
452
453/// Given a mask vector of i1, return true if any of the elements of this
454/// predicate mask are known to be true or undef. That is, return true if at
455/// least one lane can be assumed active.
456bool maskContainsAllOneOrUndef(Value *Mask);
457
458/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
459/// for each lane which may be active.
460APInt possiblyDemandedEltsInMask(Value *Mask);
461
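// Editor's example (an illustrative sketch, not part of the upstream header):
// combining the mask queries above when simplifying a masked memory
// operation. The helper name exampleLaneMayBeActive is hypothetical.
inline bool exampleLaneMayBeActive(Value *Mask, unsigned Lane) {
  // A mask that is all-false/undef has no active lanes at all.
  if (maskIsAllZeroOrUndef(Mask))
    return false;
  // Otherwise consult the per-lane summary; a set bit means the lane may be
  // active and the corresponding scalar access must be kept.
  return possiblyDemandedEltsInMask(Mask)[Lane];
}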
462/// The group of interleaved loads/stores sharing the same stride and
463/// close to each other.
464///
465/// Each member in this group has an index starting from 0, and the largest
466/// index should be less than the interleave factor, which is equal to the absolute
467/// value of the access's stride.
468///
469/// E.g. An interleaved load group of factor 4:
470/// for (unsigned i = 0; i < 1024; i+=4) {
471/// a = A[i]; // Member of index 0
472/// b = A[i+1]; // Member of index 1
473/// d = A[i+3]; // Member of index 3
474/// ...
475/// }
476///
477/// An interleaved store group of factor 4:
478/// for (unsigned i = 0; i < 1024; i+=4) {
479/// ...
480/// A[i] = a; // Member of index 0
481/// A[i+1] = b; // Member of index 1
482/// A[i+2] = c; // Member of index 2
483/// A[i+3] = d; // Member of index 3
484/// }
485///
486/// Note: the interleaved load group could have gaps (missing members), but
487/// the interleaved store group doesn't allow gaps.
488template <typename InstTy> class InterleaveGroup {
489public:
490 InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
491 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
492 InsertPos(nullptr) {}
493
494 InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
495 : Alignment(Alignment), InsertPos(Instr) {
496 Factor = std::abs(Stride);
497 assert(Factor > 1 && "Invalid interleave factor");
498
499 Reverse = Stride < 0;
500 Members[0] = Instr;
501 }
502
503 bool isReverse() const { return Reverse; }
504 uint32_t getFactor() const { return Factor; }
505 Align getAlign() const { return Alignment; }
506 uint32_t getNumMembers() const { return Members.size(); }
507
508 /// Try to insert a new member \p Instr with index \p Index and
509 /// alignment \p NewAlign. The index is relative to the leader and it could be
510 /// negative if it is the new leader.
511 ///
512 /// \returns false if the instruction doesn't belong to the group.
513 bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
514 // Make sure the key fits in an int32_t.
515 std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
516 if (!MaybeKey)
517 return false;
518 int32_t Key = *MaybeKey;
519
520 // Skip if the key is used for either the tombstone or empty special values.
521 if (Key == DenseMapInfo<int32_t>::getTombstoneKey() ||
522 Key == DenseMapInfo<int32_t>::getEmptyKey())
523 return false;
524
525 // Skip if there is already a member with the same index.
526 if (Members.contains(Key))
527 return false;
528
529 if (Key > LargestKey) {
530 // The largest index is always less than the interleave factor.
531 if (Index >= static_cast<int32_t>(Factor))
532 return false;
533
534 LargestKey = Key;
535 } else if (Key < SmallestKey) {
536
537 // Make sure the largest index fits in an int32_t.
538 std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
539 if (!MaybeLargestIndex)
540 return false;
541
542 // The largest index is always less than the interleave factor.
543 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
544 return false;
545
546 SmallestKey = Key;
547 }
548
549 // It's always safe to select the minimum alignment.
550 Alignment = std::min(Alignment, NewAlign);
551 Members[Key] = Instr;
552 return true;
553 }
554
555 /// Get the member with the given index \p Index
556 ///
557 /// \returns nullptr if the group contains no such member.
558 InstTy *getMember(uint32_t Index) const {
559 int32_t Key = SmallestKey + Index;
560 return Members.lookup(Key);
561 }
562
563 /// Get the index for the given member. Unlike the key in the member
564 /// map, the index starts from 0.
565 uint32_t getIndex(const InstTy *Instr) const {
566 for (auto I : Members) {
567 if (I.second == Instr)
568 return I.first - SmallestKey;
569 }
570
571 llvm_unreachable("InterleaveGroup contains no such member");
572 }
573
574 InstTy *getInsertPos() const { return InsertPos; }
575 void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
576
577 /// Add metadata (e.g. alias info) from the instructions in this group to \p
578 /// NewInst.
579 ///
580 /// FIXME: this function currently does not add noalias metadata a la
581 /// addNewMetadata. To do that we need to compute the intersection of the
582 /// noalias info from all members.
583 void addMetadata(InstTy *NewInst) const;
584
585 /// Returns true if this Group requires a scalar iteration to handle gaps.
586 bool requiresScalarEpilogue() const {
587 // If the last member of the Group exists, then a scalar epilog is not
588 // needed for this group.
589 if (getMember(getFactor() - 1))
590 return false;
591
592 // We have a group with gaps. It therefore can't be a reversed access,
593 // because such groups get invalidated (TODO).
594 assert(!isReverse() && "Group should have been invalidated");
595
596 // This is a group of loads, with gaps, and without a last-member
597 return true;
598 }
599
600private:
601 uint32_t Factor; // Interleave Factor.
602 bool Reverse;
603 Align Alignment;
604 DenseMap<int32_t, InstTy *> Members;
605 int32_t SmallestKey = 0;
606 int32_t LargestKey = 0;
607
608 // To avoid breaking dependences, vectorized instructions of an interleave
609 // group should be inserted at either the first load or the last store in
610 // program order.
611 //
612 // E.g. %even = load i32 // Insert Position
613 // %add = add i32 %even // Use of %even
614 // %odd = load i32
615 //
616 // store i32 %even
617 // %odd = add i32 // Def of %odd
618 // store i32 %odd // Insert Position
619 InstTy *InsertPos;
620};
621
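// Editor's example (an illustrative sketch, not part of the upstream header):
// assembling a group of factor 4 around a leader access. Members whose index
// falls outside the factor, or whose slot is already taken, are rejected by
// insertMember. The helper name exampleBuildGroup is hypothetical.
inline void exampleBuildGroup(Instruction *Leader, Instruction *Second,
                              Align A) {
  InterleaveGroup<Instruction> Group(Leader, /*Stride=*/4, A);
  // Insert the access one element after the leader; the group keeps the
  // minimum alignment seen so far.
  if (Group.insertMember(Second, /*Index=*/1, A))
    assert(Group.getMember(1) == Second && "member should be retrievable");
}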
622/// Drive the analysis of interleaved memory accesses in the loop.
623///
624/// Use this class to analyze interleaved accesses only when we can vectorize
625/// a loop. Otherwise it's meaningless to do analysis as the vectorization
626/// on interleaved accesses is unsafe.
627///
628/// The analysis collects interleave groups and records the relationships
629/// between the member and the group in a map.
630class InterleavedAccessInfo {
631public:
632 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
633 DominatorTree *DT, LoopInfo *LI,
634 const LoopAccessInfo *LAI)
635 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
636
637 ~InterleavedAccessInfo() { invalidateGroups(); }
638
639 /// Analyze the interleaved accesses and collect them in interleave
640 /// groups. Substitute symbolic strides using \p Strides.
641 /// Consider also predicated loads/stores in the analysis if
642 /// \p EnableMaskedInterleavedGroup is true.
643 void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
644
645 /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
646 /// contrary to the original assumption. Although we currently prevent group
647 /// formation for predicated accesses, we may be able to relax this limitation
648 /// in the future once we handle more complicated blocks. Returns true if any
649 /// groups were invalidated.
650 bool invalidateGroups() {
651 if (InterleaveGroups.empty()) {
652 assert(
653 !RequiresScalarEpilogue &&
654 "RequiresScalarEpilog should not be set without interleave groups");
655 return false;
656 }
657
658 InterleaveGroupMap.clear();
659 for (auto *Ptr : InterleaveGroups)
660 delete Ptr;
661 InterleaveGroups.clear();
662 RequiresScalarEpilogue = false;
663 return true;
664 }
665
666 /// Check if \p Instr belongs to any interleave group.
667 bool isInterleaved(Instruction *Instr) const {
668 return InterleaveGroupMap.contains(Instr);
669 }
670
671 /// Get the interleave group that \p Instr belongs to.
672 ///
673 /// \returns nullptr if \p Instr does not belong to any group.
674 InterleaveGroup<Instruction> *
675 getInterleaveGroup(const Instruction *Instr) const {
676 return InterleaveGroupMap.lookup(Instr);
677 }
678
679 iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
680 getInterleaveGroups() {
681 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
682 }
683
684 /// Returns true if an interleaved group that may access memory
685 /// out-of-bounds requires a scalar epilogue iteration for correctness.
686 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
687
688 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
689 /// happen when optimizing for size forbids a scalar epilogue, and the gap
690 /// cannot be filtered by masking the load/store.
691 void invalidateGroupsRequiringScalarEpilogue();
692
693 /// Returns true if we have any interleave groups.
694 bool hasGroups() const { return !InterleaveGroups.empty(); }
695
696private:
697 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
698 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
699 /// The interleaved access analysis can also add new predicates (for example
700 /// by versioning strides of pointers).
701 PredicatedScalarEvolution &PSE;
702
703 Loop *TheLoop;
704 DominatorTree *DT;
705 LoopInfo *LI;
706 const LoopAccessInfo *LAI;
707
708 /// True if the loop may contain non-reversed interleaved groups with
709 /// out-of-bounds accesses. We ensure we don't speculatively access memory
710 /// out-of-bounds by executing at least one scalar epilogue iteration.
711 bool RequiresScalarEpilogue = false;
712
713 /// Holds the relationships between the members and the interleave group.
714 DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
715
716 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
717
718 /// Holds dependences among the memory accesses in the loop. It maps a source
719 /// access to a set of dependent sink accesses.
720 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
721
722 /// The descriptor for a strided memory access.
723 struct StrideDescriptor {
724 StrideDescriptor() = default;
725 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
726 Align Alignment)
727 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
728
729 // The access's stride. It is negative for a reverse access.
730 int64_t Stride = 0;
731
732 // The scalar expression of this access.
733 const SCEV *Scev = nullptr;
734
735 // The size of the memory object.
736 uint64_t Size = 0;
737
738 // The alignment of this access.
739 Align Alignment;
740 };
741
742 /// A type for holding instructions and their stride descriptors.
743 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
744
745 /// Create a new interleave group with the given instruction \p Instr,
746 /// stride \p Stride and alignment \p Align.
747 ///
748 /// \returns the newly created interleave group.
749 InterleaveGroup<Instruction> *
750 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
751 assert(!InterleaveGroupMap.count(Instr) &&
752 "Already in an interleaved access group");
753 InterleaveGroupMap[Instr] =
754 new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
755 InterleaveGroups.insert(InterleaveGroupMap[Instr]);
756 return InterleaveGroupMap[Instr];
757 }
758
759 /// Release the group and remove all the relationships.
760 void releaseGroup(InterleaveGroup<Instruction> *Group) {
761 InterleaveGroups.erase(Group);
762 releaseGroupWithoutRemovingFromSet(Group);
763 }
764
765 /// Do everything necessary to release the group, apart from removing it from
766 /// the InterleaveGroups set.
767 void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
768 for (unsigned i = 0; i < Group->getFactor(); i++)
769 if (Instruction *Member = Group->getMember(i))
770 InterleaveGroupMap.erase(Member);
771
772 delete Group;
773 }
774
775 /// Collect all the accesses with a constant stride in program order.
776 void collectConstStrideAccesses(
777 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
778 const DenseMap<Value *, const SCEV *> &Strides);
779
780 /// Returns true if \p Stride is allowed in an interleaved group.
781 static bool isStrided(int Stride);
782
783 /// Returns true if \p BB is a predicated block.
784 bool isPredicated(BasicBlock *BB) const {
785 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
786 }
787
788 /// Returns true if LoopAccessInfo can be used for dependence queries.
789 bool areDependencesValid() const {
790 return LAI && LAI->getDepChecker().getDependences();
791 }
792
793 /// Returns true if memory accesses \p A and \p B can be reordered, if
794 /// necessary, when constructing interleaved groups.
795 ///
796 /// \p A must precede \p B in program order. We return false if reordering is
797 /// not necessary or is prevented because \p A and \p B may be dependent.
798 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
799 StrideEntry *B) const {
800 // Code motion for interleaved accesses can potentially hoist strided loads
801 // and sink strided stores. The code below checks the legality of the
802 // following two conditions:
803 //
804 // 1. Potentially moving a strided load (B) before any store (A) that
805 // precedes B, or
806 //
807 // 2. Potentially moving a strided store (A) after any load or store (B)
808 // that A precedes.
809 //
810 // It's legal to reorder A and B if we know there isn't a dependence from A
811 // to B. Note that this determination is conservative since some
812 // dependences could potentially be reordered safely.
813
814 // A is potentially the source of a dependence.
815 auto *Src = A->first;
816 auto SrcDes = A->second;
817
818 // B is potentially the sink of a dependence.
819 auto *Sink = B->first;
820 auto SinkDes = B->second;
821
822 // Code motion for interleaved accesses can't violate WAR dependences.
823 // Thus, reordering is legal if the source isn't a write.
824 if (!Src->mayWriteToMemory())
825 return true;
826
827 // At least one of the accesses must be strided.
828 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
829 return true;
830
831 // If dependence information is not available from LoopAccessInfo,
832 // conservatively assume the instructions can't be reordered.
833 if (!areDependencesValid())
834 return false;
835
836 // If we know there is a dependence from source to sink, assume the
837 // instructions can't be reordered. Otherwise, reordering is legal.
838 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
839 }
840
841 /// Collect the dependences from LoopAccessInfo.
842 ///
843 /// We process the dependences once during the interleaved access analysis to
844 /// enable constant-time dependence queries.
845 void collectDependences() {
846 if (!areDependencesValid())
847 return;
848 const auto &DepChecker = LAI->getDepChecker();
849 auto *Deps = DepChecker.getDependences();
850 for (auto Dep : *Deps)
851 Dependences[Dep.getSource(DepChecker)].insert(
852 Dep.getDestination(DepChecker));
853 }
854};
855
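// Editor's example (an illustrative sketch, not part of the upstream header):
// the intended driver sequence for the analysis above, assuming IAI was built
// from the loop's PSE, DT, LI and LAI. The helper name
// exampleLoopNeedsScalarEpilogue is hypothetical.
inline bool exampleLoopNeedsScalarEpilogue(InterleavedAccessInfo &IAI,
                                           bool AllowMaskedGroups) {
  IAI.analyzeInterleaving(AllowMaskedGroups);
  // A scalar epilogue iteration is required when some group with gaps could
  // otherwise speculatively access memory out of bounds.
  return IAI.hasGroups() && IAI.requiresScalarEpilogue();
}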
856} // llvm namespace
857
858#endif