docs/doxygen/AArch64PostLegalizerLowering_8cpp_source.html

//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// Post-legalization lowering for instructions.

///

/// This is used to offload pattern matching from the selector.

///

/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually

/// a G_ZIP, G_UZP, etc.

///

/// General optimization combines should be handled by either the

/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.

///

//===----------------------------------------------------------------------===//


#include "AArch64ExpandImm.h"

#include "AArch64GlobalISelUtils.h"

#include "AArch64PerfectShuffle.h"

#include "AArch64Subtarget.h"

#include "GISel/AArch64LegalizerInfo.h"

#include "MCTargetDesc/AArch64MCTargetDesc.h"

#include "Utils/AArch64BaseInfo.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Utils.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/InitializePasses.h"

#include "llvm/Support/ErrorHandling.h"

#include <optional>


#define GET_GICOMBINER_DEPS

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "aarch64-postlegalizer-lowering"


using namespace llvm;

using namespace MIPatternMatch;

using namespace AArch64GISelUtils;


namespace {


#define GET_GICOMBINER_TYPES

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_TYPES


/// Describes the target pseudo instruction that replaces a G_SHUFFLE_VECTOR.
///
/// The shuffle matchers in this file fill one of these in on success; the
/// matching apply function then emits the described instruction.
struct ShuffleVectorPseudo {
  /// Opcode for the instruction. (E.g. G_ZIP1). Zero until a matcher sets it,
  /// so a default-constructed object never carries an indeterminate opcode.
  unsigned Opc = 0;
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source operands (registers/immediates).

  /// Build a fully specified pseudo.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}

  /// Build an empty pseudo for a matcher to overwrite.
  ShuffleVectorPseudo() = default;
};


/// Decide whether shuffle mask \p M can be implemented by a G_EXT whose two
/// vector sources are different.
///
/// \param M       The shuffle mask; negative entries denote undef lanes.
/// \param NumElts Element count used to size the wrap-around arithmetic and
///                to split indices between the two inputs.
/// \returns std::nullopt if \p M is not an EXT mask. Otherwise a pair: the
/// first member is true when the two inputs have to be swapped, the second is
/// the EXT index counted in elements.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Find the first lane that is actually defined; an all-undef mask is not
  // treated as an EXT.
  auto FirstDefined = find_if(M, [](int Idx) { return Idx >= 0; });
  if (FirstDefined == M.end())
    return std::nullopt;

  // The running "expected index" lives in an APInt that is log2(2 * NumElts)
  // bits wide, so incrementing past the last index wraps back to zero.
  const unsigned IndexBits = APInt(32, NumElts * 2).logBase2();
  APInt Expected = APInt(IndexBits, *FirstDefined + 1, false, true);

  // Every later defined lane must continue the consecutive sequence. The
  // expected value advances for undef lanes as well.
  for (auto It = std::next(FirstDefined), End = M.end(); It != End; ++It) {
    const int Elt = *It;
    if (Elt != Expected++ && Elt >= 0)
      return std::nullopt;
  }

  // Expected now holds "last mask index + 1", which (after wrap-around) is
  // the value the first lane would have had if it were defined. E.g.
  //   <-1, -1, 3, ...>    is treated as <1, 2, 3, ...>.
  //   <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  const uint64_t Imm = Expected.getZExtValue();

  // An index that falls in the first input means the inputs must be swapped.
  // For <4 x i32> both of these end up as mask <5, 6, 7, 0>:
  //   Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  //   Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  if (Imm < NumElts)
    return std::make_pair(true, Imm);

  // Otherwise rebase the index onto the second input.
  return std::make_pair(false, Imm - NumElts);
}


/// Helper function for matchINS.
///
/// A mask is an "ins" mask when all lanes but one are the identity of one of
/// the two inputs (undef lanes count as matching either input).
///
/// \param M                The shuffle mask to classify.
/// \param NumInputElements Number of elements in each shuffle input.
/// \returns std::nullopt when \p M is not an ins mask. Otherwise a pair:
///   - first: true when the G_INSERT_VECTOR_ELT destination should be the LHS
///     of the G_SHUFFLE_VECTOR, false when it should be the RHS;
///   - second: the destination lane for the G_INSERT_VECTOR_ELT.
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;

  int LHSHits = 0;
  int RHSHits = 0;
  int LHSMissLane = -1;
  int RHSMissLane = -1;

  for (int Lane = 0; Lane < NumInputElements; ++Lane) {
    const int Elt = M[Lane];

    // An undef lane is compatible with both inputs.
    if (Elt == -1) {
      ++LHSHits;
      ++RHSHits;
      continue;
    }

    // Identity with respect to the LHS?
    if (Elt == Lane)
      ++LHSHits;
    else
      LHSMissLane = Lane;

    // Identity with respect to the RHS?
    if (Elt == Lane + NumInputElements)
      ++RHSHits;
    else
      RHSMissLane = Lane;
  }

  // Exactly one lane may deviate. The LHS is preferred when both qualify.
  const int Needed = NumInputElements - 1;
  if (LHSHits == Needed)
    return std::make_pair(true, LHSMissLane);
  if (RHSHits == Needed)
    return std::make_pair(false, RHSMissLane);
  return std::nullopt;
}


/// Try to express a G_SHUFFLE_VECTOR as a G_REV64 / G_REV32 / G_REV16.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - On success, the G_REV pseudo with the shuffle's
/// destination and its first source operand.
/// \return true if \p MI can be replaced with a G_REV instruction.
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  // Element size for a rev cannot be 64.
  const unsigned EltSize = Ty.getScalarSizeInBits();
  if (EltSize == 64)
    return false;

  const unsigned NumElts = Ty.getNumElements();
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Src = MI.getOperand(1).getReg();

  // Probe the lane sizes from widest to narrowest and take the first hit.
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (!isREVMask(ShuffleMask, EltSize, NumElts, LaneSize))
      continue;

    unsigned Opcode = AArch64::G_REV16;
    switch (LaneSize) {
    case 64U:
      Opcode = AArch64::G_REV64;
      break;
    case 32U:
      Opcode = AArch64::G_REV32;
      break;
    default:
      break;
    }

    MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
    return true;
  }

  return false;
}


/// Try to express a G_SHUFFLE_VECTOR as a G_TRN1 or G_TRN2.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_TRN1 or G_TRN2 on success, using both
/// shuffle sources in their original order.
/// \return true if \p MI can be replaced with a G_TRN1 or G_TRN2 instruction.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();

  // isTRNMask reports which of the two variants matched via WhichResult.
  unsigned WhichResult;
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;

  unsigned Opc = AArch64::G_TRN2;
  if (WhichResult == 0)
    Opc = AArch64::G_TRN1;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {LHS, RHS});
  return true;
}


/// Try to express a G_SHUFFLE_VECTOR as a G_UZP1 or G_UZP2.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success, using both
/// shuffle sources in their original order.
/// \return true if \p MI can be replaced with a G_UZP1 or G_UZP2 instruction.
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();

  // isUZPMask reports which of the two variants matched via WhichResult.
  unsigned WhichResult;
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;

  unsigned Opc = AArch64::G_UZP2;
  if (WhichResult == 0)
    Opc = AArch64::G_UZP1;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {LHS, RHS});
  return true;
}


/// Try to express a G_SHUFFLE_VECTOR as a G_ZIP1 or G_ZIP2.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_ZIP1 or G_ZIP2 on success, using both
/// shuffle sources in their original order.
/// \return true if \p MI can be replaced with a G_ZIP1 or G_ZIP2 instruction.
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();

  // isZIPMask reports which of the two variants matched via WhichResult.
  unsigned WhichResult;
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
    return false;

  unsigned Opc = AArch64::G_ZIP2;
  if (WhichResult == 0)
    Opc = AArch64::G_ZIP1;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {LHS, RHS});
  return true;
}


/// Helper function for matchDup.

bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,

                                 MachineRegisterInfo &MRI,

                                 ShuffleVectorPseudo &MatchInfo) {

  if (Lane != 0)

    return false;


  // Try to match a vector splat operation into a dup instruction.

  // We're looking for this pattern:

  //

  // %scalar:gpr(s64) = COPY $x0

  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF

  // %cst0:gpr(s32) = G_CONSTANT i32 0

  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)

  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)

  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,

  // %zerovec(<2 x s32>)

  //

  // ...into:

  // %splat = G_DUP %scalar


  // Begin matching the insert.

  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,

                             MI.getOperand(1).getReg(), MRI);

  if (!InsMI)

    return false;

  // Match the undef vector operand.

  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),

                    MRI))

    return false;


  // Match the index constant 0.

  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))

    return false;


  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),

                                  {InsMI->getOperand(2).getReg()});

  return true;

}


/// Helper function for matchDup.
///
/// If the shuffle source that \p Lane selects from is a G_BUILD_VECTOR, the
/// splat can duplicate that build vector's operand for the lane directly.
///
/// \param Lane Non-negative splat lane; values at or beyond the element count
///             of the first source refer to the second source.
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - The G_DUP pseudo on success.
/// \return true if the selected source is a G_BUILD_VECTOR.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  const int NumElements =
      MRI.getType(MI.getOperand(1).getReg()).getNumElements();

  // Work out which shuffle input the lane lives in (operand 1 is the LHS,
  // operand 2 the RHS) and look for a G_BUILD_VECTOR defining it.
  const bool LaneInRHS = Lane >= NumElements;
  const unsigned SrcOpIdx = LaneInRHS ? 2 : 1;
  auto *BuildVec = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                                MI.getOperand(SrcOpIdx).getReg(), MRI);
  if (!BuildVec)
    return false;

  // Rebase the lane onto the RHS when needed. Operand 0 of the build vector
  // is its def, so lane N is operand N + 1.
  const int LaneInSrc = LaneInRHS ? Lane - NumElements : Lane;
  Register Reg = BuildVec->getOperand(LaneInSrc + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}


/// Try to express a splat G_SHUFFLE_VECTOR as a G_DUP of a scalar.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - The G_DUP pseudo on success.
/// \return true if one of the two helper patterns (insert-into-undef, then
/// build-vector) matched.
bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;

  // A negative index is treated as an undef splat; use lane 0 so it can
  // still be generated via a plain dup if possible.
  const int Lane = *MaybeLane < 0 ? 0 : *MaybeLane;

  return matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo) ||
         matchDupFromBuildVector(Lane, MI, MRI, MatchInfo);
}


/// Check if an EXT instruction can handle the shuffle mask \p M when the
/// vector sources of the shuffle are the same.
///
/// \param M  The shuffle mask; negative entries denote undef lanes.
/// \param Ty Vector type supplying the element count.
/// \return true if the defined lanes form a run of consecutive indices that
/// starts at M[0] and wraps to zero upon reaching the element count.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  const unsigned NumElts = Ty.getNumElements();

  // The first index doubles as the EXT immediate, so it has to be defined.
  if (M[0] < 0)
    return false;

  unsigned Expected = M[0];
  for (unsigned Idx = 1; Idx < NumElts; ++Idx) {
    // Step to the next expected index; on reaching the end of the vector
    // continue from index zero.
    Expected = (Expected + 1 == NumElts) ? 0 : Expected + 1;

    // Undef lanes are compatible with anything.
    const int Elt = M[Idx];
    if (Elt >= 0 && static_cast<unsigned>(Elt) != Expected)
      return false;
  }

  return true;
}


/// Try to express a G_SHUFFLE_VECTOR as a G_EXT.
///
/// Two forms are recognised: an EXT across the two distinct sources (possibly
/// swapped), and, when the second source is undef, an EXT that uses the first
/// source for both inputs.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - On success, a G_EXT pseudo whose third source
/// operand is the immediate: the element index scaled by the element size in
/// bytes.
/// \return true if \p MI can be replaced with a G_EXT.
bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();

  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  // Number of bytes per element; converts element indices to the immediate.
  const uint64_t BytesPerElt = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (ExtInfo) {
    // Two-source form. Swap the inputs if the mask asked for it.
    if (ExtInfo->first)
      std::swap(V1, V2);
    uint64_t Imm = ExtInfo->second;
    Imm *= BytesPerElt;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
    return true;
  }

  // Single-source form: only valid if the second input is undef and the mask
  // rotates the first input.
  if (!getOpcodeDef<GImplicitDef>(V2, MRI) || !isSingletonExtMask(Mask, DstTy))
    return false;

  uint64_t Imm = Mask[0] * BytesPerElt;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
  return true;
}


/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
///
/// \param MI        The G_SHUFFLE_VECTOR; it is erased.
/// \param MatchInfo Opcode, destination and source operands of the
///                  replacement instruction, which is built at \p MI.
void applyShuffleVectorPseudo(MachineInstr &MI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder Builder(MI);
  const unsigned NewOpc = MatchInfo.Opc;
  Register DstReg = MatchInfo.Dst;
  Builder.buildInstr(NewOpc, {DstReg}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}


/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
///
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work. An immediate of zero is
/// emitted as a plain copy of the first source instead.
///
/// \param MI        The G_SHUFFLE_VECTOR; it is erased.
/// \param MatchInfo Pseudo whose SrcOps are {first source, second source,
///                  immediate}.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder Builder(MI);
  const auto Imm = MatchInfo.SrcOps[2].getImm();
  if (Imm != 0) {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto ImmCst = Builder.buildConstant(LLT::scalar(32), Imm);
    Builder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                       {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], ImmCst});
  } else {
    Builder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  }
  MI.eraseFromParent();
}


/// Replace \p MI by a G_REV64 of its first source followed by a G_EXT of that
/// result with itself at immediate 8.
///
/// \param MI  The instruction to replace; operand 0 is the destination and
///            operand 1 the source. It is erased.
/// \param MRI Used to look up the destination type, which must be 128 bits
///            wide.
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT VecTy = MRI.getType(DstReg);
  assert(VecTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");

  MachineIRBuilder Builder(MI);
  // Emission order: the i32 constant 8, then the REV64, then the EXT that
  // consumes both.
  auto EightCst = Builder.buildConstant(LLT::scalar(32), 8);
  auto Rev64 = Builder.buildInstr(AArch64::G_REV64, {VecTy}, {SrcReg});
  Builder.buildInstr(AArch64::G_EXT, {DstReg}, {Rev64, Rev64, EightCst});
  MI.eraseFromParent();
}


/// Match a G_INSERT_VECTOR_ELT whose index operand is not a constant.
///
/// \param MI The G_INSERT_VECTOR_ELT; operand 3 is the index.
/// \return true if no integer constant could be found for the index, even
/// when looking through its definition chain.
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  const Register IdxReg = MI.getOperand(3).getReg();
  const bool HasConstIdx =
      getIConstantVRegValWithLookThrough(IdxReg, MRI).has_value();
  return !HasConstIdx;
}


/// Lower a G_INSERT_VECTOR_ELT with a non-constant index through memory:
/// store the whole vector to a fresh stack object, store the element at the
/// computed address, then load the vector back.
///
/// Scalable vectors are left untouched.
///
/// \param MI      The G_INSERT_VECTOR_ELT; erased unless the vector type is
///                scalable.
/// \param MRI     Used for type lookups.
/// \param Builder Positioned at \p MI and used for all new instructions.
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register IdxReg = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(IdxReg);

  if (VecTy.isScalableVector())
    return;

  // Create a stack object sized for the vector, aligned to the vector size
  // but to no more than 16 bytes.
  MachineFunction &MF = Builder.getMF();
  Align SlotAlign(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int SlotIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                    SlotAlign, false);
  MachinePointerInfo SlotInfo = MachinePointerInfo::getFixedStack(MF, SlotIdx);
  auto SlotAddr = Builder.buildFrameIndex(LLT::pointer(0, 64), SlotIdx);

  // Spill the incoming vector.
  // NOTE(review): the whole-vector store/load use a fixed Align(8) and the
  // element store Align(1) rather than SlotAlign - confirm this is intended.
  Builder.buildStore(Insert.getOperand(1), SlotAddr, SlotInfo, Align(8));

  // Compute the element address. The index is masked with NumElements - 1 so
  // that an out-of-bounds index cannot address memory outside the slot.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto LaneMask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register ClampedIdx = Builder.buildAnd(IdxTy, IdxReg, LaneMask).getReg(0);
  auto EltBytes = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register ByteOffset =
      Builder.buildMul(IdxTy, ClampedIdx, EltBytes).getReg(0);
  Register EltAddr =
      Builder
          .buildPtrAdd(MRI.getType(SlotAddr.getReg(0)), SlotAddr, ByteOffset)
          .getReg(0);

  // Overwrite the selected element in memory.
  Builder.buildStore(Insert.getElementReg(), EltAddr, SlotInfo, Align(1));
  // Reload the whole vector into the original destination.
  Builder.buildLoad(Insert.getReg(0), SlotAddr, SlotInfo, Align(8));
  Insert.eraseFromParent();
}


/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// Can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - On success: (vector to insert into, destination
/// lane, vector to extract from, source lane).
/// \return true if the mask is an ins mask.
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  const int NumElts = MRI.getType(Dst).getNumElements();

  auto InsInfo = isINSMask(ShuffleMask, NumElts);
  if (!InsInfo)
    return false;
  const bool DstIsLeft = InsInfo->first;
  const int DstLane = InsInfo->second;

  Register Left = MI.getOperand(1).getReg();
  Register Right = MI.getOperand(2).getReg();

  // The mask entry at the odd lane says where the inserted element comes
  // from; indices at or above NumElts refer to the right-hand input.
  const int MaskElt = ShuffleMask[DstLane];
  const bool SrcIsRight = MaskElt >= NumElts;
  Register SrcVec = SrcIsRight ? Right : Left;
  const int SrcLane = SrcIsRight ? MaskElt - NumElts : MaskElt;

  Register DstVec = DstIsLeft ? Left : Right;
  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}


/// Replace a G_SHUFFLE_VECTOR matched by matchINS with a
/// G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT pair.
///
/// \param MI        The G_SHUFFLE_VECTOR; it is erased.
/// \param MRI       Used to obtain the destination's element type.
/// \param Builder   Positioned at \p MI and used for all new instructions.
/// \param MatchInfo (vector to insert into, destination lane, vector to
///                  extract from, source lane).
void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();

  Register DstVec = std::get<0>(MatchInfo);
  const int DstLane = std::get<1>(MatchInfo);
  Register SrcVec = std::get<2>(MatchInfo);
  const int SrcLane = std::get<3>(MatchInfo);

  // Both lane indices are materialised as 64-bit constants.
  auto SrcIdx = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Elt = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcIdx);
  auto DstIdx = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Elt, DstIdx);
  MI.eraseFromParent();
}


/// isVShiftRImm - Check if the register \p Reg is a splat constant usable as
/// the immediate operand of a vector shift right operation. The value must be
/// in the range:
///   1 <= Value <= ElementBits for a right shift.
///
/// \param Reg Register holding the shift amount vector.
/// \param Ty  Vector type supplying the element width.
/// \param [out] Cnt Receives the splat value whenever one is found, even if
/// it turns out to be out of range.
/// \return true if a splat value was found and it is in range.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *ShiftAmtDef = MRI.getVRegDef(Reg);
  auto SplatVal = getAArch64VectorSplatScalar(*ShiftAmtDef, MRI);
  if (!SplatVal)
    return false;

  Cnt = *SplatVal;
  const int64_t MaxShift = Ty.getScalarSizeInBits();
  return 1 <= Cnt && Cnt <= MaxShift;
}


/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
///
/// \param [in] MI - The G_ASHR or G_LSHR.
/// \param [out] Imm - The shift amount, as reported by isVShiftRImm.
/// \return true if the shifted value is a vector and the shift amount is a
/// valid right-shift immediate for it.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  return Ty.isVector() &&
         isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}


/// Replace a vector G_ASHR / G_LSHR by G_VASHR / G_VLSHR taking the shift
/// amount as an i32 G_CONSTANT.
///
/// \param MI  The G_ASHR or G_LSHR; it is erased.
/// \param Imm The shift amount to materialise.
void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  const unsigned OldOpc = MI.getOpcode();
  assert(OldOpc == TargetOpcode::G_ASHR || OldOpc == TargetOpcode::G_LSHR);

  unsigned NewOpc = AArch64::G_VLSHR;
  if (OldOpc == TargetOpcode::G_ASHR)
    NewOpc = AArch64::G_VASHR;

  MachineIRBuilder Builder(MI);
  auto ShiftCst = Builder.buildConstant(LLT::scalar(32), Imm);
  Builder.buildInstr(NewOpc, {MI.getOperand(0)},
                     {MI.getOperand(1), ShiftCst});
  MI.eraseFromParent();
}


/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic
/// immediate, or at least becomes materializable in a single instruction
/// when it was not before.
///
/// \param RHS The right-hand operand of the compare.
/// \param P   The compare predicate.
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction, or std::nullopt if \p RHS is a
/// vector, is not a constant, is already a legal arithmetic immediate, or no
/// adjustment helps.
///
/// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // Nothing to do unless the RHS is a constant that is not already a valid
  // arithmetic immediate.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  const uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  if (isLegalArithImmed(OriginalC))
    return std::nullopt;

  // Nudge the constant by one and flip strictness of the predicate, provided
  // the nudge cannot overflow for the compare width.
  uint64_t C = OriginalC;
  switch (P) {
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // Valid unless c is the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // Valid unless c is zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;

  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // Valid unless c is the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;

  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // Valid unless c is the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;

  default:
    // Equality and any other predicates cannot be adjusted this way.
    return std::nullopt;
  }

  // A 32-bit compare only sees the low 32 bits of the adjusted constant.
  if (Size == 32)
    C = static_cast<uint32_t>(C);

  // Best case: the adjusted constant is a legal arithmetic immediate.
  if (isLegalArithImmed(C))
    return std::make_pair(C, P);

  // Otherwise the adjustment is still worthwhile if it turns a constant that
  // needs several instructions into one that needs a single instruction.
  // NOTE(review): the width handed to expandMOVImm is always 32, including
  // for 64-bit compares - confirm that this is intended.
  auto IsMaterializableInSingleInstruction = [](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, 32, Insn);
    return Insn.size() == 1;
  };

  if (!IsMaterializableInSingleInstruction(OriginalC) &&
      IsMaterializableInSingleInstruction(C))
    return std::make_pair(C, P);

  return std::nullopt;
}


/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
///
/// \return true if an adjusted immediate/predicate pair was found.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Operand 1 carries the predicate, operand 3 the right-hand side.
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  auto Adjusted = tryAdjustICmpImmAndPred(RHS, Pred, MRI);
  if (!Adjusted)
    return false;

  MatchInfo = *Adjusted;
  return true;
}


/// Rewrite a G_ICMP in place to use the immediate and predicate chosen by
/// matchAdjustICmpImmAndPred.
///
/// \param MI        The G_ICMP; it is modified, not replaced.
/// \param MatchInfo The new RHS immediate (first) and predicate (second).
/// \param MIB       Positioned at \p MI and used to build the new constant.
/// \param Observer  Notified before and after \p MI is changed.
void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineOperand &RHS = MI.getOperand(3);

  // Materialise the new immediate into a clone of the old RHS register.
  auto NewCst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                                  MatchInfo.first);

  // Swap in the new operand and predicate, bracketed by the change
  // notifications.
  Observer.changingInstr(MI);
  RHS.setReg(NewCst.getReg(0));
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}


/// Match a splat G_SHUFFLE_VECTOR that can be selected as a G_DUPLANE8/16/
/// 32/64 of its first source.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - On success, the G_DUPLANE opcode (first) and the
/// lane to duplicate (second).
/// \return true if the shuffle is a splat of a lane of the first source, the
/// source and destination types agree, and the element count / element size
/// combination has a G_DUPLANE opcode.
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  const auto NumElts = SrcTy.getNumElements();
  if (*LaneIdx >= NumElts)
    return false;

  if (DstTy != SrcTy)
    return false;

  const unsigned ScalarSize = SrcTy.getElementType().getSizeInBits();

  // Pick the opcode from the element size, accepting only the element counts
  // listed for that size.
  unsigned Opc = 0;
  switch (ScalarSize) {
  case 64:
    if (NumElts == 2)
      Opc = AArch64::G_DUPLANE64;
    break;
  case 32:
    if (NumElts == 2 || NumElts == 4)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 16:
    if (NumElts == 4 || NumElts == 8)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
    if (NumElts == 8 || NumElts == 16)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo = std::make_pair(Opc, *LaneIdx);
  return true;
}


/// Replace a G_SHUFFLE_VECTOR matched by matchDupLane with the chosen
/// G_DUPLANE instruction.
///
/// \param MI        The G_SHUFFLE_VECTOR; it is erased.
/// \param MRI       Used to look up the source type.
/// \param B         Positioned at \p MI and used for all new instructions.
/// \param MatchInfo The G_DUPLANE opcode (first) and lane (second).
void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  // The lane is passed as a 64-bit constant.
  auto LaneCst = B.buildConstant(LLT::scalar(64), MatchInfo.second);

  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // A 64-bit source is therefore widened by concatenating it with undef.
  Register DupSrc = Src1Reg;
  if (SrcTy.getSizeInBits() == 64) {
    auto UndefHalf = B.buildUndef(SrcTy);
    auto Widened = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                        {Src1Reg, UndefHalf.getReg(0)});
    DupSrc = Widened.getReg(0);
  }

  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
               {DupSrc, LaneCst});
  MI.eraseFromParent();
}


/// Match an unmerge that splits a 64- or 128-bit fixed-length vector into
/// exactly one result per element.
///
/// \param MI The unmerge instruction; its last operand is the source.
/// \return true if the source is such a vector and the operand count equals
/// the element count plus one (the source itself).
bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  const unsigned NumOps = Unmerge.getNumOperands();
  Register Src1Reg = Unmerge.getReg(NumOps - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);

  // Only 64-bit and 128-bit sources qualify.
  if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
    return false;

  if (!SrcTy.isVector() || SrcTy.isScalable())
    return false;

  return Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
}


/// Replace an unmerge of a fixed-length vector with one constant-index
/// extract-element per result.
///
/// \param MI  The unmerge instruction; its last operand is the source vector
///            and operands 0..N-1 are the per-element results. It is erased.
/// \param MRI Used to look up the source type.
/// \param B   Builder used for the new instructions. It is positioned at
///            \p MI here, like the other apply functions in this file that
///            take a builder, rather than relying on the caller having done
///            so.
void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
         "Expected a fixed length vector");

  // Insert the extracts right before the unmerge and reuse its debug
  // location.
  B.setInstrAndDebugLoc(MI);

  // Result I of the unmerge becomes element I of the source vector.
  for (unsigned I = 0, E = SrcTy.getNumElements(); I < E; ++I)
    B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
  MI.eraseFromParent();
}


/// Match a G_BUILD_VECTOR that is a splat and should be lowered to a G_DUP.
///
/// \param MI The G_BUILD_VECTOR.
/// \return true if \p MI splats a register, or splats a constant other than
/// 0 and -1.
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  auto Splat = getAArch64VectorSplat(MI, MRI);
  if (!Splat)
    return false;

  // A splat of a register can always become a dup.
  if (Splat->isReg())
    return true;

  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  switch (Splat->getCst()) {
  case 0:
  case -1:
    return false;
  default:
    return true;
  }
}


/// Replace a splat G_BUILD_VECTOR with a G_DUP of its first source operand.
///
/// \param MI The G_BUILD_VECTOR; it is erased.
/// \param B  Positioned at \p MI and used to build the G_DUP.
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  Register DstReg = MI.getOperand(0).getReg();
  Register SplatReg = MI.getOperand(1).getReg();
  B.buildInstr(AArch64::G_DUP, {DstReg}, {SplatReg});
  MI.eraseFromParent();
}


/// Estimate the benefit of folding the definition of a G_ICMP operand into
/// the compare.
///
/// \param CmpOp The compare operand to inspect.
/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations: 0 when nothing can be folded (including when
/// \p CmpOp does not have exactly one non-debug use), otherwise 1 or 2.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // An "extend" here is either a G_SEXT_INREG or a G_AND with a constant
  // mask of 0xFF, 0xFFFF or 0xFFFFFFFF.
  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    const unsigned Opcode = MI.getOpcode();
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      return true;
    if (Opcode != TargetOpcode::G_AND)
      return false;
    auto MaskCst =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!MaskCst)
      return false;
    const uint64_t Mask = MaskCst->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  // A lone extend saves one instruction.
  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  // Beyond that, only shifts by a constant amount are of interest.
  switch (Def->getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    break;
  default:
    return 0;
  }

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  const uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();

  // Check if we can fold an extend and a shift. Both are saved when the
  // shift amount is at most 4, otherwise only one instruction is.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  // A plain scalar shift counts when its amount is in range for the width.
  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  const unsigned ShiftSize = Ty.getSizeInBits();
  const bool InRange = (ShiftSize == 32 && ShiftAmt <= 31) ||
                       (ShiftSize == 64 && ShiftAmt <= 63);
  return InRange ? 1 : 0;
}


/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
///
/// Swapping is profitable when it would introduce a better folding
/// opportunity (e.g. a shift + extend). For example:
///    lsl     w13, w11, #1
///    cmp     w13, w12
/// can be turned into:
///    cmp     w12, w11, lsl #1
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  const Register RHS = MI.getOperand(3).getReg();
  if (auto RHSImm = getIConstantVRegValWithLookThrough(RHS, MRI)) {
    if (isLegalArithImmed(RHSImm->Value.getSExtValue()))
      return false;
  }

  const Register LHS = MI.getOperand(2).getReg();
  const auto Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  // Profit of folding one side of the compare. When the side's definition is
  // recognised by isCMN, the profit is measured on that definition's second
  // source operand instead of on the side itself.
  auto FoldingProfitFor = [&](Register Side) -> unsigned {
    MachineInstr *SideDef = getDefIgnoringCopies(Side, MRI);
    const Register Candidate =
        isCMN(SideDef, Pred, MRI) ? SideDef->getOperand(2).getReg() : Side;
    return getCmpOperandFoldingProfit(Candidate, MRI);
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  const unsigned LHSProfit = FoldingProfitFor(LHS);
  const unsigned RHSProfit = FoldingProfitFor(RHS);

  // Swap only when the LHS is strictly the better folding candidate.
  return LHSProfit > RHSProfit;
}


/// Swap the LHS and RHS of the G_ICMP \p MI in place and replace its predicate
/// with the swapped predicate.
///
/// \param MI The G_ICMP to rewrite.
/// \param Observer Notified before and after \p MI is mutated.
void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  // Announce the mutation *before* touching the instruction. The observer
  // contract is changingInstr() -> mutate -> changedInstr(); this matches the
  // other in-place rewrites in this file.
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}


/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath. Not consulted by
/// the current implementation.
///
/// Only the NE, EQ, GE, GT, LS and MI condition codes are supported. Every
/// code picks a compare-against-zero pseudo (used when \p IsZero is set) and a
/// two-register pseudo. LS and MI reuse the GE/GT two-register pseudos with
/// the operands swapped, and NE is an EQ compare followed by a bitwise NOT.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
              bool NoNans, MachineRegisterInfo &MRI) {
  const LLT CmpTy = MRI.getType(LHS);
  assert(CmpTy.isVector() && "Expected vector types only?");
  assert(CmpTy == MRI.getType(RHS) && "Src and Dst types must match!");

  unsigned ZeroOpc = 0;      // Pseudo used for a compare against zero.
  unsigned RegOpc = 0;       // Pseudo used for a register/register compare.
  bool SwapOperands = false; // Emit the two-register form as (RHS, LHS).
  bool NegateResult = false; // Wrap the compare in a bitwise NOT.

  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    ZeroOpc = AArch64::G_FCMEQZ;
    RegOpc = AArch64::G_FCMEQ;
    NegateResult = true;
    break;
  case AArch64CC::EQ:
    ZeroOpc = AArch64::G_FCMEQZ;
    RegOpc = AArch64::G_FCMEQ;
    break;
  case AArch64CC::GE:
    ZeroOpc = AArch64::G_FCMGEZ;
    RegOpc = AArch64::G_FCMGE;
    break;
  case AArch64CC::GT:
    ZeroOpc = AArch64::G_FCMGTZ;
    RegOpc = AArch64::G_FCMGT;
    break;
  case AArch64CC::LS:
    ZeroOpc = AArch64::G_FCMLEZ;
    RegOpc = AArch64::G_FCMGE;
    SwapOperands = true;
    break;
  case AArch64CC::MI:
    ZeroOpc = AArch64::G_FCMLTZ;
    RegOpc = AArch64::G_FCMGT;
    SwapOperands = true;
    break;
  }

  return [LHS, RHS, IsZero, CmpTy, ZeroOpc, RegOpc, SwapOperands,
          NegateResult](MachineIRBuilder &MIB) -> Register {
    const Register FirstSrc = SwapOperands ? RHS : LHS;
    const Register SecondSrc = SwapOperands ? LHS : RHS;
    auto Cmp = IsZero
                   ? MIB.buildInstr(ZeroOpc, {CmpTy}, {LHS})
                   : MIB.buildInstr(RegOpc, {CmpTy}, {FirstSrc, SecondSrc});
    if (NegateResult)
      return MIB.buildNot(CmpTy, Cmp).getReg(0);
    return Cmp.getReg(0);
  };
}


/// Match a vector G_FCMP \p MI that can be lowered into an AArch64-specific
/// pseudo: the result must be a vector, the subtarget must have NEON, and the
/// compared elements must be 16 bits (full FP16 required), 32 bits or 64 bits
/// wide.
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &Subtarget = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  if (!ResultTy.isVector() || !Subtarget.hasNEON())
    return false;

  const Register CmpLHS = MI.getOperand(2).getReg();
  switch (MRI.getType(CmpLHS).getScalarSizeInBits()) {
  case 16:
    // Half-precision compares need full FP16 support.
    return Subtarget.hasFullFP16();
  case 32:
  case 64:
    return true;
  default:
    return false;
  }
}


/// Lower the vector G_FCMP \p MI into AArch64-specific compare pseudos,
/// redirect all uses of its result to the new value and erase \p MI.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &Subtarget = MI.getMF()->getSubtarget<AArch64Subtarget>();
  const auto &FCmp = cast<GFCmp>(MI);

  const Register ResultReg = FCmp.getReg(0);
  const CmpInst::Predicate Pred = FCmp.getCond();
  const Register LHS = FCmp.getLHSReg();
  Register RHS = FCmp.getRHSReg();
  const LLT ResultTy = MRI.getType(ResultReg);

  // Compares against 0 have special target-specific pseudos.
  auto RHSSplat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
  bool IsZero = RHSSplat && RHSSplat->isCst() && RHSSplat->getCst() == 0;

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  const bool IsOrdOrUno = Pred == CmpInst::Predicate::FCMP_ORD ||
                          Pred == CmpInst::Predicate::FCMP_UNO;
  if (IsOrdOrUno && IsZero) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    IsZero = false;
    if (Pred == CmpInst::Predicate::FCMP_ORD)
      CC = AArch64CC::EQ;
    else
      CC = AArch64CC::NE;
  } else {
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
  }

  // Everything built from here on is inserted at MI.
  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      Subtarget.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  auto BuildFirstCmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  Register Result;
  if (CC2 != AArch64CC::AL) {
    // Two condition codes are needed: emit the second compare, then the
    // first, and OR the two masks together.
    auto BuildSecondCmp = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
    auto SecondMask = BuildSecondCmp(MIB);
    auto FirstMask = BuildFirstCmp(MIB);
    Result = MIB.buildOr(ResultTy, FirstMask, SecondMask).getReg(0);
  } else {
    Result = BuildFirstCmp(MIB);
  }

  if (Invert)
    Result = MIB.buildNot(ResultTy, Result).getReg(0);

  MRI.replaceRegWith(ResultReg, Result);
  MI.eraseFromParent();
}


// Matches G_BUILD_VECTOR where at least one source operand is not a constant
// (looking through to the defining instruction of each source).
bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &BuildVec = cast<GBuildVector>(MI);

  // Stop at the first source that does not resolve to a constant.
  const unsigned NumSrcs = BuildVec.getNumSources();
  for (unsigned SrcIdx = 0; SrcIdx != NumSrcs; ++SrcIdx) {
    const Register Src = BuildVec.getSourceReg(SrcIdx);
    if (!getAnyConstantVRegValWithLookThrough(Src, MRI))
      return true;
  }

  // Every source is a constant; leave the G_BUILD_VECTOR alone.
  return false;
}


/// Expand the G_BUILD_VECTOR \p MI into a chain of vector element inserts
/// that starts from an undef vector, then copy the final value into the
/// original destination and erase \p MI. Sources that are implicit defs are
/// skipped, leaving their lanes undefined.
void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B) {
  auto &BuildVec = cast<GBuildVector>(MI);
  const Register ResultReg = BuildVec.getReg(0);
  const LLT VecTy = MRI.getType(ResultReg);

  // Running value of the partially populated vector.
  Register Accum = B.buildUndef(VecTy).getReg(0);

  const unsigned NumSrcs = BuildVec.getNumSources();
  for (unsigned Lane = 0; Lane != NumSrcs; ++Lane) {
    const Register Elt = BuildVec.getSourceReg(Lane);
    if (!mi_match(Elt, MRI, m_GImplicitDef())) {
      auto LaneIdx = B.buildConstant(LLT::scalar(64), Lane);
      Accum =
          B.buildInsertVectorElement(VecTy, Accum, Elt, LaneIdx).getReg(0);
    }
  }

  B.buildCopy(ResultReg, Accum);
  BuildVec.eraseFromParent();
}


/// Match a G_STORE of a non-vector value produced by a G_TRUNC whose source is
/// at most 64 bits wide. The truncate's source is bound to \p SrcReg.
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  const Register StoredVal = MI.getOperand(0).getReg();

  // Scalars only, and the stored value must come from a truncate.
  const bool IsScalarTrunc =
      !MRI.getType(StoredVal).isVector() &&
      mi_match(StoredVal, MRI, m_GTrunc(m_Reg(SrcReg)));

  // Only form truncstores for value types of max 64b.
  return IsScalarTrunc && MRI.getType(SrcReg).getSizeInBits() <= 64;
}


/// Make the G_STORE \p MI store \p SrcReg (the pre-truncate value) directly,
/// notifying \p Observer around the in-place change.
void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  MachineOperand &StoredValOp = MI.getOperand(0);

  Observer.changingInstr(MI);
  StoredValOp.setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones
// at this stage need be lowered back.
//
// Matches any G_SEXT_INREG whose destination is a vector.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  return ResultTy.isVector();
}


/// Lower the vector G_SEXT_INREG \p MI by handing it to
/// LegalizerHelper::lower.
void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  MachineFunction &MF = *MI.getMF();

  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Lowering(MF, Observer, B);
  Lowering.lower(MI, 0, /* Unused hint type */ LLT());
}


/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///           => unused, <N x t> = unmerge v
///
/// On success \p MatchInfo holds the first source of the G_EXT (v above).
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &UnmergeMI = cast<GUnmerge>(MI);

  // Exactly two results, and the second one must have no non-debug uses.
  if (UnmergeMI.getNumDefs() != 2 ||
      !MRI.use_nodbg_empty(UnmergeMI.getReg(1)))
    return false;

  const LLT HalfTy = MRI.getType(UnmergeMI.getReg(0));
  if (!HalfTy.isVector())
    return false;

  // The unmerged value has to be produced by a G_EXT.
  MachineInstr *ExtMI =
      getOpcodeDef(AArch64::G_EXT, UnmergeMI.getSourceReg(), MRI);
  if (!ExtMI)
    return false;

  // The G_EXT's constant third source must equal the byte size of one
  // unmerge result.
  auto ExtAmount =
      getIConstantVRegValWithLookThrough(ExtMI->getOperand(3).getReg(), MRI);
  if (!ExtAmount ||
      ExtAmount->Value.getZExtValue() != HalfTy.getSizeInBytes())
    return false;

  // The second G_EXT source must be an implicit def.
  if (!getOpcodeDef<GImplicitDef>(ExtMI->getOperand(2).getReg(), MRI))
    return false;

  MatchInfo = ExtMI->getOperand(1).getReg();
  return true;
}


/// Rewrite the unmerge \p MI in place: exchange its two destination registers
/// and make it read \p SrcReg, notifying \p Observer around the change.
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  MachineOperand &FirstDef = MI.getOperand(0);
  MachineOperand &SecondDef = MI.getOperand(1);

  Observer.changingInstr(MI);
  // Swap dst registers.
  const Register OldFirst = FirstDef.getReg();
  const Register OldSecond = SecondDef.getReg();
  FirstDef.setReg(OldSecond);
  SecondDef.setReg(OldFirst);
  // Read directly from the matched source.
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
// Match v2s64 mul instructions, which will then be scalarised later on
// Doing these two matches in one function to ensure that the order of matching
// will always be the same.
// Try lowering MUL to MULL before trying to scalarize if needed.
bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Get the instructions that defined the source operand
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  MachineInstr *LHSDef = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  // Only vector multiplies are of interest.
  if (!ResultTy.isVector())
    return false;

  const unsigned LHSOpc = LHSDef->getOpcode();
  const unsigned RHSOpc = RHSDef->getOpcode();
  const bool BothZExt =
      LHSOpc == TargetOpcode::G_ZEXT && RHSOpc == TargetOpcode::G_ZEXT;
  const bool BothSExt =
      LHSOpc == TargetOpcode::G_SEXT && RHSOpc == TargetOpcode::G_SEXT;

  // True when the extend exactly doubles the element width.
  auto DoublesEltWidth = [&MRI](const MachineInstr &Ext) {
    const unsigned WideBits =
        MRI.getType(Ext.getOperand(0).getReg()).getScalarSizeInBits();
    const unsigned NarrowBits =
        MRI.getType(Ext.getOperand(1).getReg()).getScalarSizeInBits();
    return WideBits == NarrowBits * 2;
  };

  // If the source operands were EXTENDED before, then {U/S}MULL can be used
  if ((BothZExt || BothSExt) && DoublesEltWidth(*LHSDef) &&
      DoublesEltWidth(*RHSDef))
    return true;

  // If result type is v2s64, scalarise the instruction
  return ResultTy == LLT::fixed_vector(2, 64);
}


/// Rewrite the G_MUL \p MI. When both sources are matching width-doubling
/// extends it becomes a G_UMULL/G_SMULL of the un-extended values and \p MI is
/// erased. Otherwise a v2s64 multiply is split into narrower pieces via
/// LegalizerHelper::fewerElementsVector.
void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Get the instructions that defined the source operand
  const Register ResultReg = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(ResultReg);
  MachineInstr *LHSDef = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  const unsigned LHSOpc = LHSDef->getOpcode();
  const unsigned RHSOpc = RHSDef->getOpcode();
  const bool BothZExt =
      LHSOpc == TargetOpcode::G_ZEXT && RHSOpc == TargetOpcode::G_ZEXT;
  const bool BothSExt =
      LHSOpc == TargetOpcode::G_SEXT && RHSOpc == TargetOpcode::G_SEXT;

  // True when the extend exactly doubles the element width.
  auto DoublesEltWidth = [&MRI](const MachineInstr &Ext) {
    const unsigned WideBits =
        MRI.getType(Ext.getOperand(0).getReg()).getScalarSizeInBits();
    const unsigned NarrowBits =
        MRI.getType(Ext.getOperand(1).getReg()).getScalarSizeInBits();
    return WideBits == NarrowBits * 2;
  };

  // If the source operands were EXTENDED before, then {U/S}MULL can be used
  if ((BothZExt || BothSExt) && DoublesEltWidth(*LHSDef) &&
      DoublesEltWidth(*RHSDef)) {
    const unsigned MullOpc = LHSDef->getOpcode() == TargetOpcode::G_ZEXT
                                 ? AArch64::G_UMULL
                                 : AArch64::G_SMULL;
    const Register NarrowLHS = LHSDef->getOperand(1).getReg();
    const Register NarrowRHS = RHSDef->getOperand(1).getReg();

    B.setInstrAndDebugLoc(MI);
    B.buildInstr(MullOpc, {ResultReg}, {NarrowLHS, NarrowRHS});
    MI.eraseFromParent();
    return;
  }

  // If result type is v2s64, scalarise the instruction
  if (ResultTy == LLT::fixed_vector(2, 64)) {
    LegalizerHelper Helper(*MI.getMF(), Observer, B);
    B.setInstrAndDebugLoc(MI);
    const LLT HalfTy = ResultTy.changeElementCount(
        ResultTy.getElementCount().divideCoefficientBy(2));
    Helper.fewerElementsVector(MI, 0, HalfTy);
  }
}


/// Combiner implementation behind the post-legalizer lowering pass. The rule
/// matching members are pulled in from AArch64GenPostLegalizeGILowering.inc
/// (presumably TableGen-generated, going by the GET_GICOMBINER_* guards).
class AArch64PostLegalizerLoweringImpl : public Combiner {

protected:
  // State shared with the rules pulled in from the .inc file below.
  const CombinerHelper Helper;

  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;

  const AArch64Subtarget &STI;


public:

  AArch64PostLegalizerLoweringImpl(

      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

      GISelCSEInfo *CSEInfo,

      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,

      const AArch64Subtarget &STI);

  // NOTE(review): this string names a pre-legalizer combiner although the
  // class is the post-legalizer lowering; it looks like a copy/paste leftover.
  // It is a runtime string, so confirm what consumes it before changing it.
  static const char *getName() { return "AArch6400PreLegalizerCombiner"; }

  // Declared here; the definition is not in this class body.
  bool tryCombineAll(MachineInstr &I) const override;


private:
// Class members supplied by the included .inc file.
#define GET_GICOMBINER_CLASS_MEMBERS

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_CLASS_MEMBERS

};


#define GET_GICOMBINER_IMPL

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_IMPL


/// Constructs the lowering combiner. No known-bits analysis is supplied to the
/// Combiner base (nullptr), and the remaining member initializers come from
/// the included .inc file.
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(

    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

    GISelCSEInfo *CSEInfo,

    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,

    const AArch64Subtarget &STI)

    : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
      // NOTE(review): IsPreLegalize is true although this runs after
      // legalization -- presumably intentional, confirm.
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),

      STI(STI),
// The .inc file contributes the rest of the initializer list.
#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


/// MachineFunctionPass that runs AArch64PostLegalizerLoweringImpl over a
/// function (see runOnMachineFunction).
class AArch64PostLegalizerLowering : public MachineFunctionPass {

public:
  // Pass identifier; passed to the MachineFunctionPass base by the constructor.
  static char ID;


  AArch64PostLegalizerLowering();


  StringRef getPassName() const override {

    return "AArch64PostLegalizerLowering";

  }


  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;


private:
  // Rule configuration; the constructor fills it from the command line.
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;

};

} // end anonymous namespace


/// Declares the pass's analysis requirements: TargetPassConfig is required and
/// the CFG is preserved; the remainder is delegated to
/// getSelectionDAGFallbackAnalysisUsage and the MachineFunctionPass base.
void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<TargetPassConfig>();

  AU.setPreservesCFG();

  getSelectionDAGFallbackAnalysisUsage(AU);

  MachineFunctionPass::getAnalysisUsage(AU);

}


/// Registers the pass with the global pass registry and loads the rule
/// configuration from the command line, aborting on an invalid rule
/// identifier.
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());

  const bool ParsedOk = RuleConfig.parseCommandLineOption();
  if (!ParsedOk)
    report_fatal_error("Invalid rule identifier");
}


/// Runs the lowering combiner once over \p MF.
///
/// Functions for which instruction selection has already failed are left
/// untouched. \returns whatever the combiner reports from
/// combineMachineInstrs().
bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  const MachineFunctionProperties &Props = MF.getProperties();
  if (Props.hasProperty(MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");

  auto *PassConfig = &getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  const bool OptForSize = Fn.hasOptSize();
  const bool OptForMinSize = Fn.hasMinSize();
  CombinerInfo Info(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                    /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
                    OptForSize, OptForMinSize);
  // Disable fixed-point iteration to reduce compile-time
  Info.MaxIterations = 1;
  Info.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
  Info.EnableFullDCE = false;

  AArch64PostLegalizerLoweringImpl Lowering(MF, Info, PassConfig,
                                            /*CSEInfo*/ nullptr, RuleConfig,
                                            Subtarget);
  return Lowering.combineMachineInstrs();
}


// Definition of the pass's static ID member.
char AArch64PostLegalizerLowering::ID = 0;
// Pass registration: names TargetPassConfig as a dependency.
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,

                      "Lower AArch64 MachineInstrs after legalization", false,

                      false)

INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)

INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,

                    "Lower AArch64 MachineInstrs after legalization", false,

                    false)


namespace llvm {
/// Factory: returns a newly allocated AArch64PostLegalizerLowering pass.
FunctionPass *createAArch64PostLegalizerLowering() {

  return new AArch64PostLegalizerLowering();

}

} // end namespace llvm

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

AArch64BaseInfo.h

AArch64ExpandImm.h

AArch64GlobalISelUtils.h

isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition: AArch64ISelLowering.cpp:15593

isLegalArithImmed
static bool isLegalArithImmed(uint64_t C)
Definition: AArch64ISelLowering.cpp:3590

isCMN
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3613

isINSMask
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
Definition: AArch64ISelLowering.cpp:13333

getCmpOperandFoldingProfit
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
Definition: AArch64ISelLowering.cpp:3989

AArch64LegalizerInfo.h
This file declares the targeting of the MachineLegalizer class for AArch64.

AArch64MCTargetDesc.h

Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:167

AArch64PerfectShuffle.h

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

legalization
Lower AArch64 MachineInstrs after legalization
Definition: AArch64PostLegalizerLowering.cpp:1361

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AArch64PostLegalizerLowering.cpp:54

AArch64Subtarget.h

true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1981

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Utils.h

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass, or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:353

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

GIMatchTableExecutorImpl.h

GISelChangeObserver.h
This contains common code to allow clients to notify changes to machine instr.

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:112

InitializePasses.h

InstrTypes.h

LegalizerHelper.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineFrameInfo.h

MachineFunctionPass.h

MachineIRBuilder.h
This file declares the MachineIRBuilder class.

MachineInstrBuilder.h

MachineRegisterInfo.h

getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition: MipsDisassembler.cpp:520

P
#define P(N)

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

TargetOpcodes.h

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:68

llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520

llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1739

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673

llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702

llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703

llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696

llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698

llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701

llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699

llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:825

llvm::CombinerHelper
Definition: CombinerHelper.h:104

llvm::Combiner
Combiner implementation.
Definition: Combiner.h:34

llvm::Combiner::tryCombineAll
virtual bool tryCombineAll(MachineInstr &I) const =0

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::Function
Definition: Function.h:63

llvm::GISelCSEInfo
The CSE Analysis object.
Definition: CSEInfo.h:70

llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29

llvm::GISelChangeObserver::changingInstr
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.

llvm::GISelChangeObserver::changedInstr
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.

llvm::LLT
Definition: LowLevelType.h:39

llvm::LLT::isScalableVector
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:181

llvm::LLT::getScalarSizeInBits
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:264

llvm::LLT::multiplyElements
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:251

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42

llvm::LLT::getNumElements
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159

llvm::LLT::isVector
constexpr bool isVector() const
Definition: LowLevelType.h:148

llvm::LLT::pointer
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57

llvm::LLT::isScalable
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:190

llvm::LLT::getElementType
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:277

llvm::LLT::getElementCount
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:183

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100

llvm::LLT::changeElementCount
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:227

llvm::LLT::getSizeInBytes
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:200

llvm::LegalizerHelper
Definition: LegalizerHelper.h:48

llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:169

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:203

llvm::MachineFunction
Definition: MachineFunction.h:267

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:733

llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:749

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:704

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:824

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:235

llvm::MachineIRBuilder::buildNot
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
Definition: MachineIRBuilder.h:1911

llvm::MachineIRBuilder::buildMul
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
Definition: MachineIRBuilder.h:1769

llvm::MachineIRBuilder::buildAnd
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
Definition: MachineIRBuilder.h:1881

llvm::MachineIRBuilder::buildExtractVectorElement
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
Definition: MachineIRBuilder.cpp:979

llvm::MachineIRBuilder::buildLoad
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
Definition: MachineIRBuilder.h:959

llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition: MachineIRBuilder.cpp:202

llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:468

llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition: MachineIRBuilder.h:417

llvm::MachineIRBuilder::buildFrameIndex
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
Definition: MachineIRBuilder.cpp:147

llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition: MachineIRBuilder.h:287

llvm::MachineIRBuilder::setInstrAndDebugLoc
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Definition: MachineIRBuilder.h:376

llvm::MachineIRBuilder::buildInsertVectorElement
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
Definition: MachineIRBuilder.cpp:973

llvm::MachineIRBuilder::getMRI
MachineRegisterInfo * getMRI()
Getter for MRI.
Definition: MachineIRBuilder.h:309

llvm::MachineIRBuilder::buildOr
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
Definition: MachineIRBuilder.h:1896

llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:317

llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:96

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:71

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:577

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:369

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254

uint32_t

uint64_t

unsigned

UINT64_MAX
#define UINT64_MAX
Definition: DataTypes.h:77

INT64_MIN
#define INT64_MIN
Definition: DataTypes.h:74

INT64_MAX
#define INT64_MAX
Definition: DataTypes.h:71

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:193

llvm::AArch64CC::CondCode
CondCode
Definition: AArch64BaseInfo.h:254

llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256

llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265

llvm::AArch64CC::EQ
@ EQ
Definition: AArch64BaseInfo.h:255

llvm::AArch64CC::MI
@ MI
Definition: AArch64BaseInfo.h:259

llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267

llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269

llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264

llvm::AArch64GISelUtils::getAArch64VectorSplat
std::optional< RegOrConstant > getAArch64VectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition: AArch64GlobalISelUtils.cpp:19

llvm::AArch64GISelUtils::changeVectorFCMPPredToAArch64CC
void changeVectorFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
Find the AArch64 condition codes necessary to represent P for a vector floating point comparison.
Definition: AArch64GlobalISelUtils.cpp:186

llvm::AArch64GISelUtils::getAArch64VectorSplatScalar
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition: AArch64GlobalISelUtils.cpp:33

llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition: AArch64ExpandImm.cpp:533

llvm::AMDGPU::Imm
@ Imm
Definition: AMDGPURegBankLegalizeRules.h:105

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::ARM::ProfileKind::M
@ M

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:270

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition: MIPatternMatch.h:238

llvm::MIPatternMatch::m_GImplicitDef
ImplicitDefMatch m_GImplicitDef()
Definition: MIPatternMatch.h:414

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25

llvm::MIPatternMatch::m_GTrunc
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
Definition: MIPatternMatch.h:643

llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:157

llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:163

llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:54

llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:621

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp

llvm::cfg::UpdateKind::Insert
@ Insert

llvm::dxil::ElementType::I1
@ I1

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::getOpcodeDef
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:645

llvm::initializeAArch64PostLegalizerLoweringPass
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &)

llvm::AlignStyle::Right
@ Right

llvm::AlignStyle::Left
@ Left

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:77

llvm::isTRNMask
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResult)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> or <1,...
Definition: AArch64PerfectShuffle.h:6686

llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297

llvm::getDefIgnoringCopies
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:486

llvm::createAArch64PostLegalizerLowering
FunctionPass * createAArch64PostLegalizerLowering()
Definition: AArch64PostLegalizerLowering.cpp:1365

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::HexPrintStyle::Lower
@ Lower

llvm::ComplexDeinterleavingOperation::Splat
@ Splat

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167

llvm::isUZPMask
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
Definition: AArch64PerfectShuffle.h:6659

llvm::isREVMask
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
Definition: AArch64PerfectShuffle.h:6702

llvm::getAnyConstantVRegValWithLookThrough
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:439

llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1168

llvm::isZIPMask
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
Definition: AArch64PerfectShuffle.h:6626

llvm::getIConstantVRegValWithLookThrough
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766

llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:290

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::CombinerInfo
Definition: CombinerInfo.h:24

llvm::MIPatternMatch::And
Matching combinators.
Definition: MIPatternMatch.h:273

llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:41

llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1072