docs/doxygen/AArch64PostLegalizerCombiner_8cpp_source.html

//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// Post-legalization combines on generic MachineInstrs.

///

/// The combines here must preserve instruction legality.

///

/// Lowering combines (e.g. pseudo matching) should be handled by

/// AArch64PostLegalizerLowering.

///

/// Combines which don't rely on instruction legality should go in the

/// AArch64PreLegalizerCombiner.

///

//===----------------------------------------------------------------------===//


#include "AArch64TargetMachine.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/CodeGen/GlobalISel/CSEInfo.h"

#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"

#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Utils.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/Support/Debug.h"


#define GET_GICOMBINER_DEPS

#include "AArch64GenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "aarch64-postlegalizer-combiner"


using namespace llvm;

using namespace MIPatternMatch;


namespace {


#define GET_GICOMBINER_TYPES

#include "AArch64GenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_TYPES


/// This combine tries do what performExtractVectorEltCombine does in SDAG.

/// Rewrite for pairwise fadd pattern

///   (s32 (g_extract_vector_elt

///           (g_fadd (vXs32 Other)

///                  (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))

/// ->

///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)

///              (g_extract_vector_elt (vXs32 Other) 1))

bool matchExtractVecEltPairwiseAdd(

    MachineInstr &MI, MachineRegisterInfo &MRI,

    std::tuple<unsigned, LLT, Register> &MatchInfo) {

  Register Src1 = MI.getOperand(1).getReg();

  Register Src2 = MI.getOperand(2).getReg();

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());


  auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);

  if (!Cst || Cst->Value != 0)

    return false;

  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.


  // Now check for an fadd operation. TODO: expand this for integer add?

  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);

  if (!FAddMI)

    return false;


  // If we add support for integer add, must restrict these types to just s64.

  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 16 && DstSize != 32 && DstSize != 64)

    return false;


  Register Src1Op1 = FAddMI->getOperand(1).getReg();

  Register Src1Op2 = FAddMI->getOperand(2).getReg();

  MachineInstr *Shuffle =

      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);

  MachineInstr *Other = MRI.getVRegDef(Src1Op1);

  if (!Shuffle) {

    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);

    Other = MRI.getVRegDef(Src1Op2);

  }


  // We're looking for a shuffle that moves the second element to index 0.

  if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&

      Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {

    std::get<0>(MatchInfo) = TargetOpcode::G_FADD;

    std::get<1>(MatchInfo) = DstTy;

    std::get<2>(MatchInfo) = Other->getOperand(0).getReg();

    return true;

  }

  return false;

}


void applyExtractVecEltPairwiseAdd(

    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,

    std::tuple<unsigned, LLT, Register> &MatchInfo) {

  unsigned Opc = std::get<0>(MatchInfo);

  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");

  // We want to generate two extracts of elements 0 and 1, and add them.

  LLT Ty = std::get<1>(MatchInfo);

  Register Src = std::get<2>(MatchInfo);

  LLT s64 = LLT::scalar(64);

  B.setInstrAndDebugLoc(MI);

  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));

  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));

  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});

  MI.eraseFromParent();

}


bool isSignExtended(Register R, MachineRegisterInfo &MRI) {

  // TODO: check if extended build vector as well.

  unsigned Opc = MRI.getVRegDef(R)->getOpcode();

  return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;

}


bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {

  // TODO: check if extended build vector as well.

  return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;

}


bool matchAArch64MulConstCombine(

    MachineInstr &MI, MachineRegisterInfo &MRI,

    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {

  assert(MI.getOpcode() == TargetOpcode::G_MUL);

  Register LHS = MI.getOperand(1).getReg();

  Register RHS = MI.getOperand(2).getReg();

  Register Dst = MI.getOperand(0).getReg();

  const LLT Ty = MRI.getType(LHS);


  // The below optimizations require a constant RHS.

  auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);

  if (!Const)

    return false;


  APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());

  // The following code is ported from AArch64ISelLowering.

  // Multiplication of a power of two plus/minus one can be done more

  // cheaply as shift+add/sub. For now, this is true unilaterally. If

  // future CPUs have a cheaper MADD instruction, this may need to be

  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and

  // 64-bit is 5 cycles, so this is always a win.

  // More aggressively, some multiplications N0 * C can be lowered to

  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,

  // e.g. 6=3*2=(2+1)*2.

  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45

  // which equals to (1+2)*16-(1+2).

  // TrailingZeroes is used to test if the mul can be lowered to

  // shift+add+shift.

  unsigned TrailingZeroes = ConstValue.countr_zero();

  if (TrailingZeroes) {

    // Conservatively do not lower to shift+add+shift if the mul might be

    // folded into smul or umul.

    if (MRI.hasOneNonDBGUse(LHS) &&

        (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))

      return false;

    // Conservatively do not lower to shift+add+shift if the mul might be

    // folded into madd or msub.

    if (MRI.hasOneNonDBGUse(Dst)) {

      MachineInstr &UseMI = *MRI.use_instr_begin(Dst);

      unsigned UseOpc = UseMI.getOpcode();

      if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||

          UseOpc == TargetOpcode::G_SUB)

        return false;

    }

  }

  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub

  // and shift+add+shift.

  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);


  unsigned ShiftAmt, AddSubOpc;

  // Is the shifted value the LHS operand of the add/sub?

  bool ShiftValUseIsLHS = true;

  // Do we need to negate the result?

  bool NegateResult = false;


  if (ConstValue.isNonNegative()) {

    // (mul x, 2^N + 1) => (add (shl x, N), x)

    // (mul x, 2^N - 1) => (sub (shl x, N), x)

    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)

    APInt SCVMinus1 = ShiftedConstValue - 1;

    APInt CVPlus1 = ConstValue + 1;

    if (SCVMinus1.isPowerOf2()) {

      ShiftAmt = SCVMinus1.logBase2();

      AddSubOpc = TargetOpcode::G_ADD;

    } else if (CVPlus1.isPowerOf2()) {

      ShiftAmt = CVPlus1.logBase2();

      AddSubOpc = TargetOpcode::G_SUB;

    } else

      return false;

  } else {

    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)

    APInt CVNegPlus1 = -ConstValue + 1;

    APInt CVNegMinus1 = -ConstValue - 1;

    if (CVNegPlus1.isPowerOf2()) {

      ShiftAmt = CVNegPlus1.logBase2();

      AddSubOpc = TargetOpcode::G_SUB;

      ShiftValUseIsLHS = false;

    } else if (CVNegMinus1.isPowerOf2()) {

      ShiftAmt = CVNegMinus1.logBase2();

      AddSubOpc = TargetOpcode::G_ADD;

      NegateResult = true;

    } else

      return false;

  }


  if (NegateResult && TrailingZeroes)

    return false;


  ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {

    auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);

    auto ShiftedVal = B.buildShl(Ty, LHS, Shift);


    Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;

    Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);

    auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});

    assert(!(NegateResult && TrailingZeroes) &&

           "NegateResult and TrailingZeroes cannot both be true for now.");

    // Negate the result.

    if (NegateResult) {

      B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);

      return;

    }

    // Shift the result.

    if (TrailingZeroes) {

      B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));

      return;

    }

    B.buildCopy(DstReg, Res.getReg(0));

  };

  return true;

}


void applyAArch64MulConstCombine(

    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,

    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {

  B.setInstrAndDebugLoc(MI);

  ApplyFn(B, MI.getOperand(0).getReg());

  MI.eraseFromParent();

}


/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source

/// is a zero, into a G_ZEXT of the first.

bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {

  auto &Merge = cast<GMerge>(MI);

  LLT SrcTy = MRI.getType(Merge.getSourceReg(0));

  if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)

    return false;

  return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));

}


void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,

                          MachineIRBuilder &B, GISelChangeObserver &Observer) {

  // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)

  //  ->

  // %d(s64) = G_ZEXT %a(s32)

  Observer.changingInstr(MI);

  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));

  MI.removeOperand(2);

  Observer.changedInstr(MI);

}


/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT

/// instruction.

bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {

  // If this is coming from a scalar compare then we can use a G_ZEXT instead of

  // a G_ANYEXT:

  //

  // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.

  // %ext:_(s64) = G_ANYEXT %cmp(s32)

  //

  // By doing this, we can leverage more KnownBits combines.

  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

  Register Dst = MI.getOperand(0).getReg();

  Register Src = MI.getOperand(1).getReg();

  return MRI.getType(Dst).isScalar() &&

         mi_match(Src, MRI,

                  m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),

                           m_GFCmp(m_Pred(), m_Reg(), m_Reg())));

}


void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,

                             MachineIRBuilder &B,

                             GISelChangeObserver &Observer) {

  Observer.changingInstr(MI);

  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));

  Observer.changedInstr(MI);

}


/// Match a 128b store of zero and split it into two 64 bit stores, for

/// size/performance reasons.

bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {

  GStore &Store = cast<GStore>(MI);

  if (!Store.isSimple())

    return false;

  LLT ValTy = MRI.getType(Store.getValueReg());

  if (ValTy.isScalableVector())

    return false;

  if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)

    return false;

  if (Store.getMemSizeInBits() != ValTy.getSizeInBits())

    return false; // Don't split truncating stores.

  if (!MRI.hasOneNonDBGUse(Store.getValueReg()))

    return false;

  auto MaybeCst = isConstantOrConstantSplatVector(

      *MRI.getVRegDef(Store.getValueReg()), MRI);

  return MaybeCst && MaybeCst->isZero();

}


void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,

                            MachineIRBuilder &B,

                            GISelChangeObserver &Observer) {

  B.setInstrAndDebugLoc(MI);

  GStore &Store = cast<GStore>(MI);

  assert(MRI.getType(Store.getValueReg()).isVector() &&

         "Expected a vector store value");

  LLT NewTy = LLT::scalar(64);

  Register PtrReg = Store.getPointerReg();

  auto Zero = B.buildConstant(NewTy, 0);

  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,

                               B.buildConstant(LLT::scalar(64), 8));

  auto &MF = *MI.getMF();

  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);

  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);

  B.buildStore(Zero, PtrReg, *LowMMO);

  B.buildStore(Zero, HighPtr, *HighMMO);

  Store.eraseFromParent();

}


bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,

                  std::tuple<Register, Register, Register> &MatchInfo) {

  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  if (!DstTy.isVector())

    return false;


  Register AO1, AO2, BVO1, BVO2;

  if (!mi_match(MI, MRI,

                m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),

                      m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))

    return false;


  auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);

  auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);

  if (!BV1 || !BV2)

    return false;


  for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {

    auto ValAndVReg1 =

        getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);

    auto ValAndVReg2 =

        getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);

    if (!ValAndVReg1 || !ValAndVReg2 ||

        ValAndVReg1->Value != ~ValAndVReg2->Value)

      return false;

  }


  MatchInfo = {AO1, AO2, BVO1};

  return true;

}


void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,

                  MachineIRBuilder &B,

                  std::tuple<Register, Register, Register> &MatchInfo) {

  B.setInstrAndDebugLoc(MI);

  B.buildInstr(

      AArch64::G_BSP, {MI.getOperand(0).getReg()},

      {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});

  MI.eraseFromParent();

}


// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz

bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,

                         Register &SrcReg) {

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());


  if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&

      DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&

      DstTy != LLT::fixed_vector(8, 16))

    return false;


  auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);

  if (AndMI->getOpcode() != TargetOpcode::G_AND)

    return false;

  auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);

  if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)

    return false;


  // Check the constant splat values

  auto V1 = isConstantOrConstantSplatVector(

      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);

  auto V2 = isConstantOrConstantSplatVector(

      *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);

  auto V3 = isConstantOrConstantSplatVector(

      *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);

  if (!V1.has_value() || !V2.has_value() || !V3.has_value())

    return false;

  unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;

  if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||

      V3 != (HalfSize - 1))

    return false;


  SrcReg = LShrMI->getOperand(1).getReg();


  return true;

}


void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,

                         MachineIRBuilder &B, Register &SrcReg) {

  Register DstReg = MI.getOperand(0).getReg();

  LLT DstTy = MRI.getType(DstReg);

  LLT HalfTy =

      DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))

          .changeElementSize(DstTy.getScalarSizeInBits() / 2);


  Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);

  Register CastReg =

      B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);

  Register CMLTReg =

      B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)

          .getReg(0);


  B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);

  MI.eraseFromParent();

}


// Match mul({z/s}ext , {z/s}ext) => {u/s}mull

bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,

                       GISelValueTracking *KB,

                       std::tuple<bool, Register, Register> &MatchInfo) {

  // Get the instructions that defined the source operand

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);

  MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  unsigned I1Opc = I1->getOpcode();

  unsigned I2Opc = I2->getOpcode();

  unsigned EltSize = DstTy.getScalarSizeInBits();


  if (!DstTy.isVector() || I1->getNumOperands() < 2 || I2->getNumOperands() < 2)

    return false;


  auto IsAtLeastDoubleExtend = [&](Register R) {

    LLT Ty = MRI.getType(R);

    return EltSize >= Ty.getScalarSizeInBits() * 2;

  };


  // If the source operands were EXTENDED before, then {U/S}MULL can be used

  bool IsZExt1 =

      I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_ANYEXT;

  bool IsZExt2 =

      I2Opc == TargetOpcode::G_ZEXT || I2Opc == TargetOpcode::G_ANYEXT;

  if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&

      IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {

    get<0>(MatchInfo) = true;

    get<1>(MatchInfo) = I1->getOperand(1).getReg();

    get<2>(MatchInfo) = I2->getOperand(1).getReg();

    return true;

  }


  bool IsSExt1 =

      I1Opc == TargetOpcode::G_SEXT || I1Opc == TargetOpcode::G_ANYEXT;

  bool IsSExt2 =

      I2Opc == TargetOpcode::G_SEXT || I2Opc == TargetOpcode::G_ANYEXT;

  if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&

      IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {

    get<0>(MatchInfo) = false;

    get<1>(MatchInfo) = I1->getOperand(1).getReg();

    get<2>(MatchInfo) = I2->getOperand(1).getReg();

    return true;

  }


  // Select UMULL if we can replace the other operand with an extend.

  APInt Mask = APInt::getHighBitsSet(EltSize, EltSize / 2);

  if (KB && (IsZExt1 || IsZExt2) &&

      IsAtLeastDoubleExtend(IsZExt1 ? I1->getOperand(1).getReg()

                                    : I2->getOperand(1).getReg())) {

    Register ZExtOp =

        IsZExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();

    if (KB->maskedValueIsZero(ZExtOp, Mask)) {

      get<0>(MatchInfo) = true;

      get<1>(MatchInfo) = IsZExt1 ? I1->getOperand(1).getReg() : ZExtOp;

      get<2>(MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(1).getReg();

      return true;

    }

  } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&

             KB->maskedValueIsZero(MI.getOperand(1).getReg(), Mask) &&

             KB->maskedValueIsZero(MI.getOperand(2).getReg(), Mask)) {

    get<0>(MatchInfo) = true;

    get<1>(MatchInfo) = MI.getOperand(1).getReg();

    get<2>(MatchInfo) = MI.getOperand(2).getReg();

    return true;

  }


  if (KB && (IsSExt1 || IsSExt2) &&

      IsAtLeastDoubleExtend(IsSExt1 ? I1->getOperand(1).getReg()

                                    : I2->getOperand(1).getReg())) {

    Register SExtOp =

        IsSExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();

    if (KB->computeNumSignBits(SExtOp) > EltSize / 2) {

      get<0>(MatchInfo) = false;

      get<1>(MatchInfo) = IsSExt1 ? I1->getOperand(1).getReg() : SExtOp;

      get<2>(MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(1).getReg();

      return true;

    }

  } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&

             KB->computeNumSignBits(MI.getOperand(1).getReg()) > EltSize / 2 &&

             KB->computeNumSignBits(MI.getOperand(2).getReg()) > EltSize / 2) {

    get<0>(MatchInfo) = false;

    get<1>(MatchInfo) = MI.getOperand(1).getReg();

    get<2>(MatchInfo) = MI.getOperand(2).getReg();

    return true;

  }


  return false;

}


void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,

                       MachineIRBuilder &B, GISelChangeObserver &Observer,

                       std::tuple<bool, Register, Register> &MatchInfo) {

  assert(MI.getOpcode() == TargetOpcode::G_MUL &&

         "Expected a G_MUL instruction");


  // Get the instructions that defined the source operand

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  bool IsZExt = get<0>(MatchInfo);

  Register Src1Reg = get<1>(MatchInfo);

  Register Src2Reg = get<2>(MatchInfo);

  LLT Src1Ty = MRI.getType(Src1Reg);

  LLT Src2Ty = MRI.getType(Src2Reg);

  LLT HalfDstTy = DstTy.changeElementSize(DstTy.getScalarSizeInBits() / 2);

  unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;


  if (Src1Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())

    Src1Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src1Reg}).getReg(0);

  if (Src2Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())

    Src2Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src2Reg}).getReg(0);


  B.buildInstr(IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,

               {MI.getOperand(0).getReg()}, {Src1Reg, Src2Reg});

  MI.eraseFromParent();

}


class AArch64PostLegalizerCombinerImpl : public Combiner {

protected:

  const CombinerHelper Helper;

  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;

  const AArch64Subtarget &STI;


public:

  AArch64PostLegalizerCombinerImpl(

      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

      GISelValueTracking &VT, GISelCSEInfo *CSEInfo,

      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,

      const AArch64Subtarget &STI, MachineDominatorTree *MDT,

      const LegalizerInfo *LI);


  static const char *getName() { return "AArch64PostLegalizerCombiner"; }


  bool tryCombineAll(MachineInstr &I) const override;


private:

#define GET_GICOMBINER_CLASS_MEMBERS

#include "AArch64GenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CLASS_MEMBERS

};


#define GET_GICOMBINER_IMPL

#include "AArch64GenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_IMPL


AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(

    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

    GISelValueTracking &VT, GISelCSEInfo *CSEInfo,

    const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,

    const AArch64Subtarget &STI, MachineDominatorTree *MDT,

    const LegalizerInfo *LI)

    : Combiner(MF, CInfo, TPC, &VT, CSEInfo),

      Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),

      RuleConfig(RuleConfig), STI(STI),

#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AArch64GenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


class AArch64PostLegalizerCombiner : public MachineFunctionPass {

public:

  static char ID;


  AArch64PostLegalizerCombiner(bool IsOptNone = false);


  StringRef getPassName() const override {

    return "AArch64PostLegalizerCombiner";

  }


  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;


private:

  bool IsOptNone;

  AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;


  struct StoreInfo {

    GStore *St = nullptr;

    // The G_PTR_ADD that's used by the store. We keep this to cache the

    // MachineInstr def.

    GPtrAdd *Ptr = nullptr;

    // The signed offset to the Ptr instruction.

    int64_t Offset = 0;

    LLT StoredType;

  };

  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,

                               CSEMIRBuilder &MIB);


  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,

                                          CSEMIRBuilder &MIB);

};

} // end anonymous namespace


void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<TargetPassConfig>();

  AU.setPreservesCFG();

  getSelectionDAGFallbackAnalysisUsage(AU);

  AU.addRequired<GISelValueTrackingAnalysisLegacy>();

  AU.addPreserved<GISelValueTrackingAnalysisLegacy>();

  if (!IsOptNone) {

    AU.addRequired<MachineDominatorTreeWrapperPass>();

    AU.addPreserved<MachineDominatorTreeWrapperPass>();

    AU.addRequired<GISelCSEAnalysisWrapperPass>();

    AU.addPreserved<GISelCSEAnalysisWrapperPass>();

  }

  MachineFunctionPass::getAnalysisUsage(AU);

}


AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)

    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {

  if (!RuleConfig.parseCommandLineOption())

    report_fatal_error("Invalid rule identifier");

}


bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {

  if (MF.getProperties().hasFailedISel())

    return false;

  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");

  auto *TPC = &getAnalysis<TargetPassConfig>();

  const Function &F = MF.getFunction();

  bool EnableOpt =

      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);


  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();

  const auto *LI = ST.getLegalizerInfo();


  GISelValueTracking *VT =

      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);

  MachineDominatorTree *MDT =

      IsOptNone ? nullptr

                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  GISelCSEAnalysisWrapper &Wrapper =

      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();

  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());


  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,

                     /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),

                     F.hasMinSize());

  // Disable fixed-point iteration to reduce compile-time

  CInfo.MaxIterations = 1;

  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;

  // Legalizer performs DCE, so a full DCE pass is unnecessary.

  CInfo.EnableFullDCE = false;

  AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo,

                                        RuleConfig, ST, MDT, LI);

  bool Changed = Impl.combineMachineInstrs();


  auto MIB = CSEMIRBuilder(MF);

  MIB.setCSEInfo(CSEInfo);

  Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);

  return Changed;

}


bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(

    SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {

  if (Stores.size() <= 2)

    return false;


  // Profitabity checks:

  int64_t BaseOffset = Stores[0].Offset;

  unsigned NumPairsExpected = Stores.size() / 2;

  unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);

  // Size savings will depend on whether we can fold the offset, as an

  // immediate of an ADD.

  auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();

  if (!TLI.isLegalAddImmediate(BaseOffset))

    TotalInstsExpected++;

  int SavingsExpected = Stores.size() - TotalInstsExpected;

  if (SavingsExpected <= 0)

    return false;


  auto &MRI = MIB.getMF().getRegInfo();


  // We have a series of consecutive stores. Factor out the common base

  // pointer and rewrite the offsets.

  Register NewBase = Stores[0].Ptr->getReg(0);

  for (auto &SInfo : Stores) {

    // Compute a new pointer with the new base ptr and adjusted offset.

    MIB.setInstrAndDebugLoc(*SInfo.St);

    auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);

    auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),

                                  NewBase, NewOff);

    if (MIB.getObserver())

      MIB.getObserver()->changingInstr(*SInfo.St);

    SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));

    if (MIB.getObserver())

      MIB.getObserver()->changedInstr(*SInfo.St);

  }

  LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()

                    << " stores into a base pointer and offsets.\n");

  return true;

}


static cl::opt<bool>

    EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",

                              cl::init(true), cl::Hidden,

                              cl::desc("Enable consecutive memop optimization "

                                       "in AArch64PostLegalizerCombiner"));


bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(

    MachineFunction &MF, CSEMIRBuilder &MIB) {

  // This combine needs to run after all reassociations/folds on pointer

  // addressing have been done, specifically those that combine two G_PTR_ADDs

  // with constant offsets into a single G_PTR_ADD with a combined offset.

  // The goal of this optimization is to undo that combine in the case where

  // doing so has prevented the formation of pair stores due to illegal

  // addressing modes of STP. The reason that we do it here is because

  // it's much easier to undo the transformation of a series consecutive

  // mem ops, than it is to detect when doing it would be a bad idea looking

  // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.

  //

  // An example:

  //   G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)

  //   %off1:_(s64) = G_CONSTANT i64 4128

  //   %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)

  //   G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)

  //   %off2:_(s64) = G_CONSTANT i64 4144

  //   %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)

  //   G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)

  //   %off3:_(s64) = G_CONSTANT i64 4160

  //   %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)

  //   G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)

  bool Changed = false;

  auto &MRI = MF.getRegInfo();


  if (!EnableConsecutiveMemOpOpt)

    return Changed;


  SmallVector<StoreInfo, 8> Stores;

  // If we see a load, then we keep track of any values defined by it.

  // In the following example, STP formation will fail anyway because

  // the latter store is using a load result that appears after the

  // the prior store. In this situation if we factor out the offset then

  // we increase code size for no benefit.

  //   G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))

  //   %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))

  //   G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))

  SmallVector<Register> LoadValsSinceLastStore;


  auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {

    // Check if this store is consecutive to the last one.

    if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||

        (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=

         New.Offset) ||

        Last.StoredType != New.StoredType)

      return false;


    // Check if this store is using a load result that appears after the

    // last store. If so, bail out.

    if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {

          return New.St->getValueReg() == LoadVal;

        }))

      return false;


    // Check if the current offset would be too large for STP.

    // If not, then STP formation should be able to handle it, so we don't

    // need to do anything.

    int64_t MaxLegalOffset;

    switch (New.StoredType.getSizeInBits()) {

    case 32:

      MaxLegalOffset = 252;

      break;

    case 64:

      MaxLegalOffset = 504;

      break;

    case 128:

      MaxLegalOffset = 1008;

      break;

    default:

      llvm_unreachable("Unexpected stored type size");

    }

    if (New.Offset < MaxLegalOffset)

      return false;


    // If factoring it out still wouldn't help then don't bother.

    return New.Offset - Stores[0].Offset <= MaxLegalOffset;

  };


  auto resetState = [&]() {

    Stores.clear();

    LoadValsSinceLastStore.clear();

  };


  for (auto &MBB : MF) {

    // We're looking inside a single BB at a time since the memset pattern

    // should only be in a single block.

    resetState();

    for (auto &MI : MBB) {

      // Skip for scalable vectors

      if (auto *LdSt = dyn_cast<GLoadStore>(&MI);

          LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())

        continue;


      if (auto *St = dyn_cast<GStore>(&MI)) {

        Register PtrBaseReg;

        APInt Offset;

        LLT StoredValTy = MRI.getType(St->getValueReg());

        unsigned ValSize = StoredValTy.getSizeInBits();

        if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)

          continue;


        Register PtrReg = St->getPointerReg();

        if (mi_match(

                PtrReg, MRI,

                m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {

          GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));

          StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};


          if (Stores.empty()) {

            Stores.push_back(New);

            continue;

          }


          // Check if this store is a valid continuation of the sequence.

          auto &Last = Stores.back();

          if (storeIsValid(Last, New)) {

            Stores.push_back(New);

            LoadValsSinceLastStore.clear(); // Reset the load value tracking.

          } else {

            // The store isn't a valid to consider for the prior sequence,

            // so try to optimize what we have so far and start a new sequence.

            Changed |= tryOptimizeConsecStores(Stores, MIB);

            resetState();

            Stores.push_back(New);

          }

        }

      } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {

        LoadValsSinceLastStore.push_back(Ld->getDstReg());

      }

    }

    Changed |= tryOptimizeConsecStores(Stores, MIB);

    resetState();

  }


  return Changed;

}


char AArch64PostLegalizerCombiner::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,

                      "Combine AArch64 MachineInstrs after legalization", false,

                      false)

INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)

INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)

INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,

                    "Combine AArch64 MachineInstrs after legalization", false,

                    false)


namespace llvm {


FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {

  return new AArch64PostLegalizerCombiner(IsOptNone);

}


} // end namespace llvm

MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

UseMI
MachineInstrBuilder & UseMI
Definition AArch64ExpandPseudoInsts.cpp:120

isZeroExtended
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5544

isSignExtended
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5538

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

EnableConsecutiveMemOpOpt
static cl::opt< bool > EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops", cl::init(true), cl::Hidden, cl::desc("Enable consecutive memop optimization " "in AArch64PostLegalizerCombiner"))

AArch64TargetMachine.h

Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition AMDGPUAliasAnalysis.cpp:31

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CSEInfo.h
Provides analysis for continuously CSEing during GISel passes.

CSEMIRBuilder.h
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.

Utils.h

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass,or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

GIMatchTableExecutorImpl.h

GISelChangeObserver.h
This contains common code to allow clients to notify changes to machine instr.

GISelValueTracking.h
Provides analysis for querying information about KnownBits during GISel passes.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineDominators.h

MachineFunctionPass.h

MachineIRBuilder.h
This file declares the MachineIRBuilder class.

MachineRegisterInfo.h

getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition MipsDisassembler.cpp:106

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

getName
static StringRef getName(Value *V)
Definition ProvenanceAnalysisEvaluator.cpp:20

Merge
R600 Clause Merge
Definition R600ClauseMergePass.cpp:70

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:75

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

Ptr
@ Ptr
Definition TargetLibraryInfo.cpp:77

TargetOpcodes.h

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

RHS
Value * RHS
Definition X86PartialReduction.cpp:74

LHS
Value * LHS
Definition X86PartialReduction.cpp:73

llvm::AArch64Subtarget
Definition AArch64Subtarget.h:38

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::countr_zero
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639

llvm::APInt::logBase2
unsigned logBase2() const
Definition APInt.h:1761

llvm::APInt::ashr
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827

llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334

llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440

llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::CSEMIRBuilder
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
Definition CSEMIRBuilder.h:39

llvm::CSEMIRBuilder::buildConstant
MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val) override
Build and insert Res = G_CONSTANT Val.
Definition CSEMIRBuilder.cpp:334

llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705

llvm::Combiner
Combiner implementation.
Definition Combiner.h:34

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:64

llvm::GISelCSEAnalysisWrapperPass
The actual analysis pass wrapper.
Definition CSEInfo.h:229

llvm::GISelCSEAnalysisWrapper
Simple wrapper that does the following.
Definition CSEInfo.h:211

llvm::GISelCSEInfo
The CSE Analysis object.
Definition CSEInfo.h:71

llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition GISelChangeObserver.h:30

llvm::GISelChangeObserver::changingInstr
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.

llvm::GISelChangeObserver::changedInstr
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.

llvm::GISelValueTrackingAnalysisLegacy
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
Definition GISelValueTracking.h:163

llvm::GISelValueTracking
Definition GISelValueTracking.h:34

llvm::GISelValueTracking::maskedValueIsZero
bool maskedValueIsZero(Register Val, const APInt &Mask)
Definition GISelValueTracking.h:87

llvm::GISelValueTracking::computeNumSignBits
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Definition GISelValueTracking.cpp:1783

llvm::GPtrAdd
Represents a G_PTR_ADD.
Definition GenericMachineInstrs.h:331

llvm::GStore
Represents a G_STORE.
Definition GenericMachineInstrs.h:241

llvm::LLT
Definition LowLevelType.h:40

llvm::LLT::getScalarSizeInBits
constexpr unsigned getScalarSizeInBits() const
Definition LowLevelType.h:265

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition LowLevelType.h:43

llvm::LLT::getNumElements
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition LowLevelType.h:160

llvm::LLT::isVector
constexpr bool isVector() const
Definition LowLevelType.h:149

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition LowLevelType.h:191

llvm::LLT::getElementCount
constexpr ElementCount getElementCount() const
Definition LowLevelType.h:184

llvm::LLT::changeElementSize
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition LowLevelType.h:219

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition LowLevelType.h:101

llvm::LLT::changeElementCount
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition LowLevelType.h:228

llvm::LegalizerInfo
Definition LegalizerInfo.h:1331

llvm::MachineDominatorTreeWrapperPass
Analysis pass which computes a MachineDominatorTree.
Definition MachineDominators.h:127

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition MachineDominators.h:71

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:184

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:762

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:772

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:733

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition MachineFunction.h:853

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition MachineFunction.h:758

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition MachineIRBuilder.h:236

llvm::MachineIRBuilder::getObserver
GISelChangeObserver * getObserver()
Definition MachineIRBuilder.h:386

llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition MachineIRBuilder.cpp:202

llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition MachineIRBuilder.h:288

llvm::MachineIRBuilder::setInstrAndDebugLoc
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Definition MachineIRBuilder.h:377

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition MachineInstr.h:587

llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition MachineInstr.h:590

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:595

llvm::MachineOperand::getShuffleMask
ArrayRef< int > getShuffleMask() const
Definition MachineOperand.h:622

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:368

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition Pass.cpp:85

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:19

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:573

llvm::SmallVectorImpl::clear
void clear()
Definition SmallVector.h:610

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:416

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:79

llvm::SmallVectorTemplateCommon::back
reference back()
Definition SmallVector.h:311

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:82

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1196

llvm::StoreInfo
Definition GVNHoist.cpp:194

llvm::TargetMachine::getOptLevel
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition TargetMachine.h:289

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition TargetPassConfig.h:84

llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition TargetSubtargetInfo.h:103

llvm::cl::opt
Definition CommandLine.h:1455

llvm::details::FixedOrScalableQuantity::multiplyCoefficientBy
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:257

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:244

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:127

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::MIPatternMatch
Definition MIPatternMatch.h:25

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition MIPatternMatch.h:311

llvm::MIPatternMatch::m_SpecificICst
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
Definition MIPatternMatch.h:213

llvm::MIPatternMatch::m_Pred
operand_type_match m_Pred()
Definition MIPatternMatch.h:430

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition MIPatternMatch.h:102

llvm::MIPatternMatch::m_GOr
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:617

llvm::MIPatternMatch::m_OneNonDBGUse
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
Definition MIPatternMatch.h:70

llvm::MIPatternMatch::m_GICmp
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP > m_GICmp(const Pred &P, const LHS &L, const RHS &R)
Definition MIPatternMatch.h:829

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition MIPatternMatch.h:28

llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:570

llvm::MIPatternMatch::m_any_of
Or< Preds... > m_any_of(Preds &&... preds)
Definition MIPatternMatch.h:355

llvm::MIPatternMatch::m_GAnd
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:606

llvm::MIPatternMatch::m_GFCmp
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
Definition MIPatternMatch.h:835

llvm::NVPTX::Const
@ Const
Definition NVPTX.h:185

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:463

llvm::SPII::Store
@ Store
Definition SparcInstrInfo.h:33

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:445

llvm::dxil::ElementType::I1
@ I1
Definition DXILABI.h:62

llvm::logicalview::LVAttributeKind::Zero
@ Zero
Definition LVOptions.h:130

llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
Definition MicrosoftDemangleNodes.h:121

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::Offset
@ Offset
Definition DWP.cpp:477

llvm::getOpcodeDef
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:651

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::isConstantOrConstantSplatVector
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564

llvm::getDefIgnoringCopies
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732

llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition PointerIntPair.h:268

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167

llvm::CodeGenOptLevel::None
@ None
-O0
Definition CodeGen.h:83

llvm::createAArch64PostLegalizerCombiner
FunctionPass * createAArch64PostLegalizerCombiner(bool IsOptNone)
Definition AArch64PostLegalizerCombiner.cpp:890

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::getSelectionDAGFallbackAnalysisUsage
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1184

llvm::PseudoProbeReservedId::Last
@ Last
Definition PseudoProbe.h:28

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::getIConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433

llvm::CombinerInfo
Definition CombinerInfo.h:24

llvm::CombinerInfo::ObserverLevel::SinglePass
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...
Definition CombinerInfo.h:71

llvm::cl::desc
Definition CommandLine.h:411