doxygen/AMDGPUPostLegalizerCombiner_8cpp_source.html

//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass does combining of machine instructions at the generic MI level,

// after the legalizer.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUCombinerHelper.h"

#include "AMDGPULegalizerInfo.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

#include "llvm/Target/TargetMachine.h"


#define GET_GICOMBINER_DEPS

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"


using namespace llvm;

using namespace MIPatternMatch;


namespace {

#define GET_GICOMBINER_TYPES

#include "AMDGPUGenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_TYPES


/// Combiner implementation used by the AMDGPU post-legalizer combine pass.
/// It declares the hand-written match/apply helpers defined later in this
/// file; additional members are pulled in from
/// AMDGPUGenPostLegalizeGICombiner.inc at the end of the class.
class AMDGPUPostLegalizerCombinerImpl : public Combiner {

protected:

  // Rule configuration handed to the constructor by the owning pass.
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;

  // Subtarget handed to the constructor.
  const GCNSubtarget &STI;

  // Instruction info, initialized from STI.getInstrInfo().
  const SIInstrInfo &TII;

  // TODO: Make CombinerHelper methods const.

  // Declared mutable so the const match/apply methods can call into it.
  mutable AMDGPUCombinerHelper Helper;


public:

  AMDGPUPostLegalizerCombinerImpl(

      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,

      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,

      const GCNSubtarget &STI, MachineDominatorTree *MDT,

      const LegalizerInfo *LI);


  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }


  // Declared here; no definition is visible in this file (presumably provided
  // by the generated include below).
  bool tryCombineAllImpl(MachineInstr &I) const;

  // Tries tryCombineAllImpl first, then falls back to splitting G_SHL/G_LSHR/
  // G_ASHR via Helper.tryCombineShiftToUnmerge(MI, 32).
  bool tryCombineAll(MachineInstr &I) const override;


  // Operands and predicate captured by matchFMinFMaxLegacy: LHS/RHS/Pred come
  // from the G_FCMP feeding the condition, True/False from operands 2 and 3 of
  // the matched instruction.
  struct FMinFMaxLegacyInfo {

    Register LHS;

    Register RHS;

    Register True;

    Register False;

    CmpInst::Predicate Pred;

  };


  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize

  // Matches a 32-bit result whose single-use condition is a G_FCMP comparing
  // exactly the two selected values with an ordering predicate.
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;

  // Replaces MI with G_AMDGPU_FMIN_LEGACY or G_AMDGPU_FMAX_LEGACY, picking the
  // opcode and operand order from the predicate.
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,

                                         const FMinFMaxLegacyInfo &Info) const;


  // Matches when the result is s32 or s16 and every source bit above the low
  // 8 is known to be zero.
  bool matchUCharToFloat(MachineInstr &MI) const;

  // Rewrites MI to G_AMDGPU_CVT_F32_UBYTE0, followed by an fptrunc when the
  // result type is not s32.
  void applyUCharToFloat(MachineInstr &MI) const;


  // Matches rcp(sqrt(x)) and sqrt(rcp(x)) when both instructions carry the
  // contract flag; MatchInfo receives a builder callback emitting amdgcn_rsq.
  bool

  matchRcpSqrtToRsq(MachineInstr &MI,

                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;


  // Matches when the register in operand 2 of MI has a single non-debug use.
  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;

  // Replaces MI with amdgcn_rsq(X) multiplied by operand 1 of MI.
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;


  // FIXME: Should be able to have 2 separate matchdatas rather than custom

  // struct boilerplate.

  // CvtVal is the value being shifted; ShiftOffset is the resulting bit offset
  // (a multiple of 8 when the match succeeds).
  struct CvtF32UByteMatchInfo {

    Register CvtVal;

    unsigned ShiftOffset;

  };


  // Folds a constant G_LSHR/G_SHL on the source (optionally behind a G_ZEXT)
  // into the byte index of a G_AMDGPU_CVT_F32_UBYTEn instruction.
  bool matchCvtF32UByteN(MachineInstr &MI,

                         CvtF32UByteMatchInfo &MatchInfo) const;

  // Rebuilds MI as the G_AMDGPU_CVT_F32_UBYTEn selected by
  // MatchInfo.ShiftOffset / 8, reading MatchInfo.CvtVal.
  void applyCvtF32UByteN(MachineInstr &MI,

                         const CvtF32UByteMatchInfo &MatchInfo) const;


  // Sets Reg to operand 1 of MI and matches when
  // SITargetLowering::isCanonicalized reports it as already canonical.
  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;


  // Combine unsigned buffer load and signed extension instructions to generate

  // signed buffer load instructions.

  bool matchCombineSignExtendInReg(

      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  void applyCombineSignExtendInReg(

      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;


  // Find the s_mul_u64 instructions where the higher bits are either

  // zero-extended or sign-extended.

  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher

  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32

  // bits are zero extended.

  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;


private:

  // Class members come from the generated include; AMDGPUSubtarget is
  // temporarily defined as GCNSubtarget while it is expanded.
#define GET_GICOMBINER_CLASS_MEMBERS

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CLASS_MEMBERS

#undef AMDGPUSubtarget

};


#define GET_GICOMBINER_IMPL

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPostLegalizeGICombiner.inc"

#undef AMDGPUSubtarget

#undef GET_GICOMBINER_IMPL


/// Builds the combiner implementation.
///
/// MF, CInfo, TPC, KB and CSEInfo are forwarded to the Combiner base class.
/// RuleConfig and STI are stored by reference, TII is taken from
/// STI.getInstrInfo(), and Helper is constructed with IsPreLegalize set to
/// false using KB, MDT and LI. The initializer list is completed by the
/// GET_GICOMBINER_CONSTRUCTOR_INITS section of the generated include.
AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(

    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,

    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,

    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)

    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),

      TII(*STI.getInstrInfo()),

      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),

#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AMDGPUGenPostLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


/// Entry point for combining a single instruction. tryCombineAllImpl gets the
/// first attempt; only if it reports no change are the manual shift combines
/// below tried.
bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  const unsigned Opc = MI.getOpcode();
  const bool IsShift = Opc == TargetOpcode::G_SHL ||
                       Opc == TargetOpcode::G_LSHR ||
                       Opc == TargetOpcode::G_ASHR;
  if (!IsShift)
    return false;

  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
  // common case, splitting this into a move and a 32-bit shift is faster and
  // the same code size.
  return Helper.tryCombineShiftToUnmerge(MI, 32);
}


/// Checks whether \p MI (result in operand 0, condition in operand 1, selected
/// values in operands 2 and 3) can become a legacy fmin/fmax.
///
/// Requirements: the result is s32, the condition has a single non-debug use
/// and is defined by a G_FCMP, the compare operands are exactly the two
/// selected values (in either order), and the predicate is not one of the
/// equality/ordered-ness/constant predicates. On the way \p Info is filled
/// with the compare operands, the predicate and the selected values.
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  const Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg) != LLT::scalar(32))
    return false;

  const Register CondReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(CondReg))
    return false;
  if (!mi_match(CondReg, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  const bool SameOrder = Info.LHS == Info.True && Info.RHS == Info.False;
  const bool SwappedOrder = Info.LHS == Info.False && Info.RHS == Info.True;
  if (!SameOrder && !SwappedOrder)
    return false;

  // Reject predicates that do not express a less/greater ordering.
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    break;
  }
  return true;
}


/// Replaces \p MI with G_AMDGPU_FMIN_LEGACY or G_AMDGPU_FMAX_LEGACY using the
/// data gathered by matchFMinFMaxLegacy.
///
/// The opcode depends on the direction of the predicate and on whether the
/// compare's LHS is the value selected on "true". The operand order depends
/// additionally on whether the predicate is ordered or unordered. The new
/// instruction writes MI's result operand and inherits MI's flags; MI is
/// erased afterwards.
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);

  const bool SelectsLHS = Info.LHS == Info.True;
  unsigned NewOpc = 0;
  bool LHSFirst = false;

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    NewOpc = SelectsLHS ? AMDGPU::G_AMDGPU_FMIN_LEGACY
                        : AMDGPU::G_AMDGPU_FMAX_LEGACY;
    LHSFirst = !SelectsLHS;
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT:
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    NewOpc = SelectsLHS ? AMDGPU::G_AMDGPU_FMIN_LEGACY
                        : AMDGPU::G_AMDGPU_FMAX_LEGACY;
    LHSFirst = SelectsLHS;
    break;
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
    NewOpc = SelectsLHS ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                        : AMDGPU::G_AMDGPU_FMIN_LEGACY;
    LHSFirst = !SelectsLHS;
    break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
    NewOpc = SelectsLHS ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                        : AMDGPU::G_AMDGPU_FMIN_LEGACY;
    LHSFirst = SelectsLHS;
    break;
  default:
    llvm_unreachable("predicate should not have matched");
  }

  const Register FirstOp = LHSFirst ? Info.LHS : Info.RHS;
  const Register SecondOp = LHSFirst ? Info.RHS : Info.LHS;
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {FirstOp, SecondOp}, MI.getFlags());

  MI.eraseFromParent();
}


/// Returns true when \p MI produces an s32 or s16 value and known-bits
/// analysis proves that every bit of the source (operand 1) above the low
/// byte is zero.
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (DstTy != LLT::scalar(32) && DstTy != LLT::scalar(16))
    return false;

  const Register Src = MI.getOperand(1).getReg();
  const unsigned SrcBits = MRI.getType(Src).getSizeInBits();
  assert(SrcBits == 16 || SrcBits == 32 || SrcBits == 64);

  // Everything except the lowest 8 bits must be known zero.
  const APInt UpperBits = APInt::getHighBitsSet(SrcBits, SrcBits - 8);
  return Helper.getKnownBits()->maskedValueIsZero(Src, UpperBits);
}


/// Rewrites \p MI as G_AMDGPU_CVT_F32_UBYTE0.
///
/// The source is first any-extended or truncated to s32 when it has another
/// type. For an s32 result the conversion writes the result register
/// directly; otherwise the conversion produces an s32 temporary that is
/// fptrunc'ed into the result. MI's flags are propagated and MI is erased.
void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);
  const Register Dst = MI.getOperand(0).getReg();
  const bool DstIsS32 = MRI.getType(Dst) == S32;

  Register Src = MI.getOperand(1).getReg();
  if (MRI.getType(Src) != S32)
    Src = B.buildAnyExtOrTrunc(S32, Src).getReg(0);

  if (!DstIsS32) {
    auto Converted = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
                                  {Src}, MI.getFlags());
    B.buildFPTrunc(Dst, Converted, MI.getFlags());
  } else {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {Dst}, {Src},
                 MI.getFlags());
  }

  MI.eraseFromParent();
}


/// Recognizes rcp(sqrt(x)) and sqrt(rcp(x)) rooted at \p MI, where both
/// instructions carry the FmContract flag.
///
/// On success \p MatchInfo is set to a callback that builds an amdgcn_rsq
/// intrinsic writing MI's result operand, reading the result register of the
/// instruction that defines the inner operand, and copying MI's flags.
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  // For a contract-flagged amdgcn_rcp intrinsic, returns the instruction
  // defining its input (operand 2); nullptr otherwise.
  auto rcpInputDef = [this](const MachineInstr &Inst) -> MachineInstr * {
    if (!Inst.getFlag(MachineInstr::FmContract))
      return nullptr;

    auto *Intr = dyn_cast<GIntrinsic>(&Inst);
    if (Intr && Intr->is(Intrinsic::amdgcn_rcp))
      return MRI.getVRegDef(Inst.getOperand(2).getReg());
    return nullptr;
  };

  // For a contract-flagged G_FSQRT, returns the instruction defining its
  // input; nullptr otherwise.
  auto sqrtInputDef = [this](const MachineInstr &Inst) -> MachineInstr * {
    if (!Inst.getFlag(MachineInstr::FmContract))
      return nullptr;

    MachineInstr *InputDef = nullptr;
    // The pointer stays null when the pattern does not match, so the boolean
    // result is not needed.
    (void)mi_match(Inst.getOperand(0).getReg(), MRI,
                   m_GFSqrt(m_MInstr(InputDef)));
    return InputDef;
  };

  // rcp(sqrt(x))
  if (MachineInstr *SqrtMI = rcpInputDef(MI)) {
    if (MachineInstr *XDef = sqrtInputDef(*SqrtMI)) {
      MatchInfo = [XDef, &MI](MachineIRBuilder &B) {
        B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
            .addUse(XDef->getOperand(0).getReg())
            .setMIFlags(MI.getFlags());
      };
      return true;
    }
  }

  // sqrt(rcp(x))
  if (MachineInstr *RcpMI = sqrtInputDef(MI)) {
    if (MachineInstr *XDef = rcpInputDef(*RcpMI)) {
      MatchInfo = [XDef, &MI](MachineIRBuilder &B) {
        B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
            .addUse(XDef->getOperand(0).getReg())
            .setMIFlags(MI.getFlags());
      };
      return true;
    }
  }

  return false;
}


/// Matches when the register in operand 2 of \p MI has exactly one non-debug
/// use.
bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  return MRI.hasOneNonDBGUse(MI.getOperand(2).getReg());
}


/// Replaces \p MI with amdgcn_rsq(\p X) multiplied by operand 1 of MI.
///
/// The rsq intrinsic produces a fresh register of the result type; the
/// multiply writes MI's original result register. Both new instructions carry
/// MI's flags, and MI is erased.
void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  const Register Result = MI.getOperand(0).getReg();
  const Register Factor = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const uint32_t InstFlags = MI.getFlags();

  auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {ResultTy})
                 .addUse(X)
                 .setMIFlags(InstFlags);
  const Register RsqReg = Rsq.getReg(0);

  B.buildFMul(Result, RsqReg, Factor, InstFlags);
  MI.eraseFromParent();
}


/// Tries to fold a constant shift feeding a G_AMDGPU_CVT_F32_UBYTEn into the
/// byte index of the conversion.
///
/// The source (operand 1) is looked through an optional G_ZEXT and must then
/// be a G_LSHR or G_SHL by a constant. The current byte offset (8 * n) is
/// increased by a right-shift amount or decreased by a left-shift amount.
/// \p MatchInfo is written whenever a shift is found; the match succeeds only
/// if the new offset is a multiple of 8 in the range [8, 32).
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();

  // Look through G_ZEXT. Only the rebinding of Src matters here.
  (void)mi_match(Src, MRI, m_GZExt(m_Reg(Src)));

  Register ShiftedVal;
  int64_t Amount;
  const bool IsRightShift =
      mi_match(Src, MRI, m_GLShr(m_Reg(ShiftedVal), m_ICst(Amount)));
  if (!IsRightShift &&
      !mi_match(Src, MRI, m_GShl(m_Reg(ShiftedVal), m_ICst(Amount)))) {
    // TODO: Simplify demanded bits.
    return false;
  }

  const unsigned ByteIdx = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
  unsigned BitOffset = 8 * ByteIdx;
  if (IsRightShift)
    BitOffset += Amount;
  else
    BitOffset -= Amount;

  MatchInfo.CvtVal = ShiftedVal;
  MatchInfo.ShiftOffset = BitOffset;

  const bool InRange = BitOffset >= 8 && BitOffset < 32;
  const bool ByteAligned = (BitOffset % 8) == 0;
  return InRange && ByteAligned;
}


/// Rebuilds \p MI as the G_AMDGPU_CVT_F32_UBYTEn variant selected by
/// MatchInfo.ShiftOffset / 8, reading MatchInfo.CvtVal (any-extended to s32
/// when it has a different type). The new instruction writes MI's result
/// operand with MI's flags, and MI is erased.
void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);

  const unsigned ByteIdx = MatchInfo.ShiftOffset / 8;
  const unsigned TargetOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + ByteIdx;

  const LLT S32 = LLT::scalar(32);
  Register Input = MatchInfo.CvtVal;
  const LLT InputTy = MRI.getType(Input);
  if (InputTy != S32) {
    assert(InputTy.isScalar() && InputTy.getSizeInBits() >= 8);
    Input = B.buildAnyExt(S32, Input).getReg(0);
  }

  // The match only succeeds when the byte index actually changes.
  assert(MI.getOpcode() != TargetOpc);
  B.buildInstr(TargetOpc, {MI.getOperand(0)}, {Input}, MI.getFlags());
  MI.eraseFromParent();
}


/// Stores operand 1 of \p MI in \p Reg and returns whether the target
/// lowering reports that register as already canonicalized.
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  Reg = MI.getOperand(1).getReg();

  const auto *Lowering = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  return Lowering->isCanonicalized(Reg, MF);
}


// The buffer_load_{i8, i16} intrinsics are initially lowered as buffer_load_{u8,

// u16} instructions. Here, the buffer_load_{u8, u16} instructions are combined

// with sign extension instructions in order to generate buffer_load_{i8, i16}

// instructions.


// Identify buffer_load_{u8, u16}.

/// Matches a sign-extend-in-register (\p MI: source in operand 1, width
/// immediate in operand 2) whose source is an unsigned sub-dword buffer load
/// with no other non-debug user.
///
/// When the defining instruction is one of the four handled unsigned loads,
/// \p MatchData receives that load and the signed opcode to switch it to; the
/// match then succeeds only if the extension width equals the load width
/// (8 for byte loads, 16 for short loads).
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  const Register Src = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Src))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *Def = MRI.getVRegDef(Src);
  const int64_t ExtWidth = MI.getOperand(2).getImm();

  unsigned SignedOpc = 0;
  int64_t LoadWidth = 0;
  switch (Def->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    SignedOpc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
    LoadWidth = 8;
    break;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    SignedOpc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
    LoadWidth = 16;
    break;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    SignedOpc = AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE;
    LoadWidth = 8;
    break;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    SignedOpc = AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT;
    LoadWidth = 16;
    break;
  default:
    return false;
  }

  MatchData = {Def, SignedOpc};
  return ExtWidth == LoadWidth;
}


// Combine buffer_load_{u8, u16} and the sign extension instruction to generate

// buffer_load_{i8, i16}.

/// Turns the matched unsigned buffer load into its signed counterpart and
/// removes the now redundant sign extension \p MI.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  MachineInstr *Load = MatchData.first;
  const unsigned SignedOpc = MatchData.second;

  // Switch the load over to the signed opcode.
  Load->setDesc(TII.get(SignedOpc));

  // The load now defines the register the sign extension used to define.
  const Register ExtDst = MI.getOperand(0).getReg();
  Load->getOperand(0).setReg(ExtDst);

  // Remove the sign extension.
  MI.eraseFromParent();
}


/// Picks a narrower multiply for a 64-bit multiply \p MI whose inputs only
/// carry 32 bits of information.
///
/// \p NewOpcode is set to G_AMDGPU_S_MUL_U64_U32 when both inputs have at
/// least 32 known leading zero bits, or to G_AMDGPU_S_MUL_I64_I32 when both
/// have at least 33 sign bits. Returns false (leaving \p NewOpcode untouched)
/// when neither holds or the first input is not s64.
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  const Register In0 = MI.getOperand(1).getReg();
  const Register In1 = MI.getOperand(2).getReg();
  if (MRI.getType(In0) != LLT::scalar(64))
    return false;

  auto upperHalfIsZero = [this](Register R) {
    return KB->getKnownBits(R).countMinLeadingZeros() >= 32;
  };
  auto upperHalfIsSignExt = [this](Register R) {
    return KB->computeNumSignBits(R) >= 33;
  };

  if (upperHalfIsZero(In1) && upperHalfIsZero(In0)) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (upperHalfIsSignExt(In1) && upperHalfIsSignExt(In0)) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }

  return false;
}


/// Switches \p MI to the opcode selected by matchCombine_s_mul_u64 by
/// delegating to the combiner helper.
void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  const unsigned SelectedOpc = NewOpcode;
  Helper.replaceOpcodeWith(MI, SelectedOpc);
}


// Pass boilerplate

// ================


/// Machine function pass that runs AMDGPUPostLegalizerCombinerImpl over each
/// function. The IsOptNone flag decides whether a MachineDominatorTree is
/// requested and handed to the implementation.
class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {

public:

  // Pass identifier handed to the MachineFunctionPass base constructor.
  static char ID;


  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);


  StringRef getPassName() const override {

    return "AMDGPUPostLegalizerCombiner";

  }


  bool runOnMachineFunction(MachineFunction &MF) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override;


private:

  // When true, no MachineDominatorTree is required or used.
  bool IsOptNone;

  // Rule configuration; populated from the command line in the constructor.
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;

};

} // end anonymous namespace


/// Declares the analyses this pass needs and keeps intact.
///
/// TargetPassConfig and GISelKnownBitsAnalysis are always required; the
/// latter is also marked preserved, as is the CFG. MachineDominatorTree is
/// required and preserved only when the pass was not created with IsOptNone.
void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<TargetPassConfig>();

  AU.setPreservesCFG();

  getSelectionDAGFallbackAnalysisUsage(AU);

  AU.addRequired<GISelKnownBitsAnalysis>();

  AU.addPreserved<GISelKnownBitsAnalysis>();

  // The dominator tree is only requested for optimizing builds of the pass.
  if (!IsOptNone) {

    AU.addRequired<MachineDominatorTree>();

    AU.addPreserved<MachineDominatorTree>();

  }

  // Let the base class add its own requirements last.
  MachineFunctionPass::getAnalysisUsage(AU);

}


/// Creates the pass, registers it with the global pass registry and parses
/// the rule configuration from the command line. An unparsable rule
/// configuration is reported as a fatal error.
AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  const bool ParsedOk = RuleConfig.parseCommandLineOption();
  if (ParsedOk)
    return;

  report_fatal_error("Invalid rule identifier");
}


/// Runs the post-legalizer combiner on \p MF.
///
/// Functions already marked FailedISel are left untouched. Otherwise the
/// required analyses are fetched (the dominator tree only when the pass was
/// not created with IsOptNone), a CombinerInfo is set up, and the combiner
/// implementation is run. Returns whatever combineMachineInstrs reports.
bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  const MachineFunctionProperties &Props = MF.getProperties();
  if (Props.hasProperty(MachineFunctionProperties::Property::FailedISel))
    return false;

  const TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();

  // skipFunction is only consulted when the target is not compiling at -O0.
  const bool OptEnabled =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(Fn);

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const auto *LegalInfo =
      static_cast<const AMDGPULegalizerInfo *>(Subtarget.getLegalizerInfo());

  GISelKnownBits &KnownBits = getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  MachineDominatorTree *DomTree = nullptr;
  if (!IsOptNone)
    DomTree = &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LegalInfo, OptEnabled, Fn.hasOptSize(), Fn.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl CombinerImpl(MF, CInfo, PassConfig, KnownBits,
                                               /*CSEInfo*/ nullptr, RuleConfig,
                                               Subtarget, DomTree, LegalInfo);
  return CombinerImpl.combineMachineInstrs();
}


// Definition of the pass identifier declared in the class.
char AMDGPUPostLegalizerCombiner::ID = 0;

// Pass registration: the pass is registered under DEBUG_TYPE with the
// description below, with both trailing flags (cfg, analysis) set to false,
// and declares TargetPassConfig and GISelKnownBitsAnalysis as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,

                      "Combine AMDGPU machine instrs after legalization", false,

                      false)

INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)

INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)

INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,

                    "Combine AMDGPU machine instrs after legalization", false,

                    false)


namespace llvm {
/// Factory for the AMDGPU post-legalizer combiner pass. \p IsOptNone is
/// forwarded to the pass constructor; the caller receives a newly allocated
/// pass object.
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  auto *NewPass = new AMDGPUPostLegalizerCombiner(IsOptNone);
  return NewPass;
}
} // end namespace llvm

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

AMDGPUCombinerHelper.h
This contains common combine transformations that may be used in a combine pass.

S32
static const LLT S32
Definition: AMDGPULegalizerInfo.cpp:285

AMDGPULegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AMDGPU.

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

legalization
Combine AMDGPU machine instrs after legalization
Definition: AMDGPUPostLegalizerCombiner.cpp:563

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPostLegalizerCombiner.cpp:35

AMDGPU.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass,or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

GIMatchTableExecutorImpl.h

GISelKnownBits.h
Provides analysis for querying information about KnownBits during GISel passes.

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:2987

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineDominators.h

Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition: RISCVRedundantCopyElimination.cpp:75

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

RHS
Value * RHS
Definition: X86PartialReduction.cpp:76

LHS
Value * LHS
Definition: X86PartialReduction.cpp:75

AMDGPUCombinerHelper
Definition: AMDGPUCombinerHelper.h:23

llvm::AMDGPULegalizerInfo
Definition: AMDGPULegalizerInfo.h:30

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:76

llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993

llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:996

llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010

llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999

llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008

llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997

llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998

llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007

llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001

llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:1004

llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005

llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000

llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:1002

llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:1009

llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006

llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995

llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:1003

llvm::Combiner
Combiner implementation.
Definition: Combiner.h:34

llvm::Combiner::tryCombineAll
virtual bool tryCombineAll(MachineInstr &I) const =0

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311

llvm::Function
Definition: Function.h:62

llvm::GCNSubtarget
Definition: GCNSubtarget.h:35

llvm::GISelCSEInfo
The CSE Analysis object.
Definition: CSEInfo.h:69

llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113

llvm::GISelKnownBits
Definition: GISelKnownBits.h:29

llvm::LLT
Definition: LowLevelType.h:39

llvm::LLT::isScalar
constexpr bool isScalar() const
Definition: LowLevelType.h:146

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193

llvm::LegalizerInfo
Definition: LegalizerInfo.h:1239

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:168

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:194

llvm::MachineFunction
Definition: MachineFunction.h:259

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:718

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:684

llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:714

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:809

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:222

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546

llvm::MachineInstr::setDesc
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
Definition: MachineInstr.cpp:142

llvm::MachineInstr::FmContract
@ FmContract
Definition: MachineInstr.h:99

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556

llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:61

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:369

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::SIInstrInfo
Definition: SIInstrInfo.h:83

llvm::SITargetLowering
Definition: SIISelLowering.h:31

llvm::SITargetLowering::isCanonicalized
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
Definition: SIISelLowering.cpp:12537

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::TargetMachine::getOptLevel
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:265

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

uint32_t

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

TargetMachine.h

false
Definition: StackSlotColoring.cpp:184

llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:468

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:270

llvm::MIPatternMatch::m_Pred
operand_type_match m_Pred()
Definition: MIPatternMatch.h:373

llvm::MIPatternMatch::m_GZExt
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
Definition: MIPatternMatch.h:581

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:93

llvm::MIPatternMatch::m_GFSqrt
UnaryOp_match< SrcTy, TargetOpcode::G_FSQRT > m_GFSqrt(const SrcTy &Src)
Definition: MIPatternMatch.h:635

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25

llvm::MIPatternMatch::m_GShl
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:524

llvm::MIPatternMatch::m_MInstr
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
Definition: MIPatternMatch.h:370

llvm::MIPatternMatch::m_GLShr
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:530

llvm::MIPatternMatch::m_GFCmp
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:680

llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:614

llvm::omp::RTLDependInfoFields::Flags
@ Flags

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:456

llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:567

llvm::DiagnosticPredicateTy::Match
@ Match

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156

llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)

llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1140

llvm::instrs
auto instrs(const MachineBasicBlock &BB)
Definition: MachineSSAContext.h:33

llvm::CombinerInfo
Definition: CombinerInfo.h:24