AMDGPUPostLegalizerCombiner.cpp (LLVM 12.0.0git)
//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPULegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

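// Operands of a matched select(fcmp(pred, lhs, rhs), true, false) that can be
// turned into a legacy min/max instruction.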
struct FMinFMaxLegacyInfo {
  Register LHS;
  Register RHS;
  Register True;
  Register False;
  CmpInst::Predicate Pred;
};

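// Match a G_SELECT whose condition is a single-use G_FCMP comparing the same
// two values the select chooses between. Predicates with no legacy min/max
// equivalent are rejected.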
// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

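// Replace the matched select with the equivalent G_AMDGPU_FMIN_LEGACY /
// G_AMDGPU_FMAX_LEGACY instruction.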
static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {

  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

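  // The legacy min/max instructions return the second source operand when the
  // comparison with a NaN input fails, so each case below orders the operands
  // to preserve the NaN behavior the original select encoded.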
  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

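// Match an integer-to-float conversion whose source can only have its low 8
// bits set, which maps to the hardware's unsigned-byte conversion.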
static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF, CombinerHelper &Helper) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

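// Rewrite the conversion as G_AMDGPU_CVT_F32_UBYTE0, truncating the f32
// result when the original destination was 16-bit.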
static void applyUCharToFloat(MachineInstr &MI) {
  MachineIRBuilder B(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = B.getMRI()->getType(DstReg);
  LLT SrcTy = B.getMRI()->getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
                 {SrcReg}, MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
                             {SrcReg}, MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
// boilerplate.
struct CvtF32UByteMatchInfo {
  Register CvtVal;
  unsigned ShiftOffset;
};

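// Match a G_AMDGPU_CVT_F32_UBYTEn whose source is shifted by a constant
// multiple of 8 (possibly behind a zext); the shift folds into the byte
// index n.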
static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF,
                              CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

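// Re-emit the conversion with the byte index implied by the folded shift.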
static void applyCvtF32UByteN(MachineInstr &MI,
                              const CvtF32UByteMatchInfo &MatchInfo) {
  MachineIRBuilder B(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

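// The TableGen-generated combiner helper is included three times: its
// dependencies here, its declaration inside the anonymous namespace below,
// and its definition after the class that uses it.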
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

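// Run the TableGen-generated rules first; if none fire, try the manually
// implemented combines below.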
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);

  if (Generated.tryCombineAll(Observer, MI, B, Helper))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
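// Entry point: set up the combiner with the analyses it needs (known bits
// always; the dominator tree only when optimizing) and run it over the
// function.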

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
    = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm