docs/doxygen/AMDGPUPreLegalizerCombiner_8cpp_source.html

//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass does combining of machine instructions at the generic MI level,

// before the legalizer.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUCombinerHelper.h"

#include "AMDGPULegalizerInfo.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/GlobalISel/CSEInfo.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/Target/TargetMachine.h"


#define GET_GICOMBINER_DEPS

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"


using namespace llvm;

using namespace MIPatternMatch;

namespace {


#define GET_GICOMBINER_TYPES

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_TYPES


/// Combiner implementation used by the AMDGPU pre-legalizer combine pass.
///
/// The class combines the members pulled in from the generated
/// AMDGPUGenPreLegalizeGICombiner.inc with the hand-written
/// clamp-i64-to-i16 match/apply pair declared below.
class AMDGPUPreLegalizerCombinerImpl : public Combiner {

protected:

  // Rule configuration supplied by the owning pass; held by reference.
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;

  // Subtarget the current function is compiled for; held by reference.
  const GCNSubtarget &STI;

  // Helper providing shared combine transformations (e.g. the
  // G_SHUFFLE_VECTOR combine used by tryCombineAll).
  const AMDGPUCombinerHelper Helper;


public:

  /// Builds the combiner for \p MF. \p MDT is forwarded to the helper and may
  /// be null (the pass passes nullptr when it was created for opt-none).
  AMDGPUPreLegalizerCombinerImpl(

      MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,

      GISelCSEInfo *CSEInfo,

      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

      const GCNSubtarget &STI, MachineDominatorTree *MDT,

      const LegalizerInfo *LI);


  /// Fixed name string identifying this combiner implementation.
  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }


  // NOTE(review): no definition of tryCombineAllImpl is visible in this file;
  // presumably it is emitted by the GET_GICOMBINER_IMPL section of the
  // generated .inc -- confirm.
  bool tryCombineAllImpl(MachineInstr &MI) const;

  /// Tries tryCombineAllImpl first, then the hand-written fallback combines.
  bool tryCombineAll(MachineInstr &I) const override;


  /// Data handed from matchClampI64ToI16 to applyClampI64ToI16.
  struct ClampI64ToI16MatchInfo {

    // Constant operand of the outer G_SMIN/G_SMAX (the one feeding G_TRUNC).
    int64_t Cmp1 = 0;

    // Constant operand of the inner G_SMAX/G_SMIN.
    int64_t Cmp2 = 0;

    // Non-constant operand of the inner min/max, i.e. the value being clamped.
    Register Origin;

  };


  /// Returns true when \p MI is an s64 -> s16 G_TRUNC of a signed min/max
  /// clamp with suitable constant bounds; fills \p MatchInfo on success.
  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,

                          const MachineFunction &MF,

                          ClampI64ToI16MatchInfo &MatchInfo) const;


  /// Rewrites the pattern recorded in \p MatchInfo and erases \p MI.
  void applyClampI64ToI16(MachineInstr &MI,

                          const ClampI64ToI16MatchInfo &MatchInfo) const;


private:

  // Splice in the generated class members. AMDGPUSubtarget is temporarily
  // defined as GCNSubtarget for the duration of the include; presumably the
  // generated code spells the subtarget type as AMDGPUSubtarget -- confirm.
#define GET_GICOMBINER_CLASS_MEMBERS

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CLASS_MEMBERS

#undef AMDGPUSubtarget

};


#define GET_GICOMBINER_IMPL

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef AMDGPUSubtarget

#undef GET_GICOMBINER_IMPL


/// Constructs the combiner implementation.
///
/// Forwards \p MF, \p CInfo, \p VT and \p CSEInfo to the Combiner base, stores
/// references to \p RuleConfig and \p STI, and creates the AMDGPU combiner
/// helper in pre-legalize mode with \p MDT and \p LI. The trailing member
/// initializers come from the generated .inc file.
AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(

    MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,

    GISelCSEInfo *CSEInfo,

    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)

    : Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),

      // NOTE(review): Observer and B are not declared in this file;
      // presumably they are members of the Combiner base -- confirm.
      Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI),

      // The generated fragment completes the initializer list, which is why
      // the line above ends with a comma.
#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


/// Try every available combine on \p MI.
///
/// tryCombineAllImpl gets the first chance. Only when it reports no change is
/// the hand-written fallback consulted, which currently handles
/// G_SHUFFLE_VECTOR through the combiner helper.
///
/// \returns true if any combine changed the instruction stream.
bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  const bool ImplCombined = tryCombineAllImpl(MI);
  if (ImplCombined)
    return true;

  // Fallback combines that are dispatched by opcode.
  if (MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR)
    return Helper.tryCombineShuffleVector(MI);

  return false;
}


/// Match a G_TRUNC from s64 to s16 whose source is a signed clamp built from
/// a G_SMIN/G_SMAX pair with constant bounds.
///
/// Two nestings are accepted:
///   trunc(smin(smax(Origin, Cmp2), Cmp1))
///   trunc(smax(smin(Origin, Cmp2), Cmp1))
///
/// \param MI        The G_TRUNC instruction (asserted).
/// \param MRI       Register info used for type queries and pattern matching.
/// \param MF        Unused; kept for the existing call interface.
/// \param MatchInfo Receives the two constants and the clamped register. Its
///                  fields may be partially written even when the match
///                  fails, so only read it after a true result.
/// \returns true when the pattern matched, both constants lie within the
///          int16_t range, and they differ by more than one.
bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
    MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

  // Try to find a pattern where an i64 value should get clamped to short.
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  if (DstType != LLT::scalar(16))
    return false;

  Register Base;

  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
    const int64_t Cmp1 = MatchInfo.Cmp1;
    const int64_t Cmp2 = MatchInfo.Cmp2;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Both comparison values must lie between SHORT_MIN and SHORT_MAX. This
    // is checked before forming the difference: the matched constants are
    // arbitrary 64-bit values, and subtracting two far-apart ones (e.g.
    // INT64_MAX and INT64_MIN) would overflow a signed integer.
    if (Cmp1 < Min || Cmp1 > Max || Cmp2 < Min || Cmp2 > Max)
      return false;

    // With both values inside the 16-bit range the difference cannot
    // overflow. If it is 0 or 1, there is no need to clamp.
    const int64_t Diff = std::abs(Cmp2 - Cmp1);
    return Diff > 1;
  };

  // Try to match a combination of min / max MIR opcodes.
  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  return false;
}


// We want to find a combination of instructions that

// gets generated when an i64 gets clamped to i16.

// The corresponding pattern is:

// G_SMAX / G_SMIN (in either order) on an i64, followed by G_TRUNC to i16.

// This can be efficiently written as following:

// v_cvt_pk_i16_i32 v0, v0, v1

// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max

/// Rewrite a matched clamp-and-truncate and erase \p MI.
///
/// Emitted sequence, in order:
///   1. G_UNMERGE_VALUES of the s64 origin into two s32 halves,
///   2. G_AMDGPU_CVT_PK_I16_I32 of the two halves producing a <2 x s16>,
///   3. two s32 constants holding the smaller and larger matched bound,
///   4. a bitcast of the packed value to s32,
///   5. G_AMDGPU_SMED3 of (lower bound, bitcast, upper bound),
///   6. G_TRUNC of the result into the original destination register.
///
/// The flags of \p MI are copied onto the CVT_PK and SMED3 instructions.
void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
  const Register Wide = MatchInfo.Origin;
  assert(MI.getMF()->getRegInfo().getType(Wide) == LLT::scalar(64));

  const LLT S32 = LLT::scalar(32);
  const LLT V2S16 = LLT::fixed_vector(2, 16);

  // Split the 64-bit source into its two 32-bit halves.
  auto Halves = B.buildUnmerge(S32, Wide);

  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);

  // Pack both halves into a <2 x s16>.
  auto Packed = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
                             {Halves.getReg(0), Halves.getReg(1)},
                             MI.getFlags());

  // The matcher does not fix which constant is the lower bound, so order
  // them here.
  const int64_t LowerBound = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const int64_t UpperBound = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto LowerCst = B.buildConstant(S32, LowerBound);
  auto UpperCst = B.buildConstant(S32, UpperBound);

  auto PackedAsS32 = B.buildBitcast({S32}, Packed);

  // The median of (lower, value, upper) performs the clamp.
  auto Clamped = B.buildInstr(
      AMDGPU::G_AMDGPU_SMED3, {S32},
      {LowerCst.getReg(0), PackedAsS32.getReg(0), UpperCst.getReg(0)},
      MI.getFlags());

  const Register Dst = MI.getOperand(0).getReg();
  B.buildTrunc(Dst, Clamped);

  MI.eraseFromParent();
}


// Pass boilerplate

// ================


/// Machine function pass that runs AMDGPUPreLegalizerCombinerImpl over a
/// function.
class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {

public:

  // Pass identification member; handed to the MachineFunctionPass base by the
  // constructor.
  static char ID;


  /// \param IsOptNone When true, no dominator tree is requested and the
  ///                  combiner is run without one.
  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);


  /// Name reported for this pass.
  StringRef getPassName() const override {

    return "AMDGPUPreLegalizerCombiner";

  }


  /// Runs the combiner on \p MF; returns the result of combineMachineInstrs().
  bool runOnMachineFunction(MachineFunction &MF) override;


  /// Declares the analyses this pass requires and preserves.
  void getAnalysisUsage(AnalysisUsage &AU) const override;


private:

  // Set once at construction; gates use of the machine dominator tree.
  bool IsOptNone;

  // Rule configuration, populated by parseCommandLineOption() in the
  // constructor.
  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;

};

} // end anonymous namespace


/// Declares the analyses this pass requires and preserves.
///
/// TargetPassConfig, value tracking and the CSE analysis wrapper are always
/// required. The machine dominator tree is only required when the pass was
/// not created for opt-none, matching runOnMachineFunction, which passes a
/// null dominator tree in that case.
void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<TargetPassConfig>();

  AU.setPreservesCFG();

  // Keep the analyses needed by the SelectionDAG fallback path preserved.
  getSelectionDAGFallbackAnalysisUsage(AU);

  AU.addRequired<GISelValueTrackingAnalysisLegacy>();

  AU.addPreserved<GISelValueTrackingAnalysisLegacy>();

  // The dominator tree is only fetched in runOnMachineFunction when
  // IsOptNone is false, so only request it in that configuration.
  if (!IsOptNone) {

    AU.addRequired<MachineDominatorTreeWrapperPass>();

    AU.addPreserved<MachineDominatorTreeWrapperPass>();

  }


  AU.addRequired<GISelCSEAnalysisWrapperPass>();

  AU.addPreserved<GISelCSEAnalysisWrapperPass>();

  // Let the base class add its own requirements.
  MachineFunctionPass::getAnalysisUsage(AU);

}


/// Construct the pass and load the combiner rule configuration.
///
/// \param IsOptNone Stored for later use by getAnalysisUsage and
///                  runOnMachineFunction.
///
/// Aborts via report_fatal_error when the rule configuration rejects the
/// command-line rule options.
AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  if (RuleConfig.parseCommandLineOption())
    return;

  report_fatal_error("Invalid rule identifier");
}


/// Run the pre-legalizer combiner over \p MF.
///
/// Functions whose properties report a failed instruction selection are left
/// untouched. Otherwise the required analyses are collected, a single-pass
/// CombinerInfo is set up and the combiner implementation is executed.
///
/// \returns false for skipped functions, otherwise the result of
///          combineMachineInstrs().
bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;

  auto *PassConfig = &getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();

  // Optimizations are on unless the target is at -O0 or the function is
  // skipped; skipFunction is only consulted when the opt level allows it.
  bool OptEnabled = false;
  if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None)
    OptEnabled = !skipFunction(Fn);

  GISelValueTracking *ValueTracking =
      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);

  // Enable CSE.
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSE = &CSEWrapper.get(PassConfig->getCSEConfig());

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();

  // The dominator tree was only requested when not running for opt-none.
  MachineDominatorTree *DomTree = nullptr;
  if (!IsOptNone)
    DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo Info(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                    nullptr, OptEnabled, Fn.hasOptSize(), Fn.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time
  Info.MaxIterations = 1;
  Info.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // This is the first Combiner, so the input IR might contain dead
  // instructions.
  Info.EnableFullDCE = true;

  AMDGPUPreLegalizerCombinerImpl Combiner(MF, Info, *ValueTracking, CSE,
                                          RuleConfig, Subtarget, DomTree,
                                          Subtarget.getLegalizerInfo());
  return Combiner.combineMachineInstrs();
}


// Definition of the pass identification member declared in the class.
char AMDGPUPreLegalizerCombiner::ID = 0;

// Pass registration under the DEBUG_TYPE argument string
// ("amdgpu-prelegalizer-combiner"). The two trailing `false` arguments are the
// macro's `cfg` and `analysis` parameters.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,

                      "Combine AMDGPU machine instrs before legalization",

                      false, false)

// Passes listed as dependencies of this pass.
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)

INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)

INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,

                    "Combine AMDGPU machine instrs before legalization", false,

                    false)


/// Factory for the AMDGPU pre-legalizer combiner pass.
///
/// \param IsOptNone Forwarded to the pass constructor.
/// \returns a newly allocated AMDGPUPreLegalizerCombiner.
FunctionPass *llvm::createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
  FunctionPass *Pass = new AMDGPUPreLegalizerCombiner(IsOptNone);
  return Pass;
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition AMDGPUAliasAnalysis.cpp:31

AMDGPUCombinerHelper.h
This contains common combine transformations that may be used in a combine pass.

V2S16
constexpr LLT V2S16
Definition AMDGPULegalizerInfo.cpp:314

S32
constexpr LLT S32
Definition AMDGPULegalizerInfo.cpp:299

AMDGPULegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AMDGPU.

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPU.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CSEInfo.h
Provides analysis for continuously CSEing during GISel passes.

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass,or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

GIMatchTableExecutorImpl.h

GISelValueTracking.h
Provides analysis for querying information about KnownBits during GISel passes.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineDominators.h

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

getName
static StringRef getName(Value *V)
Definition ProvenanceAnalysisEvaluator.cpp:20

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

llvm::AMDGPUCombinerHelper
Definition AMDGPUCombinerHelper.h:23

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::CombinerHelper::tryCombineShuffleVector
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
Definition CombinerHelper.cpp:582

llvm::Combiner
Combiner implementation.
Definition Combiner.h:33

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:65

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition GCNSubtarget.h:138

llvm::GISelCSEAnalysisWrapper
Simple wrapper that does the following.
Definition CSEInfo.h:212

llvm::GISelCSEInfo
The CSE Analysis object.
Definition CSEInfo.h:72

llvm::GISelValueTrackingAnalysisLegacy
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
Definition GISelValueTracking.h:171

llvm::GISelValueTracking
Definition GISelValueTracking.h:34

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition LowLevelType.h:88

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition LowLevelType.h:203

llvm::LegalizerInfo
Definition LegalizerInfo.h:1371

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition MachineDominators.h:70

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:188

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:788

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:749

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition MachineFunction.h:877

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition MachineFunction.h:784

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition MachineRegisterInfo.h:771

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::TargetMachine::getOptLevel
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition TargetMachine.h:289

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition TargetPassConfig.h:84

TargetMachine.h

false
Definition MachinePipeliner.cpp:245

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition LegacyLegalizerInfo.h:56

llvm::MIPatternMatch
Definition MIPatternMatch.h:25

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition MIPatternMatch.h:311

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition MIPatternMatch.h:102

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition MIPatternMatch.h:28

llvm::MIPatternMatch::m_GSMin
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:661

llvm::MIPatternMatch::m_GSMax
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:655

llvm::pdb::DbgHeaderType::Max
@ Max
Definition RawConstants.h:98

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163

llvm::CodeGenOptLevel::None
@ None
-O0
Definition CodeGen.h:83

llvm::getSelectionDAGFallbackAnalysisUsage
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147

llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition AMDGPUPreLegalizerCombiner.cpp:296

true
Definition SPIRVConvergenceRegionAnalysis.cpp:41

llvm::CombinerInfo
Definition CombinerInfo.h:24

llvm::CombinerInfo::ObserverLevel::SinglePass
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...
Definition CombinerInfo.h:71