doxygen/AMDGPUPreLegalizerCombiner_8cpp_source.html

//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass does combining of machine instructions at the generic MI level,

// before the legalizer.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUCombinerHelper.h"

#include "AMDGPULegalizerInfo.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/GlobalISel/CSEInfo.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/Target/TargetMachine.h"


#define GET_GICOMBINER_DEPS

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"


using namespace llvm;

using namespace MIPatternMatch;

namespace {


#define GET_GICOMBINER_TYPES

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_TYPES


// Combiner implementation used by the AMDGPU pre-legalizer combine pass.
// Part of its members are pulled in from AMDGPUGenPreLegalizeGICombiner.inc
// (see the GET_GICOMBINER_CLASS_MEMBERS section below); the i64 -> i16 clamp
// match/apply pair is defined later in this file.
class AMDGPUPreLegalizerCombinerImpl : public Combiner {

protected:

  // Rule configuration supplied at construction; held by reference.
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;

  const GCNSubtarget &STI;

  // Helper that tryCombineAll() uses for its G_SHUFFLE_VECTOR fallback.
  const AMDGPUCombinerHelper Helper;


public:

  AMDGPUPreLegalizerCombinerImpl(

      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,

      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

      const GCNSubtarget &STI, MachineDominatorTree *MDT,

      const LegalizerInfo *LI);


  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }


  // Declared here only; no definition is visible in this file, so it is
  // presumably provided by the GET_GICOMBINER_IMPL include — TODO confirm.
  bool tryCombineAllImpl(MachineInstr &MI) const;

  // Tries tryCombineAllImpl() first, then the G_SHUFFLE_VECTOR helper combine.
  bool tryCombineAll(MachineInstr &I) const override;


  // Data handed from matchClampI64ToI16() to applyClampI64ToI16().
  struct ClampI64ToI16MatchInfo {

    // Constant operand of the outer G_SMIN / G_SMAX.
    int64_t Cmp1 = 0;

    // Constant operand of the inner G_SMAX / G_SMIN.
    int64_t Cmp2 = 0;

    // Register holding the value that is being clamped.
    Register Origin;

  };


  // Returns true if MI (a G_TRUNC from s64 to s16) truncates a constant-bound
  // signed min/max clamp; fills MatchInfo with the bounds and clamped value.
  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,

                          const MachineFunction &MF,

                          ClampI64ToI16MatchInfo &MatchInfo) const;


  // Replaces MI with a G_AMDGPU_CVT_PK_I16_I32 / G_AMDGPU_SMED3 / G_TRUNC
  // sequence built from MatchInfo and erases MI.
  void applyClampI64ToI16(MachineInstr &MI,

                          const ClampI64ToI16MatchInfo &MatchInfo) const;


private:

// While the generated class members are included, the name AMDGPUSubtarget is
// temporarily #defined to GCNSubtarget and restored (undefined) afterwards.
#define GET_GICOMBINER_CLASS_MEMBERS

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CLASS_MEMBERS

#undef AMDGPUSubtarget

};


#define GET_GICOMBINER_IMPL

#define AMDGPUSubtarget GCNSubtarget

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef AMDGPUSubtarget

#undef GET_GICOMBINER_IMPL


// Constructs the combiner implementation.
//
// MF, CInfo, TPC, KB and CSEInfo are forwarded to the Combiner base class.
// RuleConfig and STI are stored by reference. Helper is built in pre-legalize
// mode from KB, MDT, LI and STI; Observer and B are not declared in this file
// and are presumably members inherited from Combiner — TODO confirm.
// The remaining member initializers come from the generated .inc file.
AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(

    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,

    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,

    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)

    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),

      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),

// The trailing comma above is intentional: the generated initializers that
// follow continue the same member-initializer list.
#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AMDGPUGenPreLegalizeGICombiner.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


// Attempts to combine MI. tryCombineAllImpl() gets the first chance; if it
// reports no change, G_SHUFFLE_VECTOR instructions are additionally offered to
// the helper's shuffle-vector combine. Returns true if anything was combined.
bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  const bool CombinedByImpl = tryCombineAllImpl(MI);
  if (CombinedByImpl)
    return true;

  // Only G_SHUFFLE_VECTOR has an extra fallback combine.
  if (MI.getOpcode() != TargetOpcode::G_SHUFFLE_VECTOR)
    return false;

  return Helper.tryCombineShuffleVector(MI);
}


// Matches a G_TRUNC from s64 to s16 whose source is a signed min/max clamp
// with constant bounds, i.e. one of
//   G_TRUNC (G_SMIN (G_SMAX Origin, Cmp2), Cmp1)
//   G_TRUNC (G_SMAX (G_SMIN Origin, Cmp2), Cmp1)
//
// MI        - the G_TRUNC instruction (asserted).
// MRI       - register info used for type queries and pattern matching.
// MF        - not used by this function.
// MatchInfo - receives the clamped register (Origin) and both constants. It
//             may be partially overwritten even when false is returned.
//
// Returns true only if both constants lie within the int16_t range and differ
// by more than one.
bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
    MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

  // Try to find a pattern where an i64 value should get clamped to short.
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  if (DstType != LLT::scalar(16))
    return false;

  Register Base;

  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
    const int64_t Cmp1 = MatchInfo.Cmp1;
    const int64_t Cmp2 = MatchInfo.Cmp2;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Check that the comparison values are between SHORT_MIN and SHORT_MAX.
    // This is done before looking at their distance: the constants are
    // arbitrary 64-bit values at this point, so subtracting them first could
    // overflow (undefined behaviour for signed integers).
    const bool InShortRange =
        (Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
        (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min);
    if (!InShortRange)
      return false;

    // Both values now fit in 16 bits, so the subtraction cannot overflow.
    const int64_t Diff = Cmp2 >= Cmp1 ? Cmp2 - Cmp1 : Cmp1 - Cmp2;

    // If the difference between both comparison values is 0 or 1, there is no
    // need to clamp.
    //
    // NOTE(review): the constants are accepted in either order, and
    // applyClampI64ToI16 sorts them with std::min/std::max. If the bounds were
    // inverted in the input (e.g. smin(smax(x, 10), 5)) the original
    // expression would be constant while a sorted clamp is not - confirm that
    // such inputs cannot reach this combine.
    return Diff > 1;
  };

  const Register TruncSrc = MI.getOperand(1).getReg();

  // Try to match a combination of min / max MIR opcodes: min(max(x, C2), C1).
  if (mi_match(TruncSrc, MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  // Same pattern with the nesting swapped: max(min(x, C2), C1).
  if (mi_match(TruncSrc, MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  return false;
}


// We want to find a combination of instructions that gets generated when an
// i64 gets clamped to i16. The corresponding pattern is:
//   a G_SMIN / G_SMAX pair (in either nesting order) with constant bounds on
//   an i64 value, followed by a G_TRUNC from i64 to i16.
// This can be efficiently written as following:
//   v_cvt_pk_i16_i32 v0, v0, v1
//   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max

// Rewrites the matched truncating clamp. The s64 value in MatchInfo.Origin is
// split into two s32 halves, packed with G_AMDGPU_CVT_PK_I16_I32, bitcast to
// s32 and passed through G_AMDGPU_SMED3 together with the smaller and larger
// of the two matched constants. The result is truncated into MI's original
// destination register and MI is erased.
void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
  const LLT S32 = LLT::scalar(32);
  const LLT V2S16 = LLT::fixed_vector(2, 16);

  const Register Value64 = MatchInfo.Origin;
  assert(MI.getParent()->getParent()->getRegInfo().getType(Value64) ==
         LLT::scalar(64));

  // Low and high 32-bit halves of the 64-bit input.
  auto Halves = B.buildUnmerge(S32, Value64);

  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);

  auto Packed = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
                             {Halves.getReg(0), Halves.getReg(1)},
                             MI.getFlags());

  // The matcher accepts the constants in either order, so sort them here.
  const int64_t LowerBound = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const int64_t UpperBound = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto LowerBoundCst = B.buildConstant(S32, LowerBound);
  auto UpperBoundCst = B.buildConstant(S32, UpperBound);

  auto PackedAsS32 = B.buildBitcast({S32}, Packed);

  auto Clamped = B.buildInstr(
      AMDGPU::G_AMDGPU_SMED3, {S32},
      {LowerBoundCst.getReg(0), PackedAsS32.getReg(0), UpperBoundCst.getReg(0)},
      MI.getFlags());

  // Define the original destination register and drop the old G_TRUNC.
  B.buildTrunc(MI.getOperand(0).getReg(), Clamped);
  MI.eraseFromParent();
}


// Pass boilerplate

// ================


// MachineFunctionPass wrapper that runs AMDGPUPreLegalizerCombinerImpl over a
// machine function.
class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {

public:

  static char ID;


  // IsOptNone controls whether the machine dominator tree is requested and
  // handed to the combiner (see getAnalysisUsage / runOnMachineFunction).
  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);


  StringRef getPassName() const override {

    return "AMDGPUPreLegalizerCombiner";

  }


  bool runOnMachineFunction(MachineFunction &MF) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override;


private:

  bool IsOptNone;

  // Populated in the constructor via parseCommandLineOption() and passed by
  // reference to every combiner implementation this pass creates.
  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;

};

} // end anonymous namespace


// Declares the analyses this pass requires and preserves: TargetPassConfig,
// GISelKnownBitsAnalysis and GISelCSEAnalysisWrapperPass always, and the
// machine dominator tree only when the pass was not created with IsOptNone.
void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<TargetPassConfig>();

  AU.setPreservesCFG();

  getSelectionDAGFallbackAnalysisUsage(AU);

  AU.addRequired<GISelKnownBitsAnalysis>();

  AU.addPreserved<GISelKnownBitsAnalysis>();

  // runOnMachineFunction passes a null dominator tree when IsOptNone is set,
  // so the analysis is only requested otherwise.
  if (!IsOptNone) {

    AU.addRequired<MachineDominatorTreeWrapperPass>();

    AU.addPreserved<MachineDominatorTreeWrapperPass>();

  }


  AU.addRequired<GISelCSEAnalysisWrapperPass>();

  AU.addPreserved<GISelCSEAnalysisWrapperPass>();

  // Let the base class add its own analysis usage as well.
  MachineFunctionPass::getAnalysisUsage(AU);

}


// Registers the pass with the global pass registry and parses the combiner
// rule selection from the command line. An unparsable rule selection is a
// fatal error.
AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  const bool RulesParsed = RuleConfig.parseCommandLineOption();
  if (RulesParsed)
    return;

  report_fatal_error("Invalid rule identifier");
}


// Runs the pre-legalizer combiner over MF.
//
// Returns false without doing anything if instruction selection has already
// failed for MF; otherwise returns whatever combineMachineInstrs() reports.
bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed)
    return false;

  auto *PassConfig = &getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();

  // skipFunction() is only consulted when the target is optimizing at all.
  bool ShouldOptimize = false;
  if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None)
    ShouldOptimize = !skipFunction(Fn);

  GISelKnownBits *KnownBits = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  // Enable CSE.
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSE = &CSEWrapper.get(PassConfig->getCSEConfig());

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();

  // The dominator tree is only requested (see getAnalysisUsage) when the pass
  // was not created with IsOptNone.
  MachineDominatorTree *DomTree = nullptr;
  if (!IsOptNone)
    DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo Info(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                    nullptr, ShouldOptimize, Fn.hasOptSize(), Fn.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time
  Info.MaxIterations = 1;
  Info.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // This is the first Combiner, so the input IR might contain dead
  // instructions.
  Info.EnableFullDCE = true;

  AMDGPUPreLegalizerCombinerImpl Combiner(MF, Info, PassConfig, *KnownBits, CSE,
                                          RuleConfig, Subtarget, DomTree,
                                          Subtarget.getLegalizerInfo());
  return Combiner.combineMachineInstrs();
}


// Definition of the pass ID that the constructor passes to
// MachineFunctionPass.
char AMDGPUPreLegalizerCombiner::ID = 0;

// Pass registration under the DEBUG_TYPE name ("amdgpu-prelegalizer-combiner"),
// listing TargetPassConfig and GISelKnownBitsAnalysis as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,

                      "Combine AMDGPU machine instrs before legalization",

                      false, false)

INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)

INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)

INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,

                    "Combine AMDGPU machine instrs before legalization", false,

                    false)


namespace llvm {

// Creates a new heap-allocated AMDGPUPreLegalizerCombiner and returns it as a
// FunctionPass pointer. IsOptNone is forwarded unchanged to the constructor.
FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
  FunctionPass *Pass = new AMDGPUPreLegalizerCombiner(IsOptNone);
  return Pass;
}

} // end namespace llvm

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:31

AMDGPUCombinerHelper.h
This contains common combine transformations that may be used in a combine pass.

V2S16
static const LLT V2S16
Definition: AMDGPULegalizerInfo.cpp:300

S32
static const LLT S32
Definition: AMDGPULegalizerInfo.cpp:285

AMDGPULegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AMDGPU.

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

legalization
Combine AMDGPU machine instrs before legalization
Definition: AMDGPUPreLegalizerCombiner.cpp:296

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPreLegalizerCombiner.cpp:34

AMDGPU.h

true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1981

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CSEInfo.h
Provides analysis for continuously CSEing during GISel passes.

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass,or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

GIMatchTableExecutorImpl.h

GISelKnownBits.h
Provides analysis for querying information about KnownBits during GISel passes.

Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:2985

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:112

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineDominators.h

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

llvm::AMDGPUCombinerHelper
Definition: AMDGPUCombinerHelper.h:23

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::Combiner
Combiner implementation.
Definition: Combiner.h:34

llvm::Combiner::tryCombineAll
virtual bool tryCombineAll(MachineInstr &I) const =0

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::Function
Definition: Function.h:63

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:309

llvm::GISelCSEAnalysisWrapperPass
The actual analysis pass wrapper.
Definition: CSEInfo.h:225

llvm::GISelCSEAnalysisWrapper
Simple wrapper that does the following.
Definition: CSEInfo.h:207

llvm::GISelCSEInfo
The CSE Analysis object.
Definition: CSEInfo.h:70

llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113

llvm::GISelKnownBits
Definition: GISelKnownBits.h:29

llvm::LLT
Definition: LowLevelType.h:39

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100

llvm::LegalizerInfo
Definition: LegalizerInfo.h:1311

llvm::MachineDominatorTreeWrapperPass
Analysis pass which computes a MachineDominatorTree.
Definition: MachineDominators.h:131

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:75

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:169

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:194

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:724

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:695

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:815

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:720

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::TargetMachine::getOptLevel
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.h:257

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

unsigned

TargetMachine.h

false
Definition: StackSlotColoring.cpp:193

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:55

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:270

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:93

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25

llvm::MIPatternMatch::m_GSMin
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:588

llvm::MIPatternMatch::m_GSMax
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:582

llvm::pdb::DbgHeaderType::Max
@ Max

llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:300

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167

llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)

llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1168

llvm::instrs
auto instrs(const MachineBasicBlock &BB)
Definition: MachineSSAContext.h:27

llvm::CombinerInfo
Definition: CombinerInfo.h:24