docs/doxygen/AMDGPUSetWavePriority_8cpp_source.html

//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Pass to temporarily raise the wave priority from the start of

/// the shader function until its last VMEM instructions, to allow younger

/// waves to issue their VMEM instructions as well.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/PostOrderIterator.h"

#include "llvm/CodeGen/MachineFunctionPass.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-set-wave-priority"


// Command-line default for the VALU instruction count threshold. It seeds
// VALUInstsThreshold in runOnMachineFunction, where the
// "amdgpu-wave-priority-threshold" function attribute is also parsed into
// the same variable.
static cl::opt<unsigned> DefaultVALUInstsThreshold(

    "amdgpu-set-wave-priority-valu-insts-threshold",

    cl::desc("VALU instruction count threshold for adjusting wave priority"),

    cl::init(100), cl::Hidden);


namespace {


// Per-basic-block facts gathered by the analysis in runOnMachineFunction.
struct MBBInfo {

  MBBInfo() = default;

  // Number of VALU instructions counted before the first VMEM load or DS
  // instruction of the block. If the block contains neither, the largest
  // such count among its successors is added on top.
  unsigned NumVALUInstsAtStart = 0;

  // True if any successor is marked this way, or if the block has a VMEM
  // load and the longest VALU run counted after its last VMEM load
  // (including the continuation into successors) reaches the threshold.
  bool MayReachVMEMLoad = false;

  // The last VMEM load seen in the block, or nullptr if there is none.
  MachineInstr *LastVMEMLoad = nullptr;

};


// Maps a basic block to the MBBInfo record computed for it.
using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;


/// Machine function pass that inserts S_SETPRIO instructions: one raising
/// the wave priority in the entry block and others lowering it again in
/// blocks selected by runOnMachineFunction.
class AMDGPUSetWavePriority : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "Set wave priority"; }

  /// Runs the analysis and insertion on \p MF; returns true if any
  /// instruction was inserted.
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  /// Inserts an S_SETPRIO with immediate \p priority into \p MBB before
  /// \p I and returns the new instruction. Dereferences TII, so it must
  /// only be called after runOnMachineFunction has assigned it.
  MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               unsigned priority) const;

  /// Instruction info of the subtarget of the function being processed.
  /// Assigned by runOnMachineFunction; explicitly null before that so the
  /// member is never read in an indeterminate state.
  const SIInstrInfo *TII = nullptr;
};


} // End anonymous namespace.


// Registers the pass under the DEBUG_TYPE argument string with the
// description "Set wave priority"; both trailing flags (the macro's cfg and
// analysis parameters) are false.
INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,

                false)


// Definition of the static ID member that the constructor hands to
// MachineFunctionPass.
char AMDGPUSetWavePriority::ID = 0;


/// Factory for the pass: allocates a new AMDGPUSetWavePriority and returns
/// it through the FunctionPass base pointer.
FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
  FunctionPass *NewPass = new AMDGPUSetWavePriority();
  return NewPass;
}


/// Builds an S_SETPRIO instruction carrying \p priority as its immediate
/// operand, inserts it into \p MBB before \p I with an empty debug location,
/// and returns the created instruction.
MachineInstr *
AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      unsigned priority) const {
  const auto &SetprioDesc = TII->get(AMDGPU::S_SETPRIO);
  auto Builder = BuildMI(MBB, I, DebugLoc(), SetprioDesc);
  Builder.addImm(priority);
  return Builder.getInstr();
}


// Checks that for every predecessor Pred that can reach a VMEM load,

// none of Pred's successors can reach a VMEM load.

static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,

                                                   MBBInfoSet &MBBInfos) {

  for (const MachineBasicBlock *Pred : MBB.predecessors()) {

    if (!MBBInfos[Pred].MayReachVMEMLoad)

      continue;

    for (const MachineBasicBlock *Succ : Pred->successors()) {

      if (MBBInfos[Succ].MayReachVMEMLoad)

        return false;

    }

  }

  return true;

}


// True when MI is classified as VMEM by SIInstrInfo and may load.
static bool isVMEMLoad(const MachineInstr &MI) {
  if (!SIInstrInfo::isVMEM(MI))
    return false;
  return MI.mayLoad();
}


// Analyzes MF and, when the entry block ends up marked MayReachVMEMLoad,
// inserts one S_SETPRIO with HighPriority in the entry block and S_SETPRIO
// instructions with LowPriority in a computed set of blocks.
// Returns false without changing anything when the function is skipped, is
// not an entry function, or the entry block is not marked; returns true
// otherwise.
bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {

  // Immediates passed to S_SETPRIO for the raised and the lowered state.
  const unsigned HighPriority = 3;

  const unsigned LowPriority = 0;


  Function &F = MF.getFunction();

  if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))

    return false;


  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // BuildSetprioMI reads TII, so it has to be set before any insertion.
  TII = ST.getInstrInfo();


  // Start from the command-line default and let the function attribute
  // overwrite it. The result of getAsInteger is not checked.
  // NOTE(review): presumably a malformed attribute value leaves the default
  // in place -- confirm against StringRef::getAsInteger.
  unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;

  Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");

  if (A.isValid())

    A.getValueAsString().getAsInteger(0, VALUInstsThreshold);


  // Find VMEM loads that may be executed before long-enough sequences of

  // VALU instructions. We currently assume that backedges/loops, branch

  // probabilities and other details can be ignored, so we essentially

  // determine the largest number of VALU instructions along every

  // possible path from the start of the function that may potentially be

  // executed provided no backedge is ever taken.

  MBBInfoSet MBBInfos;

  for (MachineBasicBlock *MBB : post_order(&MF)) {

    // AtStart stays true until the first VMEM load or DS instruction.
    bool AtStart = true;

    // Longest VALU run that was closed by a DS instruction since the last
    // VMEM load.
    unsigned MaxNumVALUInstsInMiddle = 0;

    // Length of the VALU run that is still open at the current position.
    unsigned NumVALUInstsAtEnd = 0;

    for (MachineInstr &MI : *MBB) {

      if (isVMEMLoad(MI)) {

        // A VMEM load restarts all counting and becomes the new last load.
        AtStart = false;

        MBBInfo &Info = MBBInfos[MBB];

        Info.NumVALUInstsAtStart = 0;

        MaxNumVALUInstsInMiddle = 0;

        NumVALUInstsAtEnd = 0;

        Info.LastVMEMLoad = &MI;

      } else if (SIInstrInfo::isDS(MI)) {

        // A DS instruction closes the current VALU run.
        AtStart = false;

        MaxNumVALUInstsInMiddle =

            std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);

        NumVALUInstsAtEnd = 0;

      } else if (SIInstrInfo::isVALU(MI)) {

        if (AtStart)

          ++MBBInfos[MBB].NumVALUInstsAtStart;

        ++NumVALUInstsAtEnd;

      }

    }


    // Combine the successors' results: whether any of them is marked, and
    // the largest VALU count at the start of any of them.
    bool SuccsMayReachVMEMLoad = false;

    unsigned NumFollowingVALUInsts = 0;

    for (const MachineBasicBlock *Succ : MBB->successors()) {

      SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;

      NumFollowingVALUInsts =

          std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);

    }

    // NOTE(review): the reference is fetched only after the successor
    // lookups above; presumably those lookups can insert into the map and
    // invalidate earlier references, so keep this order.
    MBBInfo &Info = MBBInfos[MBB];

    // With no VMEM load or DS instruction in the block, the leading VALU run
    // continues into the successors.
    if (AtStart)

      Info.NumVALUInstsAtStart += NumFollowingVALUInsts;

    // The trailing VALU run always continues into the successors.
    NumVALUInstsAtEnd += NumFollowingVALUInsts;


    unsigned MaxNumVALUInsts =

        std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);

    Info.MayReachVMEMLoad =

        SuccsMayReachVMEMLoad ||

        (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);

  }


  MachineBasicBlock &Entry = MF.front();

  if (!MBBInfos[&Entry].MayReachVMEMLoad)

    return false;


  // Raise the priority at the beginning of the shader.

  // The insertion point is the first VALU instruction or terminator of the
  // entry block, or the block end if there is neither.
  MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();

  while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())

    ++I;

  BuildSetprioMI(Entry, I, HighPriority);


  // Lower the priority on edges where control leaves blocks from which

  // the VMEM loads are reachable.

  SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;

  for (MachineBasicBlock &MBB : MF) {

    // A marked block lowers the priority itself only when it has no
    // successors; otherwise the lowering is left to blocks further down.
    if (MBBInfos[&MBB].MayReachVMEMLoad) {

      if (MBB.succ_empty())

        PriorityLoweringBlocks.insert(&MBB);

      continue;

    }


    // Unmarked block: prefer lowering in its marked predecessors.
    if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {

      for (MachineBasicBlock *Pred : MBB.predecessors()) {

        if (MBBInfos[Pred].MayReachVMEMLoad)

          PriorityLoweringBlocks.insert(Pred);

      }

      continue;

    }


    // Where lowering the priority in predecessors is not possible, the

    // block receiving control either was not part of a loop in the first

    // place or the loop simplification/canonicalization pass should have

    // already tried to split the edge and insert a preheader, and if for

    // whatever reason it failed to do so, then this leaves us with the

    // only option of lowering the priority within the loop.

    PriorityLoweringBlocks.insert(&MBB);

  }


  // Insert the lowering instruction right after the block's last VMEM load,
  // or at the start of the block when it has no VMEM load.
  for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {

    BuildSetprioMI(

        *MBB,

        MBBInfos[MBB].LastVMEMLoad

            ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))

            : MBB->begin(),

        LowPriority);

  }


  return true;

}

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

DefaultVALUInstsThreshold
static cl::opt< unsigned > DefaultVALUInstsThreshold("amdgpu-set-wave-priority-valu-insts-threshold", cl::desc("VALU instruction count threshold for adjusting wave priority"), cl::init(100), cl::Hidden)

CanLowerPriorityDirectlyInPredecessors
static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, MBBInfoSet &MBBInfos)
Definition: AMDGPUSetWavePriority.cpp:82

isVMEMLoad
static bool isVMEMLoad(const MachineInstr &MI)
Definition: AMDGPUSetWavePriority.cpp:95

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUSetWavePriority.cpp:25

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition: ARMSLSHardening.cpp:71

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:112

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MachineFunctionPass.h

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38

PostOrderIterator.h
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

SIInstrInfo.h
Interface definition for SIInstrInfo.

char

llvm::Attribute
Definition: Attributes.h:67

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::DenseMap
Definition: DenseMap.h:727

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::Function
Definition: Function.h:63

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:125

llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:436

llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:355

llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:444

llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:438

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:724

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:695

llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:950

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:133

llvm::MachineInstrBundleIterator< MachineInstr >

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81

llvm::SIInstrInfo
Definition: SIInstrInfo.h:85

llvm::SIInstrInfo::isDS
static bool isDS(const MachineInstr &MI)
Definition: SIInstrInfo.h:562

llvm::SIInstrInfo::isVMEM
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:440

llvm::SIInstrInfo::isVALU
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:424

llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::cl::opt
Definition: CommandLine.h:1423

unsigned

llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:2066

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::COFF::Entry
@ Entry
Definition: COFF.h:844

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::createAMDGPUSetWavePriorityPass
FunctionPass * createAMDGPUSetWavePriorityPass()

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:373

llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:197

llvm::cl::desc
Definition: CommandLine.h:409