docs/doxygen/AMDGPUInsertDelayAlu_8cpp_source.html

//===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Insert s_delay_alu instructions to avoid stalls on GFX11+.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/SetVector.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-insert-delay-alu"


namespace {


class AMDGPUInsertDelayAlu : public MachineFunctionPass {

public:

  // Pass identification; its address is handed to the MachineFunctionPass
  // base class by the constructor below.
  static char ID;

  // Target instruction info for the function being processed. Assigned in
  // runOnMachineFunction and used to build S_DELAY_ALU instructions.
  // Initialised to null so that use before the first run is detectable.
  const SIInstrInfo *SII = nullptr;

  // Target register info for the function being processed. Assigned in
  // runOnMachineFunction and used to enumerate the regunits of operands.
  const TargetRegisterInfo *TRI = nullptr;

  // Scheduling model, (re)initialised for each function and queried for the
  // latency of every register definition.
  TargetSchedModel SchedModel;

  AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}


  // Declare the analysis requirements of this pass: it marks the CFG as
  // preserved and then defers to MachineFunctionPass::getAnalysisUsage for
  // everything else.
  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesCFG();

    MachineFunctionPass::getAnalysisUsage(AU);

  }


  // Return true if MI waits for all outstanding VALU instructions to complete
  // before it issues.
  static bool instructionWaitsForVALU(const MachineInstr &MI) {
    // Instruction encodings that wait for VA_VDST==0 before issuing.
    const uint64_t WaitsForVaVdstZero =
        SIInstrFlags::DS | SIInstrFlags::EXP | SIInstrFlags::FLAT |
        SIInstrFlags::MIMG | SIInstrFlags::MTBUF | SIInstrFlags::MUBUF;
    if ((MI.getDesc().TSFlags & WaitsForVaVdstZero) != 0)
      return true;

    switch (MI.getOpcode()) {
    case AMDGPU::S_SENDMSG_RTN_B32:
    case AMDGPU::S_SENDMSG_RTN_B64:
      return true;
    case AMDGPU::S_WAITCNT_DEPCTR:
      // Only counts when the encoded VA_VDST field of the immediate is zero.
      return AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0;
    default:
      return false;
    }
  }


  // Types of delay that can be encoded in an s_delay_alu instruction. OTHER is
  // what getDelayType returns for an instruction whose TSFlags carry none of
  // the TRANS, VALU or SALU flags.

  enum DelayType { VALU, TRANS, SALU, OTHER };


  // Classify an instruction by its TSFlags. The TRANS flag is tested first, so
  // an instruction carrying both the TRANS and VALU flags is reported as
  // TRANS; anything that is neither TRANS, VALU nor SALU is OTHER.
  static DelayType getDelayType(uint64_t TSFlags) {
    const auto HasFlag = [TSFlags](uint64_t Flag) {
      return (TSFlags & Flag) != 0;
    };
    return HasFlag(SIInstrFlags::TRANS)  ? TRANS
           : HasFlag(SIInstrFlags::VALU) ? VALU
           : HasFlag(SIInstrFlags::SALU) ? SALU
                                         : OTHER;
  }


  // Information about the last instruction(s) that wrote to a particular
  // regunit. In straight-line code there will only be one such instruction, but
  // when control flow converges we merge the delay information from each path
  // to represent the union of the worst-case delays of each type.
  struct DelayInfo {
    // One larger than the maximum number of (non-TRANS) VALU instructions we
    // can encode in an s_delay_alu instruction.
    static constexpr unsigned VALU_MAX = 5;

    // One larger than the maximum number of TRANS instructions we can encode in
    // an s_delay_alu instruction.
    static constexpr unsigned TRANS_MAX = 4;

    // One larger than the maximum number of SALU cycles we can encode in an
    // s_delay_alu instruction.
    static constexpr unsigned SALU_CYCLES_MAX = 4;

    // Largest value representable by the 8-bit cycle counters below.
    static constexpr unsigned CYCLES_MAX = 0xff;

    // If it was written by a (non-TRANS) VALU, remember how many clock cycles
    // are left until it completes, and how many other (non-TRANS) VALU we have
    // seen since it was issued.
    uint8_t VALUCycles = 0;
    uint8_t VALUNum = VALU_MAX;

    // If it was written by a TRANS, remember how many clock cycles are left
    // until it completes, and how many other TRANS we have seen since it was
    // issued.
    uint8_t TRANSCycles = 0;
    uint8_t TRANSNum = TRANS_MAX;
    // Also remember how many other (non-TRANS) VALU we have seen since it was
    // issued. When an instruction depends on both a prior TRANS and a prior
    // non-TRANS VALU, this is used to decide whether to encode a wait for just
    // one or both of them.
    uint8_t TRANSNumVALU = VALU_MAX;

    // If it was written by an SALU, remember how many clock cycles are left
    // until it completes.
    uint8_t SALUCycles = 0;

    // Construct an "empty" DelayInfo: nothing outstanding of any type.
    DelayInfo() = default;

    // Construct the DelayInfo for a register that has just been written by an
    // instruction of the given Type which takes Cycles cycles to complete.
    // Type must be VALU, TRANS or SALU.
    DelayInfo(DelayType Type, unsigned Cycles) {
      // The counters are only 8 bits wide. Saturate rather than letting a
      // large latency wrap around to a small (or zero) cycle count.
      const uint8_t Saturated =
          static_cast<uint8_t>(std::min(Cycles, CYCLES_MAX));
      switch (Type) {
      default:
        llvm_unreachable("unexpected type");
      case VALU:
        VALUCycles = Saturated;
        VALUNum = 0;
        break;
      case TRANS:
        TRANSCycles = Saturated;
        TRANSNum = 0;
        TRANSNumVALU = 0;
        break;
      case SALU:
        // Guard against pseudo-instructions like SI_CALL which are marked as
        // SALU but with a very high latency.
        SALUCycles = std::min(Cycles, SALU_CYCLES_MAX);
        break;
      }
    }

    // Two DelayInfos are equal when every tracked counter matches.
    bool operator==(const DelayInfo &RHS) const {
      return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum &&
             TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum &&
             TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles;
    }

    bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); }

    // Merge another DelayInfo into this one, to represent the union of the
    // worst-case delays of each type: the largest remaining cycle counts and
    // the smallest instruction distances.
    void merge(const DelayInfo &RHS) {
      VALUCycles = std::max(VALUCycles, RHS.VALUCycles);
      VALUNum = std::min(VALUNum, RHS.VALUNum);
      TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles);
      TRANSNum = std::min(TRANSNum, RHS.TRANSNum);
      TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU);
      SALUCycles = std::max(SALUCycles, RHS.SALUCycles);
    }

    // Update this DelayInfo after issuing an instruction. Type is the delay
    // type of the instruction that was issued: VALU bumps the (non-TRANS) VALU
    // distance counters, TRANS bumps the TRANS distance counter, and any other
    // type leaves the distance counters alone. Cycles is the number of cycles
    // it takes to issue the instruction. Return true if there is no longer any
    // useful delay info.
    bool advance(DelayType Type, unsigned Cycles) {
      bool Erase = true;

      VALUNum += (Type == VALU);
      if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) {
        // Forget about the VALU instruction. It was too far back or has
        // definitely completed by now.
        VALUNum = VALU_MAX;
        VALUCycles = 0;
      } else {
        VALUCycles -= Cycles;
        Erase = false;
      }

      TRANSNum += (Type == TRANS);
      TRANSNumVALU += (Type == VALU);
      if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) {
        // Forget about any TRANS instruction. It was too far back or has
        // definitely completed by now.
        TRANSNum = TRANS_MAX;
        TRANSNumVALU = VALU_MAX;
        TRANSCycles = 0;
      } else {
        TRANSCycles -= Cycles;
        Erase = false;
      }

      if (SALUCycles <= Cycles) {
        // Forget about any SALU instruction. It has definitely completed by
        // now.
        SALUCycles = 0;
      } else {
        SALUCycles -= Cycles;
        Erase = false;
      }

      return Erase;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Print every counter that differs from its "nothing outstanding" value.
    void dump() const {
      if (VALUCycles)
        dbgs() << " VALUCycles=" << static_cast<int>(VALUCycles);
      if (VALUNum < VALU_MAX)
        dbgs() << " VALUNum=" << static_cast<int>(VALUNum);
      if (TRANSCycles)
        dbgs() << " TRANSCycles=" << static_cast<int>(TRANSCycles);
      if (TRANSNum < TRANS_MAX)
        dbgs() << " TRANSNum=" << static_cast<int>(TRANSNum);
      if (TRANSNumVALU < VALU_MAX)
        dbgs() << " TRANSNumVALU=" << static_cast<int>(TRANSNumVALU);
      if (SALUCycles)
        dbgs() << " SALUCycles=" << static_cast<int>(SALUCycles);
    }
#endif
  };


  // Per-regunit delay tracking: maps a regunit to the DelayInfo describing the
  // most recent write(s) to it.
  struct DelayState : DenseMap<unsigned, DelayInfo> {
    // Fold RHS into this state. Regunits only present in RHS are copied over;
    // regunits present in both have their DelayInfo merged.
    void merge(const DelayState &RHS) {
      for (const auto &Entry : RHS) {
        auto Result = insert(Entry);
        if (Result.second)
          continue; // Newly inserted, nothing to combine with.
        Result.first->second.merge(Entry.second);
      }
    }

    // Advance the delay info of every regunit past an issued instruction and
    // drop the entries that no longer carry useful information.
    void advance(DelayType Type, unsigned Cycles) {
      for (auto It = begin(), End = end(); It != End;) {
        // Step past the current entry before it is possibly erased.
        auto Current = It++;
        if (Current->second.advance(Type, Cycles))
          erase(Current);
      }
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Print the state, one regunit per line, in increasing regunit order.
    void dump(const TargetRegisterInfo *TRI) const {
      if (empty()) {
        dbgs() << "    empty\n";
        return;
      }

      // Collect iterators to all entries and sort them by regunit number so
      // that the output does not depend on the map's internal ordering.
      SmallVector<const_iterator, 8> Sorted;
      Sorted.reserve(size());
      for (const_iterator It = begin(), End = end(); It != End; ++It)
        Sorted.push_back(It);
      llvm::sort(Sorted,
                 [](const const_iterator &LHS, const const_iterator &RHS) {
                   return LHS->first < RHS->first;
                 });

      for (const_iterator Entry : Sorted) {
        dbgs() << "    " << printRegUnit(Entry->first, TRI);
        Entry->second.dump();
        dbgs() << "\n";
      }
    }
#endif
  };


  // The saved delay state at the end of each basic block, keyed by block
  // pointer. Read when merging predecessor states at the start of a block and
  // updated whenever a block's computed end state changes.

  DenseMap<MachineBasicBlock *, DelayState> BlockState;


  // Emit an s_delay_alu instruction before MI if Delay requires one.
  //
  // A single pending dependency is folded into LastDelayAlu when that
  // instruction still has a free slot and is close enough to MI; otherwise a
  // new s_delay_alu is inserted immediately before MI.
  //
  // Returns the s_delay_alu that can still accept one more dependency:
  // LastDelayAlu itself when nothing had to be encoded, the newly built
  // instruction when it only uses its first slot, or nullptr when the
  // instruction that was written to has no room left.
  MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay,
                             MachineInstr *LastDelayAlu) {
    // Layout of the immediate as used by this function: first dependency in
    // bits [3:0], skip count in bits [6:4], second dependency in bits [10:7].
    const unsigned Dep0Mask = 0xf;
    const unsigned SkipShift = 4;
    const unsigned Dep1Shift = 7;
    const unsigned Dep1Mask = 0x780;

    unsigned Imm = 0;

    // Put Code into the first dependency slot if it is still empty, otherwise
    // into the second one.
    auto AddDep = [&](unsigned Code) {
      Imm |= (Imm & Dep0Mask) ? (Code << Dep1Shift) : Code;
    };

    // Wait for a TRANS instruction.
    if (Delay.TRANSNum < DelayInfo::TRANS_MAX)
      AddDep(4 + Delay.TRANSNum);

    // Wait for a VALU instruction (if it's more recent than any TRANS
    // instruction that we're also waiting for).
    if (Delay.VALUNum < DelayInfo::VALU_MAX &&
        Delay.VALUNum <= Delay.TRANSNumVALU)
      AddDep(Delay.VALUNum);

    // Wait for an SALU instruction.
    if (Delay.SALUCycles) {
      assert(Delay.SALUCycles < DelayInfo::SALU_CYCLES_MAX);
      // If both a VALU and a TRANS delay are already encoded there is no room
      // left for an SALU delay as well, so it is simply dropped.
      if (!(Imm & Dep1Mask))
        AddDep(Delay.SALUCycles + 8);
    }

    // Don't emit the s_delay_alu instruction if there's nothing to wait for.
    if (Imm == 0)
      return LastDelayAlu;

    const bool UsesSecondSlot = (Imm & Dep1Mask) != 0;

    // If we only need to wait for one instruction, try encoding it in the last
    // s_delay_alu that we emitted.
    if (LastDelayAlu && !UsesSecondSlot) {
      // Count the real instructions strictly between LastDelayAlu and MI.
      unsigned Skip = 0;
      auto It = MachineBasicBlock::instr_iterator(LastDelayAlu);
      const auto End = MachineBasicBlock::instr_iterator(MI);
      for (++It; It != End; ++It) {
        if (!(It->isBundle() || It->isMetaInstruction()))
          ++Skip;
      }

      if (Skip < 6) {
        MachineOperand &ImmOp = LastDelayAlu->getOperand(0);
        unsigned Combined = ImmOp.getImm();
        assert((Combined & ~0xf) == 0 &&
               "Remembered an s_delay_alu with no room for another delay!");
        Combined |= (Imm << Dep1Shift) | (Skip << SkipShift);
        ImmOp.setImm(Combined);
        return nullptr;
      }
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MachineInstr *DelayAlu =
        BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm);

    // Remember the s_delay_alu for next time if there is still room in it to
    // encode another delay.
    if (UsesSecondSlot)
      return nullptr;
    return DelayAlu;
  }


  // Walk MBB once, tracking the delay state of every regunit.
  //
  // The state at the start of the block is the merge of the saved end states
  // of all predecessors. When Emit is false the resulting end state is saved
  // in BlockState (analysis pass); when Emit is true s_delay_alu instructions
  // are inserted or updated and the end state is expected to be unchanged.
  //
  // Returns true if anything changed: with Emit == false, the saved end state
  // of MBB; with Emit == true, the instructions of MBB.
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    DelayState State;
    for (auto *Pred : MBB.predecessors())
      State.merge(BlockState[Pred]);

    LLVM_DEBUG(dbgs() << "  State at start of " << printMBBReference(MBB)
                      << "\n";
               State.dump(TRI););

    bool Changed = false;
    MachineInstr *LastDelayAlu = nullptr;

    // Iterate over the contents of bundles, but don't emit any instructions
    // inside a bundle.
    for (auto &MI : MBB.instrs()) {
      if (MI.isBundle() || MI.isMetaInstruction())
        continue;

      // Ignore some more instructions that do not generate any code.
      switch (MI.getOpcode()) {
      case AMDGPU::SI_RETURN_TO_EPILOG:
        continue;
      }

      DelayType Type = getDelayType(MI.getDesc().TSFlags);

      if (instructionWaitsForVALU(MI)) {
        // Forget about all outstanding VALU delays.
        // TODO: This is overkill since it also forgets about SALU delays.
        State = DelayState();
      } else if (Type != OTHER) {
        DelayInfo Delay;
        // TODO: Scan implicit uses too?
        for (const auto &Op : MI.explicit_uses()) {
          if (!Op.isReg())
            continue;
          // One of the operands of the writelane is also the output operand.
          // This creates the insertion of redundant delays. Hence, we have to
          // ignore this operand.
          if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
            continue;
          for (MCRegUnit Unit : TRI->regunits(Op.getReg())) {
            auto It = State.find(Unit);
            if (It != State.end()) {
              Delay.merge(It->second);
              // Erase through the iterator we already have instead of
              // looking the key up a second time.
              State.erase(It);
            }
          }
        }
        if (Emit && !MI.isBundledWithPred()) {
          // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or
          // just ignore them?
          //
          // emitDelayAlu modifies the block in one of two ways: it inserts a
          // new instruction directly before MI (MI's predecessor changes), or
          // it folds the delay into LastDelayAlu and returns nullptr (the
          // remembered instruction changes). Detect both so that the pass
          // reports the modification to its caller.
          MachineInstr *PrevBefore = MI.getPrevNode();
          MachineInstr *NewLastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu);
          Changed |= (NewLastDelayAlu != LastDelayAlu) ||
                     (MI.getPrevNode() != PrevBefore);
          LastDelayAlu = NewLastDelayAlu;
        }
      }

      if (Type != OTHER) {
        // TODO: Scan implicit defs too?
        for (const auto &Op : MI.defs()) {
          unsigned Latency = SchedModel.computeOperandLatency(
              &MI, Op.getOperandNo(), nullptr, 0);
          for (MCRegUnit Unit : TRI->regunits(Op.getReg()))
            State[Unit] = DelayInfo(Type, Latency);
        }
      }

      // Advance by the number of cycles it takes to issue this instruction.
      // TODO: Use a more advanced model that accounts for instructions that
      // take multiple cycles to issue on a particular pipeline.
      unsigned Cycles = SIInstrInfo::getNumWaitStates(MI);
      // TODO: In wave64 mode, double the number of cycles for VALU and VMEM
      // instructions on the assumption that they will usually have to be issued
      // twice?
      State.advance(Type, Cycles);

      LLVM_DEBUG(dbgs() << "  State after " << MI; State.dump(TRI););
    }

    if (Emit) {
      assert(State == BlockState[&MBB] &&
             "Basic block state should not have changed on final pass!");
    } else if (State != BlockState[&MBB]) {
      BlockState[&MBB] = std::move(State);
      Changed = true;
    }
    return Changed;
  }


  // Pass entry point. Does nothing for skipped functions and for subtargets
  // without s_delay_alu support. Otherwise it first iterates over the basic
  // blocks until the per-block delay states reach a fixed point, and then
  // makes one final pass over every block to emit the s_delay_alu
  // instructions. Returns the accumulated result of the emitting pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
                      << "\n");

    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasDelayAlu())
      return false;

    SII = ST.getInstrInfo();
    TRI = ST.getRegisterInfo();

    SchedModel.init(&ST);

    // BlockState is keyed by MachineBasicBlock pointer and survives between
    // invocations of this pass object. Entries left over from a previously
    // processed function are meaningless here (and a block of this function
    // may even reuse the address of an old one), so start from a clean slate.
    BlockState.clear();

    // Calculate the delay state for each basic block, iterating until we reach
    // a fixed point.
    SetVector<MachineBasicBlock *> WorkList;
    for (auto &MBB : reverse(MF))
      WorkList.insert(&MBB);
    while (!WorkList.empty()) {
      auto &MBB = *WorkList.pop_back_val();
      bool Changed = runOnMachineBasicBlock(MBB, false);
      // A changed end state can affect every successor, so revisit them.
      if (Changed)
        WorkList.insert(MBB.succ_begin(), MBB.succ_end());
    }

    LLVM_DEBUG(dbgs() << "Final pass over all BBs\n");

    // Make one last pass over all basic blocks to emit s_delay_alu
    // instructions.
    bool Changed = false;
    for (auto &MBB : MF)
      Changed |= runOnMachineBasicBlock(MBB, true);
    return Changed;
  }

};


} // namespace


// Storage for the pass identifier declared inside the class.
char AMDGPUInsertDelayAlu::ID = 0;


// Externally visible alias of the pass identifier, so that code outside this
// file can refer to the pass without seeing the class itself.
char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;


// Register the pass under the DEBUG_TYPE name ("amdgpu-insert-delay-alu").
INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",

                false, false)

MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:72

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUInsertDelayAlu.cpp:22

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPU.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Emit
dxil metadata DXIL Metadata Emit
Definition: DXILTranslateMetadata.cpp:74

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

merge
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
Definition: LoopDeletion.cpp:51

I
#define I(x, y, z)
Definition: MD5.cpp:58

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1875

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38

SIInstrInfo.h
Interface definition for SIInstrInfo.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SetVector.h
This file implements a set that has insertion order iteration characteristics.

RHS
Value * RHS
Definition: X86PartialReduction.cpp:76

const_iterator

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::DenseMap
Definition: DenseMap.h:742

llvm::FunctionPass::skipFunction
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178

llvm::GCNSubtarget
Definition: GCNSubtarget.h:35

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:102

llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:395

llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:324

llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:393

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition: MachineBasicBlock.h:288

llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:410

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:168

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction
Definition: MachineFunction.h:259

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:718

llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:609

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:684

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::Pass::dump
void dump() const
Definition: Pass.cpp:136

llvm::SIInstrInfo
Definition: SIInstrInfo.h:83

llvm::SIInstrInfo::getNumWaitStates
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
Definition: SIInstrInfo.cpp:2082

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SetVector::pop_back_val
value_type pop_back_val()
Definition: SetVector.h:285

llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:676

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:238

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30

llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:51

llvm::TargetSchedModel::computeOperandLatency
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
Definition: TargetSchedule.cpp:173

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

uint64_t

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::AMDGPU::DepCtr::decodeFieldVaVdst
unsigned decodeFieldVaVdst(unsigned Encoded)
Definition: AMDGPUBaseInfo.cpp:1670

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::SIInstrFlags::MIMG
@ MIMG
Definition: SIDefines.h:83

llvm::SIInstrFlags::VALU
@ VALU
Definition: SIDefines.h:56

llvm::SIInstrFlags::MUBUF
@ MUBUF
Definition: SIDefines.h:80

llvm::SIInstrFlags::EXP
@ EXP
Definition: SIDefines.h:86

llvm::SIInstrFlags::MTBUF
@ MTBUF
Definition: SIDefines.h:81

llvm::SIInstrFlags::TRANS
@ TRANS
Definition: SIDefines.h:77

llvm::SIInstrFlags::DS
@ DS
Definition: SIDefines.h:88

llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:87

llvm::SIInstrFlags::SALU
@ SALU
Definition: SIDefines.h:55

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877

llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:372

llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34

llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043

llvm::printRegUnit
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
Definition: TargetRegisterInfo.cpp:139

llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition: AddressRanges.h:153

llvm::erase
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::AMDGPUInsertDelayAluID
char & AMDGPUInsertDelayAluID
Definition: AMDGPUInsertDelayAlu.cpp:462

llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:120