LLVM  7.0.0svn
Macros | Enumerations | Functions | Variables
SIInsertWaitcnts.cpp File Reference

Insert wait instructions for memory reads and writes. More...

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
Include dependency graph for SIInsertWaitcnts.cpp:

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "si-insert-waitcnts"
 
#define CNT_MASK(t)   (1u << (t))
 
#define ForAllWaitEventType(w)
 

Enumerations

enum  InstCounterType
 
enum  WaitEventType
 
enum  RegisterMapping
 

Functions

 DEBUG_COUNTER (ForceExpCounter, DEBUG_TYPE"-forceexp", "Force emit s_waitcnt expcnt(0) instrs")
 
 DEBUG_COUNTER (ForceLgkmCounter, DEBUG_TYPE"-forcelgkm", "Force emit s_waitcnt lgkmcnt(0) instrs")
 
 DEBUG_COUNTER (ForceVMCounter, DEBUG_TYPE"-forcevm", "Force emit s_waitcnt vmcnt(0) instrs")
 
 INITIALIZE_PASS_BEGIN (SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false)
 
static bool readsVCCZ (const MachineInstr &MI)
 

Variables

static cl::opt< unsigned > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(0), cl::Hidden)
 
 DEBUG_TYPE
 
SI Insert Waitcnts
 
SI Insert false
 

Detailed Description

Insert wait instructions for memory reads and writes.

Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.

Definition in file SIInsertWaitcnts.cpp.

Macro Definition Documentation

◆ CNT_MASK

#define CNT_MASK (   t)    (1u << (t))

Definition at line 76 of file SIInsertWaitcnts.cpp.

Referenced by readsVCCZ().

◆ DEBUG_TYPE

#define DEBUG_TYPE   "si-insert-waitcnts"

Definition at line 56 of file SIInsertWaitcnts.cpp.

◆ ForAllWaitEventType

#define ForAllWaitEventType (   w)
Value:
for (enum WaitEventType w = (enum WaitEventType)0; \
(w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
(w) = (enum WaitEventType)((w) + 1))
WaitEventType

Definition at line 125 of file SIInsertWaitcnts.cpp.

Referenced by readsVCCZ().

Enumeration Type Documentation

◆ InstCounterType

Definition at line 78 of file SIInsertWaitcnts.cpp.

◆ RegisterMapping

Definition at line 117 of file SIInsertWaitcnts.cpp.

◆ WaitEventType

Definition at line 97 of file SIInsertWaitcnts.cpp.

Function Documentation

◆ DEBUG_COUNTER() [1/3]

DEBUG_COUNTER ( ForceExpCounter  ,
DEBUG_TYPE"-forceexp"  ,
"Force emit s_waitcnt expcnt(0) instrs"   
)

◆ DEBUG_COUNTER() [2/3]

DEBUG_COUNTER ( ForceLgkmCounter  ,
DEBUG_TYPE"-forcelgkm"  ,
"Force emit s_waitcnt lgkmcnt(0) instrs"   
)

◆ DEBUG_COUNTER() [3/3]

DEBUG_COUNTER ( ForceVMCounter  ,
DEBUG_TYPE"-forcevm"  ,
"Force emit s_waitcnt vmcnt(0) instrs"   
)

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( SIInsertWaitcnts  ,
DEBUG_TYPE  ,
"SI Insert Waitcnts"  ,
false  ,
false   
)

◆ readsVCCZ()

static bool readsVCCZ ( const MachineInstr & MI)
static

Definition at line 848 of file SIInsertWaitcnts.cpp.

References AS, assert(), B, llvm::MachineBasicBlock::begin(), llvm::ReversePostOrderTraversal< GraphT, GT >::begin(), llvm::MachineFunction::begin(), llvm::LoopBase< BlockT, LoopT >::blocks(), llvm::BuildMI(), CNT_MASK, llvm::count(), llvm::dbgs(), llvm::AMDGPU::decodeExpcnt(), llvm::AMDGPU::decodeLgkmcnt(), llvm::AMDGPU::decodeVmcnt(), llvm::tgtok::Def, E, llvm::MachineBasicBlock::empty(), llvm::AMDGPU::encodeWaitcnt(), llvm::MachineBasicBlock::end(), llvm::ReversePostOrderTraversal< GraphT, GT >::end(), llvm::MachineFunction::end(), llvm::SIInstrFlags::EXP, llvm::SIInstrFlags::EXP_CNT, llvm::find(), ForAllWaitEventType, ForceEmitZeroFlag, llvm::MachineFunction::front(), llvm::AMDGPUSubtarget::get(), llvm::AMDGPU::getAtomicNoRetOp(), llvm::MachineInstr::getDebugLoc(), llvm::AMDGPU::getExpcntBitMask(), llvm::MachineBasicBlock::getFirstNonPHI(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::MachineOperand::getImm(), llvm::MachineFunction::getInfo(), llvm::AMDGPU::IsaInfo::getIsaVersion(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::AMDGPU::getLgkmcntBitMask(), llvm::MachineBasicBlock::getNumber(), llvm::MachineInstr::getNumOperands(), llvm::MachineInstr::getOpcode(), llvm::MachineInstr::getOperand(), llvm::MachineInstr::getParent(), llvm::MachineBasicBlock::getParent(), llvm::MachineOperand::getReg(), llvm::MachineFunction::getRegInfo(), llvm::GCNSubtarget::getRegisterInfo(), llvm::MachineFunction::getSubtarget(), llvm::AMDGPU::getVmcntBitMask(), I, llvm::AMDGPU::SendMsg::ID_GS_DONE, llvm::AMDGPU::SendMsg::ID_MASK_, llvm::MachineBasicBlock::insert(), llvm::MachineInstr::isBranch(), llvm::MachineInstr::isDebugInstr(), llvm::MachineBasicBlock::isSuccessor(), llvm::MachineOperand::isUndef(), llvm::SIInstrInfo::isVMEM(), llvm::AMDGPUISD::KILL, llvm::SIInstrFlags::LGKM_CNT, LLVM_DEBUG, llvm::max(), llvm::MachineInstr::mayLoad(), llvm::MachineInstr::mayStore(), llvm::MachineInstr::memoperands(), llvm::MachineInstr::memoperands_empty(), MI, Modified, 
llvm::MachineInstr::modifiesRegister(), MRI, llvm::MachineBasicBlock::pred_begin(), llvm::MachineBasicBlock::pred_end(), llvm::MachineBasicBlock::predecessors(), llvm::MachineInstr::print(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MachineBasicBlock::push_back(), llvm::MachineInstr::removeFromParent(), llvm::AMDGPUSubtarget::SEA_ISLANDS, llvm::MachineOperand::setImm(), llvm::ARM_MB::ST, llvm::MachineBasicBlock::succ_empty(), T, TII, TRI, llvm::SIInstrFlags::VM_CNT, llvm::RISCVFenceField::W, X, and Y.

Variable Documentation

◆ DEBUG_TYPE

DEBUG_TYPE

Definition at line 837 of file SIInsertWaitcnts.cpp.

◆ false

SI Insert false

Definition at line 837 of file SIInsertWaitcnts.cpp.

◆ ForceEmitZeroFlag

cl::opt<unsigned> ForceEmitZeroFlag("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(0), cl::Hidden)
static

Referenced by readsVCCZ().

◆ Waitcnts

SI Insert Waitcnts

Definition at line 837 of file SIInsertWaitcnts.cpp.