30 #define DEBUG_TYPE "si-insert-waits"
33 using namespace llvm::AMDGPU;
54 typedef Counters RegCounters[512];
55 typedef std::pair<unsigned, unsigned> RegInterval;
67 static const Counters ZeroCounts;
70 Counters HardwareLimits;
77 Counters DelayedWaitOn;
86 RegCounters DefinedRegs;
89 unsigned ExpInstrTypesSeen;
94 bool LastInstWritesM0;
97 bool IsFlatOutstanding;
118 const Counters& Increment);
123 const Counters &Counts);
139 bool hasOutstandingLGKM()
const;
149 ExpInstrTypesSeen(0),
150 VCCZCorrupt(
false) { }
155 return "SI insert wait instructions";
167 "SI Insert Waits",
false,
false)
171 char SIInsertWaits::
ID = 0;
176 return new SIInsertWaits();
179 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
183 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
187 bool SIInsertWaits::hasOutstandingLGKM()
const {
188 return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
193 Counters Result = { { 0, 0, 0 } };
203 if (
TII->isSMRD(MI)) {
207 "First LGKM operand must be a register!");
212 Result.Named.LGKM = Size > 4 ? 2 : 1;
217 Result.Named.LGKM = 1;
221 Result.Named.LGKM = 1;
225 Result.Named.LGKM = 0;
233 if (!Op.
isReg() || !TRI->isInAllocatableClass(Op.
getReg()))
264 if (
TII->isFLAT(MI)) {
275 if (
I->isReg() &&
I->isUse())
288 Result.first = TRI->getEncodingValue(Reg.
getReg());
289 Result.second = Result.first + Size / 4;
296 const Counters &Increment) {
299 Counters Limit = ZeroCounts;
302 if (
TII->mayAccessFlatAddressSpace(*I))
303 IsFlatOutstanding =
true;
305 for (
unsigned i = 0;
i < 3; ++
i) {
306 LastIssued.Array[
i] += Increment.Array[
i];
307 if (Increment.Array[
i])
308 Limit.Array[
i] = LastIssued.Array[
i];
309 Sum += Increment.Array[
i];
314 LastOpcodeType = OTHER;
328 if (LastOpcodeType == VMEM && Increment.Named.VM) {
332 LastInstWritesM0 =
false;
336 LastOpcodeType = SMEM;
337 else if (Increment.Named.VM)
338 LastOpcodeType = VMEM;
342 if (Increment.Named.EXP) {
343 ExpInstrTypesSeen |=
TII->isEXP(*I) ? 1 : 2;
346 for (
unsigned i = 0, e = I->getNumOperands();
i != e; ++
i) {
348 if (!isOpRelevant(Op))
352 RegInterval
Interval = getRegInterval(RC, Op);
353 for (
unsigned j = Interval.first; j < Interval.second; ++j) {
357 DefinedRegs[j] = Limit;
373 if (I != MBB.
end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
381 Ordered[0] = !IsFlatOutstanding;
384 Ordered[1] = ExpInstrTypesSeen == 3;
390 Counters Counts = HardwareLimits;
393 bool NeedWait =
false;
395 for (
unsigned i = 0;
i < 3; ++
i) {
397 if (Required.Array[
i] <= WaitedOn.Array[
i])
403 unsigned Value = LastIssued.Array[
i] - Required.Array[
i];
406 Counts.Array[
i] =
std::min(Value, HardwareLimits.Array[i]);
412 WaitedOn.Array[
i] = LastIssued.Array[
i] - Counts.Array[
i];
419 if (Counts.Named.EXP == 0)
420 ExpInstrTypesSeen = 0;
429 LastOpcodeType = OTHER;
430 LastInstWritesM0 =
false;
431 IsFlatOutstanding =
false;
438 for (
unsigned i = 0;
i < 3; ++
i)
439 Dst.Array[
i] = std::max(Dst.Array[
i], Src.Array[
i]);
444 for (
unsigned i = 0;
i < 3; ++
i)
445 if (Counter.Array[
i])
451 assert(I->getOpcode() == AMDGPU::S_WAITCNT);
453 unsigned Imm = I->getOperand(0).getImm();
454 Counters Counts, WaitOn;
460 for (
unsigned i = 0;
i < 3; ++
i) {
461 if (Counts.Array[
i] <= LastIssued.Array[
i])
462 WaitOn.Array[
i] = LastIssued.Array[
i] - Counts.Array[
i];
470 Counters SIInsertWaits::handleOperands(
MachineInstr &MI) {
472 Counters Result = ZeroCounts;
481 if (!Op.
isReg() || !TRI->isInAllocatableClass(Op.
getReg()))
485 RegInterval Interval = getRegInterval(RC, Op);
486 for (
unsigned j = Interval.first; j < Interval.second; ++j) {
507 if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
509 LastInstWritesM0 =
false;
514 LastInstWritesM0 =
false;
516 unsigned NumOperands = I->getNumOperands();
517 for (
unsigned i = 0;
i < NumOperands;
i++) {
521 LastInstWritesM0 =
true;
528 bool Changes =
false;
531 TII =
ST->getInstrInfo();
541 WaitedOn = ZeroCounts;
542 DelayedWaitOn = ZeroCounts;
543 LastIssued = ZeroCounts;
544 LastOpcodeType = OTHER;
545 LastInstWritesM0 =
false;
546 IsFlatOutstanding =
false;
547 ReturnsVoid = MFI->returnsVoid();
549 memset(&UsedRegs, 0,
sizeof(UsedRegs));
550 memset(&DefinedRegs, 0,
sizeof(DefinedRegs));
555 bool HaveScalarStores =
false;
565 if (!HaveScalarStores && TII->isScalarStore(*I))
566 HaveScalarStores =
true;
577 if (TII->isSMRD(I->getOpcode())) {
579 }
else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
588 DEBUG(
dbgs() <<
"Inserting vccz bug work-around before: " << *I <<
'\n');
595 insertWait(MBB, I, LastIssued);
600 BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
602 .addReg(AMDGPU::VCC);
607 if (I->getOpcode() == AMDGPU::S_WAITCNT) {
608 handleExistingWait(*I);
620 if ((I->getOpcode() == AMDGPU::S_BARRIER &&
621 ST->needWaitcntBeforeBarrier()) ||
622 I->getOpcode() == AMDGPU::S_SENDMSG ||
623 I->getOpcode() == AMDGPU::S_SENDMSGHALT)
624 Required = LastIssued;
626 Required = handleOperands(*I);
628 Counters Increment = getHwCounts(*I);
633 Changes |= insertWait(MBB, I, Required);
635 pushInstruction(MBB, I, Increment);
636 handleSendMsg(MBB, I);
638 if (I->getOpcode() == AMDGPU::S_ENDPGM ||
639 I->getOpcode() == AMDGPU::SI_RETURN)
647 if (HaveScalarStores) {
656 bool SeenDCacheWB =
false;
661 if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
663 else if (TII->isScalarStore(*I))
664 SeenDCacheWB =
false;
667 if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
668 I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
670 BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
677 I->eraseFromParent();
unsigned getExpcntBitMask(IsaVersion Version)
void push_back(const T &Elt)
mop_iterator operands_end()
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
AMDGPU specific subclass of TargetSubtarget.
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt)
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
bool mayStore() const
Return true if this instruction could possibly modify memory.
unsigned getVmcntBitMask(IsaVersion Version)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
unsigned getSize() const
Return the size of the register in bytes, which is also the size of a stack slot allocated to hold a ...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Reg
All possible values of the reg field in the ModR/M byte.
unsigned encodeWaitcnt(IsaVersion Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
static void increaseCounters(Counters &Dst, const Counters &Src)
helper function for handleOperands
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
IsaVersion getIsaVersion(const FeatureBitset &Features)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
unsigned getSize(const MachineInstr &MI) const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static bool countersNonZero(const Counters &Counter)
check whether any of the counters is non-zero
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
FunctionPass class - This class is used to implement most global optimizations.
static bool readsVCCZ(const MachineInstr &MI)
unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt)
unsigned getLgkmcntBitMask(IsaVersion Version)
Iterator for intrusive lists based on ilist_node.
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt)
INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,"SI Insert Waits", false, false) INITIALIZE_PASS_END(SIInsertWaits
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
FunctionPass * createSIInsertWaitsPass()
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
mop_iterator operands_begin()
StringRef - Represent a constant reference to a string, i.e.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...