LLVM 22.0.0git
AMDGPUBarrierLatency.cpp
Go to the documentation of this file.
1//===--- AMDGPUBarrierLatency.cpp - AMDGPU Barrier Latency ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a DAG scheduling mutation to add latency to
10/// barrier edges between ATOMIC_FENCE instructions and preceding
11/// memory accesses potentially affected by the fence.
12/// This encourages the scheduling of more instructions before
13/// ATOMIC_FENCE instructions. ATOMIC_FENCE instructions may
14/// introduce wait counting or indicate an impending S_BARRIER
15/// wait. Having more instructions in-flight across these
16/// constructs improves latency hiding.
17//
18//===----------------------------------------------------------------------===//
19
22#include "SIInstrInfo.h"
24
25using namespace llvm;
26
27namespace {
28
29class BarrierLatency : public ScheduleDAGMutation {
30private:
31 SmallSet<SyncScope::ID, 4> IgnoredScopes;
32
33public:
34 BarrierLatency(MachineFunction *MF) {
35 LLVMContext &Context = MF->getFunction().getContext();
36 IgnoredScopes.insert(SyncScope::SingleThread);
37 IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront"));
38 IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront-one-as"));
39 IgnoredScopes.insert(Context.getOrInsertSyncScopeID("singlethread-one-as"));
40 }
41 void apply(ScheduleDAGInstrs *DAG) override;
42};
43
44void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
45 constexpr unsigned SyntheticLatency = 2000;
46 for (SUnit &SU : DAG->SUnits) {
47 const MachineInstr *MI = SU.getInstr();
48 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
49 continue;
50
51 // Update latency on barrier edges of ATOMIC_FENCE.
52 // Ignore scopes not expected to have any latency.
53 SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
54 if (IgnoredScopes.contains(SSID))
55 continue;
56
57 for (SDep &PredDep : SU.Preds) {
58 if (!PredDep.isBarrier())
59 continue;
60 SUnit *PredSU = PredDep.getSUnit();
61 MachineInstr *MI = PredSU->getInstr();
62 // Only consider memory loads
63 if (!MI->mayLoad() || MI->mayStore())
64 continue;
65 SDep ForwardD = PredDep;
66 ForwardD.setSUnit(&SU);
67 for (SDep &SuccDep : PredSU->Succs) {
68 if (SuccDep == ForwardD) {
69 SuccDep.setLatency(SuccDep.getLatency() + SyntheticLatency);
70 break;
71 }
72 }
73 PredDep.setLatency(PredDep.getLatency() + SyntheticLatency);
74 PredSU->setDepthDirty();
75 SU.setDepthDirty();
76 }
77 }
78}
79
80} // end namespace
81
82std::unique_ptr<ScheduleDAGMutation>
84 return std::make_unique<BarrierLatency>(MF);
85}
Provides AMDGPU specific target descriptions.
IRTranslator LLVM IR MI
Interface definition for SIInstrInfo.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Function & getFunction()
Return the LLVM function that this machine code represents.
SUnit * getSUnit() const
void setLatency(unsigned Lat)
Sets the latency for this edge.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
void setSUnit(SUnit *SU)
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
SmallVector< SDep, 4 > Succs
All sunit successors.
LLVM_ABI void setDepthDirty()
Sets a flag in this node to indicate that its stored Depth value will require recomputation the next ...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
Mutate the DAG as a postpass after normal DAG building.
std::vector< SUnit > SUnits
The scheduling units.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:228
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)