LLVM 20.0.0git
SIInsertHardClauses.cpp
Go to the documentation of this file.
//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
//
//===----------------------------------------------------------------------===//
33
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;
41
// Name of this pass; it is also the argument handed to INITIALIZE_PASS when
// the pass is registered.
#define DEBUG_TYPE "si-insert-hard-clauses"
43
44namespace {
45
/// Classification of an instruction with respect to hard clauses.
///
/// The enumerator order is significant: every "real" clause type (one that can
/// start and populate a clause) sorts at or below LAST_REAL_HARDCLAUSE_TYPE,
/// so a single `<=` comparison separates real types from the bookkeeping
/// values that follow it.
enum HardClauseType {
  // For GFX10:

  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT,

  // For GFX11:

  // Texture memory instructions.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat (not global or scratch) memory instructions.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // Common:

  // Instructions that access LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Upper bound (inclusive) of the types that may form a clause.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions, which are allowed in the middle of a hard clause,
  // except for s_waitcnt.
  HARDCLAUSE_INTERNAL,
  // Meta instructions that do not result in any ISA like KILL.
  HARDCLAUSE_IGNORE,
  // Instructions that are not allowed in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};
91
92class SIInsertHardClauses : public MachineFunctionPass {
93public:
94 static char ID;
95 const GCNSubtarget *ST = nullptr;
96
97 SIInsertHardClauses() : MachineFunctionPass(ID) {}
98
99 void getAnalysisUsage(AnalysisUsage &AU) const override {
100 AU.setPreservesCFG();
102 }
103
104 HardClauseType getHardClauseType(const MachineInstr &MI) {
105 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
106 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
108 if (ST->hasNSAClauseBug()) {
109 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
110 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
111 return HARDCLAUSE_ILLEGAL;
112 }
113 return HARDCLAUSE_VMEM;
114 }
116 return HARDCLAUSE_FLAT;
117 } else {
118 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
119 if (SIInstrInfo::isMIMG(MI)) {
120 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
121 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
123 if (BaseInfo->BVH)
124 return HARDCLAUSE_BVH;
125 if (BaseInfo->Sampler)
126 return HARDCLAUSE_MIMG_SAMPLE;
127 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
128 : HARDCLAUSE_MIMG_LOAD
129 : HARDCLAUSE_MIMG_STORE;
130 }
132 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
133 : HARDCLAUSE_VMEM_LOAD
134 : HARDCLAUSE_VMEM_STORE;
135 }
136 if (SIInstrInfo::isFLAT(MI)) {
137 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
138 : HARDCLAUSE_FLAT_LOAD
139 : HARDCLAUSE_FLAT_STORE;
140 }
141 }
142 // TODO: LDS
144 return HARDCLAUSE_SMEM;
145 }
146
147 // Don't form VALU clauses. It's not clear what benefit they give, if any.
148
149 // In practice s_nop is the only internal instruction we're likely to see.
150 // It's safe to treat the rest as illegal.
151 if (MI.getOpcode() == AMDGPU::S_NOP)
152 return HARDCLAUSE_INTERNAL;
153 if (MI.isMetaInstruction())
154 return HARDCLAUSE_IGNORE;
155 return HARDCLAUSE_ILLEGAL;
156 }
157
158 // Track information about a clause as we discover it.
159 struct ClauseInfo {
160 // The type of all (non-internal) instructions in the clause.
161 HardClauseType Type = HARDCLAUSE_ILLEGAL;
162 // The first (necessarily non-internal) instruction in the clause.
163 MachineInstr *First = nullptr;
164 // The last non-internal instruction in the clause.
165 MachineInstr *Last = nullptr;
166 // The length of the clause including any internal instructions in the
167 // middle (but not at the end) of the clause.
168 unsigned Length = 0;
169 // Internal instructions at the and of a clause should not be included in
170 // the clause. Count them in TrailingInternalLength until a new memory
171 // instruction is added.
172 unsigned TrailingInternalLength = 0;
173 // The base operands of *Last.
175 };
176
177 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
178 if (CI.First == CI.Last)
179 return false;
180 assert(CI.Length <= ST->maxHardClauseLength() &&
181 "Hard clause is too long!");
182
183 auto &MBB = *CI.First->getParent();
184 auto ClauseMI =
185 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
186 .addImm(CI.Length - 1);
187 finalizeBundle(MBB, ClauseMI->getIterator(),
188 std::next(CI.Last->getIterator()));
189 return true;
190 }
191
192 bool runOnMachineFunction(MachineFunction &MF) override {
193 if (skipFunction(MF.getFunction()))
194 return false;
195
196 ST = &MF.getSubtarget<GCNSubtarget>();
197 if (!ST->hasHardClauses())
198 return false;
199
200 const SIInstrInfo *SII = ST->getInstrInfo();
201 const TargetRegisterInfo *TRI = ST->getRegisterInfo();
202
203 bool Changed = false;
204 for (auto &MBB : MF) {
205 ClauseInfo CI;
206 for (auto &MI : MBB) {
207 HardClauseType Type = getHardClauseType(MI);
208
209 int64_t Dummy1;
210 bool Dummy2;
211 LocationSize Dummy3 = 0;
213 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
214 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
215 Dummy3, TRI)) {
216 // We failed to get the base operands, so we'll never clause this
217 // instruction with any other, so pretend it's illegal.
218 Type = HARDCLAUSE_ILLEGAL;
219 }
220 }
221
222 if (CI.Length == ST->maxHardClauseLength() ||
223 (CI.Length && Type != HARDCLAUSE_INTERNAL &&
224 Type != HARDCLAUSE_IGNORE &&
225 (Type != CI.Type ||
226 // Note that we lie to shouldClusterMemOps about the size of the
227 // cluster. When shouldClusterMemOps is called from the machine
228 // scheduler it limits the size of the cluster to avoid increasing
229 // register pressure too much, but this pass runs after register
230 // allocation so there is no need for that kind of limit.
231 // We also lie about the Offset and OffsetIsScalable parameters,
232 // as they aren't used in the SIInstrInfo implementation.
233 !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
234 2, 2)))) {
235 // Finish the current clause.
236 Changed |= emitClause(CI, SII);
237 CI = ClauseInfo();
238 }
239
240 if (CI.Length) {
241 // Extend the current clause.
242 if (Type != HARDCLAUSE_IGNORE) {
243 if (Type == HARDCLAUSE_INTERNAL) {
244 ++CI.TrailingInternalLength;
245 } else {
246 ++CI.Length;
247 CI.Length += CI.TrailingInternalLength;
248 CI.TrailingInternalLength = 0;
249 CI.Last = &MI;
250 CI.BaseOps = std::move(BaseOps);
251 }
252 }
253 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
254 // Start a new clause.
255 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
256 }
257 }
258
259 // Finish the last clause in the basic block if any.
260 if (CI.Length)
261 Changed |= emitClause(CI, SII);
262 }
263
264 return Changed;
265 }
266};
267
268} // namespace
269
270char SIInsertHardClauses::ID = 0;
271
272char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
273
274INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
275 false, false)
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:438
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:550
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:624
static bool isMIMG(const MachineInstr &MI)
Definition: SIInstrInfo.h:586
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:618
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type(LLVMContext &C, TypeID tid)
Definition: Type.h:93
self_iterator getIterator()
Definition: ilist_node.h:132
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIInsertHardClausesID