Line data Source code
1 : //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
12 : /// 128 ALU instructions; these instructions can access up to 4 prefetched
13 : /// lines of 16 registers from constant buffers. Such ALU clauses are
14 : /// initiated by CF_ALU instructions.
15 : //===----------------------------------------------------------------------===//
16 :
17 : #include "AMDGPU.h"
18 : #include "AMDGPUSubtarget.h"
19 : #include "R600Defines.h"
20 : #include "R600InstrInfo.h"
21 : #include "R600RegisterInfo.h"
22 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 : #include "llvm/ADT/SmallVector.h"
24 : #include "llvm/ADT/StringRef.h"
25 : #include "llvm/CodeGen/MachineBasicBlock.h"
26 : #include "llvm/CodeGen/MachineFunction.h"
27 : #include "llvm/CodeGen/MachineFunctionPass.h"
28 : #include "llvm/CodeGen/MachineInstr.h"
29 : #include "llvm/CodeGen/MachineInstrBuilder.h"
30 : #include "llvm/CodeGen/MachineOperand.h"
31 : #include "llvm/Pass.h"
32 : #include "llvm/Support/ErrorHandling.h"
33 : #include <cassert>
34 : #include <cstdint>
35 : #include <utility>
36 : #include <vector>
37 :
38 : using namespace llvm;
39 :
40 : namespace llvm {
41 :
42 : void initializeR600EmitClauseMarkersPass(PassRegistry&);
43 :
44 : } // end namespace llvm
45 :
46 : namespace {
47 :
48 : class R600EmitClauseMarkers : public MachineFunctionPass {
49 : private:
50 : const R600InstrInfo *TII = nullptr;
51 : int Address = 0;
52 :
53 0 : unsigned OccupiedDwords(MachineInstr &MI) const {
54 0 : switch (MI.getOpcode()) {
55 : case R600::INTERP_PAIR_XY:
56 : case R600::INTERP_PAIR_ZW:
57 : case R600::INTERP_VEC_LOAD:
58 : case R600::DOT_4:
59 : return 4;
60 0 : case R600::KILL:
61 0 : return 0;
62 : default:
63 : break;
64 : }
65 :
66 : // These will be expanded to two ALU instructions in the
67 : // ExpandSpecialInstructions pass.
68 0 : if (TII->isLDSRetInstr(MI.getOpcode()))
69 0 : return 2;
70 :
71 0 : if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
72 0 : TII->isReductionOp(MI.getOpcode()))
73 0 : return 4;
74 :
75 : unsigned NumLiteral = 0;
76 0 : for (MachineInstr::mop_iterator It = MI.operands_begin(),
77 0 : E = MI.operands_end();
78 0 : It != E; ++It) {
79 : MachineOperand &MO = *It;
80 0 : if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
81 0 : ++NumLiteral;
82 : }
83 0 : return 1 + NumLiteral;
84 : }
85 :
86 0 : bool isALU(const MachineInstr &MI) const {
87 0 : if (TII->isALUInstr(MI.getOpcode()))
88 0 : return true;
89 0 : if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
90 0 : return true;
91 0 : switch (MI.getOpcode()) {
92 : case R600::PRED_X:
93 : case R600::INTERP_PAIR_XY:
94 : case R600::INTERP_PAIR_ZW:
95 : case R600::INTERP_VEC_LOAD:
96 : case R600::COPY:
97 : case R600::DOT_4:
98 : return true;
99 0 : default:
100 0 : return false;
101 : }
102 : }
103 :
104 0 : bool IsTrivialInst(MachineInstr &MI) const {
105 105746 : switch (MI.getOpcode()) {
106 : case R600::KILL:
107 : case R600::RETURN:
108 : case R600::IMPLICIT_DEF:
109 : return true;
110 0 : default:
111 0 : return false;
112 : }
113 : }
114 :
115 0 : std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
116 : // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
117 : // (See also R600ISelLowering.cpp)
118 : // ConstIndex value is in [0, 4095];
119 : return std::pair<unsigned, unsigned>(
120 12190 : ((Sel >> 2) - 512) >> 12, // KC_BANK
121 : // Line Number of ConstIndex
122 : // A line contains 16 constant registers however KCX bank can lock
123 : // two line at the same time ; thus we want to get an even line number.
124 : // Line number can be retrieved with (>>4), using (>>5) <<1 generates
125 : // an even number.
126 12190 : ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
127 : }
128 :
129 : bool
130 49441 : SubstituteKCacheBank(MachineInstr &MI,
131 : std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
132 : bool UpdateInstr = true) const {
133 : std::vector<std::pair<unsigned, unsigned>> UsedKCache;
134 :
135 98882 : if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
136 : return true;
137 :
138 : const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
139 49439 : TII->getSrcs(MI);
140 : assert(
141 : (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
142 : "Can't assign Const");
143 145369 : for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
144 191860 : if (Consts[i].first->getReg() != R600::ALU_CONST)
145 : continue;
146 12190 : unsigned Sel = Consts[i].second;
147 12190 : unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
148 12190 : unsigned KCacheIndex = Index * 4 + Chan;
149 12190 : const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
150 12190 : if (CachedConsts.empty()) {
151 3319 : CachedConsts.push_back(BankLine);
152 3319 : UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
153 3319 : continue;
154 : }
155 8871 : if (CachedConsts[0] == BankLine) {
156 8871 : UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
157 8871 : continue;
158 : }
159 0 : if (CachedConsts.size() == 1) {
160 0 : CachedConsts.push_back(BankLine);
161 0 : UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
162 0 : continue;
163 : }
164 0 : if (CachedConsts[1] == BankLine) {
165 0 : UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
166 0 : continue;
167 : }
168 : return false;
169 : }
170 :
171 49439 : if (!UpdateInstr)
172 : return true;
173 :
174 144201 : for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
175 190692 : if (Consts[i].first->getReg() != R600::ALU_CONST)
176 : continue;
177 24380 : switch(UsedKCache[j].first) {
178 12190 : case 0:
179 24380 : Consts[i].first->setReg(
180 : R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
181 12190 : break;
182 0 : case 1:
183 0 : Consts[i].first->setReg(
184 : R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
185 0 : break;
186 0 : default:
187 0 : llvm_unreachable("Wrong Cache Line");
188 : }
189 12190 : j++;
190 : }
191 : return true;
192 : }
193 :
194 48857 : bool canClauseLocalKillFitInClause(
195 : unsigned AluInstCount,
196 : std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
197 : MachineBasicBlock::iterator Def,
198 : MachineBasicBlock::iterator BBEnd) {
199 48857 : const R600RegisterInfo &TRI = TII->getRegisterInfo();
200 : //TODO: change this to defs?
201 904995 : for (MachineInstr::const_mop_iterator
202 48857 : MOI = Def->operands_begin(),
203 953852 : MOE = Def->operands_end(); MOI != MOE; ++MOI) {
204 951569 : if (!MOI->isReg() || !MOI->isDef() ||
205 46282 : TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
206 : continue;
207 :
208 : // Def defines a clause local register, so check that its use will fit
209 : // in the clause.
210 : unsigned LastUseCount = 0;
211 584 : for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
212 584 : AluInstCount += OccupiedDwords(*UseI);
213 : // Make sure we won't need to end the clause due to KCache limitations.
214 584 : if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
215 : return false;
216 :
217 : // We have reached the maximum instruction limit before finding the
218 : // use that kills this register, so we cannot use this def in the
219 : // current clause.
220 584 : if (AluInstCount >= TII->getMaxAlusPerClause())
221 : return false;
222 :
223 : // TODO: Is this true? kill flag appears to work OK below
224 : // Register kill flags have been cleared by the time we get to this
225 : // pass, but it is safe to assume that all uses of this register
226 : // occur in the same basic block as its definition, because
227 : // it is illegal for the scheduler to schedule them in
228 : // different blocks.
229 584 : if (UseI->readsRegister(MOI->getReg()))
230 : LastUseCount = AluInstCount;
231 :
232 : // Exit early if the current use kills the register
233 584 : if (UseI != Def && UseI->killsRegister(MOI->getReg()))
234 : break;
235 : }
236 292 : if (LastUseCount)
237 292 : return LastUseCount <= TII->getMaxAlusPerClause();
238 0 : llvm_unreachable("Clause local register live at end of clause.");
239 : }
240 : return true;
241 : }
242 :
243 : MachineBasicBlock::iterator
244 3962 : MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
245 : MachineBasicBlock::iterator ClauseHead = I;
246 : std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
247 : bool PushBeforeModifier = false;
248 : unsigned AluInstCount = 0;
249 53205 : for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
250 : if (IsTrivialInst(*I))
251 : continue;
252 52573 : if (!isALU(*I))
253 : break;
254 49254 : if (AluInstCount > TII->getMaxAlusPerClause())
255 : break;
256 98060 : if (I->getOpcode() == R600::PRED_X) {
257 : // We put PRED_X in its own clause to ensure that ifcvt won't create
258 : // clauses with more than 128 insts.
259 : // IfCvt is indeed checking that "then" and "else" branches of an if
260 : // statement have less than ~60 insts thus converted clauses can't be
261 : // bigger than ~121 insts (predicate setter needs to be in the same
262 : // clause as predicated alus).
263 169 : if (AluInstCount > 0)
264 : break;
265 86 : if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
266 : PushBeforeModifier = true;
267 : AluInstCount ++;
268 86 : continue;
269 : }
270 : // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
271 : //
272 : // * KILL or INTERP instructions
273 : // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
274 : // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
275 : //
276 : // XXX: These checks have not been implemented yet.
277 48861 : if (TII->mustBeLastInClause(I->getOpcode())) {
278 : I++;
279 : break;
280 : }
281 :
282 : // If this instruction defines a clause local register, make sure
283 : // its use can fit in this clause.
284 97714 : if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
285 : break;
286 :
287 48857 : if (!SubstituteKCacheBank(*I, KCacheBanks))
288 : break;
289 48857 : AluInstCount += OccupiedDwords(*I);
290 : }
291 3962 : unsigned Opcode = PushBeforeModifier ?
292 : R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
293 3962 : BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
294 : // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
295 : // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
296 : // pass may assume that identical ALU clause starter at the beginning of a
297 : // true and false branch can be factorized which is not the case.
298 3962 : .addImm(Address++) // ADDR
299 3962 : .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
300 7924 : .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
301 3962 : .addImm(KCacheBanks.empty()?0:2) // KM0
302 7924 : .addImm((KCacheBanks.size() < 2)?0:2) // KM1
303 3962 : .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
304 7924 : .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
305 3962 : .addImm(AluInstCount) // COUNT
306 : .addImm(1); // Enabled
307 3962 : return I;
308 : }
309 :
310 : public:
311 : static char ID;
312 :
313 282 : R600EmitClauseMarkers() : MachineFunctionPass(ID) {
314 282 : initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
315 282 : }
316 :
317 2297 : bool runOnMachineFunction(MachineFunction &MF) override {
318 2297 : const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
319 2297 : TII = ST.getInstrInfo();
320 :
321 : for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
322 4765 : BB != BB_E; ++BB) {
323 : MachineBasicBlock &MBB = *BB;
324 : MachineBasicBlock::iterator I = MBB.begin();
325 2468 : if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
326 : continue; // BB was already parsed
327 13878 : for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
328 11410 : if (isALU(*I)) {
329 3962 : auto next = MakeALUClause(MBB, I);
330 : assert(next != I);
331 3962 : I = next;
332 : } else
333 : ++I;
334 : }
335 : }
336 2297 : return false;
337 : }
338 :
339 282 : StringRef getPassName() const override {
340 282 : return "R600 Emit Clause Markers Pass";
341 : }
342 : };
343 :
344 : char R600EmitClauseMarkers::ID = 0;
345 :
346 : } // end anonymous namespace
347 :
348 282 : INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
349 : "R600 Emit Clause Markters", false, false)
350 564 : INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
351 : "R600 Emit Clause Markters", false, false)
352 :
353 282 : FunctionPass *llvm::createR600EmitClauseMarkers() {
354 282 : return new R600EmitClauseMarkers();
355 : }
|