File: | lib/Target/R600/SILowerControlFlow.cpp |
Location: | line 458, column 9 |
Description: | Value stored to 'NeedM0' is never read |
1 | //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | /// \file |
11 | /// \brief This pass lowers the pseudo control flow instructions to real |
12 | /// machine instructions. |
13 | /// |
14 | /// All control flow is handled using predicated instructions and |
15 | /// a predicate stack. Each Scalar ALU controls the operations of 64 Vector |
16 | /// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs |
17 | /// by writing to the 64-bit EXEC register (each bit corresponds to a |
18 | /// single vector ALU). Typically, for predicates, a vector ALU will write |
19 | /// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit |
20 | /// for each Vector ALU) and then the Scalar ALU will AND the VCC register |
21 | /// with EXEC to update the predicates. |
22 | /// |
23 | /// For example: |
24 | /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 |
25 | /// %SGPR0 = SI_IF %VCC |
26 | /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 |
27 | /// %SGPR0 = SI_ELSE %SGPR0 |
28 | /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 |
29 | /// SI_END_CF %SGPR0 |
30 | /// |
31 | /// becomes: |
32 | /// |
33 | /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask |
34 | /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask |
35 | /// S_CBRANCH_EXECZ label0 // This instruction is an optional |
36 | /// // optimization which allows us to |
37 | /// // branch if all the bits of |
38 | /// // EXEC are zero. |
39 | /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch |
40 | /// |
41 | /// label0: |
42 | /// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block |
43 | /// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask |
44 | /// S_CBRANCH_EXECZ label1 // Use our branch optimization |
45 | /// // instruction again. |
46 | /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block |
47 | /// label1: |
48 | /// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits |
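| /// |
| /// Loops are lowered along the same lines. The following is only a sketch, |
| /// assembled from the Loop() and Break() lowerings in this file rather than |
| /// taken from actual compiler output: SI_BREAK accumulates the lanes that |
| /// have left the loop, and SI_LOOP masks them out of EXEC, branching back |
| /// while any lane remains active: |
| /// |
| /// label_loop: |
| ///   ... loop body ... |
| ///   %SGPR0 = S_OR_B64 %EXEC, %SGPR0    // SI_BREAK: record lanes taking the break |
| ///   %EXEC = S_ANDN2_B64 %EXEC, %SGPR0  // SI_LOOP: disable finished lanes |
| ///   S_CBRANCH_EXECNZ label_loop        // repeat while any lane is active |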
49 | //===----------------------------------------------------------------------===// |
50 | |
51 | #include "AMDGPU.h" |
52 | #include "AMDGPUSubtarget.h" |
53 | #include "SIInstrInfo.h" |
54 | #include "SIMachineFunctionInfo.h" |
55 | #include "llvm/CodeGen/MachineFrameInfo.h" |
56 | #include "llvm/CodeGen/MachineFunction.h" |
57 | #include "llvm/CodeGen/MachineFunctionPass.h" |
58 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
59 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
60 | #include "llvm/IR/Constants.h" |
61 | |
62 | using namespace llvm; |
63 | |
64 | namespace { |
65 | |
66 | class SILowerControlFlowPass : public MachineFunctionPass { |
67 | |
68 | private: |
69 | static const unsigned SkipThreshold = 12; |
70 | |
71 | static char ID; |
72 | const SIRegisterInfo *TRI; |
73 | const SIInstrInfo *TII; |
74 | |
75 | bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); |
76 | |
77 | void Skip(MachineInstr &From, MachineOperand &To); |
78 | void SkipIfDead(MachineInstr &MI); |
79 | |
80 | void If(MachineInstr &MI); |
81 | void Else(MachineInstr &MI); |
82 | void Break(MachineInstr &MI); |
83 | void IfBreak(MachineInstr &MI); |
84 | void ElseBreak(MachineInstr &MI); |
85 | void Loop(MachineInstr &MI); |
86 | void EndCf(MachineInstr &MI); |
87 | |
88 | void Kill(MachineInstr &MI); |
89 | void Branch(MachineInstr &MI); |
90 | |
91 | void LoadM0(MachineInstr &MI, MachineInstr *MovRel); |
92 | void IndirectSrc(MachineInstr &MI); |
93 | void IndirectDst(MachineInstr &MI); |
94 | |
95 | public: |
96 | SILowerControlFlowPass(TargetMachine &tm) : |
97 | MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { } |
98 | |
99 | bool runOnMachineFunction(MachineFunction &MF) override; |
100 | |
101 | const char *getPassName() const override { |
102 | return "SI Lower control flow instructions"; |
103 | } |
104 | |
105 | }; |
106 | |
107 | } // End anonymous namespace |
108 | |
109 | char SILowerControlFlowPass::ID = 0; |
110 | |
111 | FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { |
112 | return new SILowerControlFlowPass(tm); |
113 | } |
114 | |
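| // Heuristic used by Skip(): walk the fall-through chain from From towards To |
| // and return true once SkipThreshold instructions (counting a bundle as one) |
| // have been seen, i.e. once a conditional branch is likely cheaper than |
| // falling through the blocks with all lanes disabled. |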
115 | bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, |
116 | MachineBasicBlock *To) { |
117 | |
118 | unsigned NumInstr = 0; |
119 | |
120 | for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); |
121 | MBB = *MBB->succ_begin()) { |
122 | |
123 | for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); |
124 | NumInstr < SkipThreshold && I != E; ++I) { |
125 | |
126 | if (I->isBundle() || !I->isBundled()) |
127 | if (++NumInstr >= SkipThreshold) |
128 | return true; |
129 | } |
130 | } |
131 | |
132 | return false; |
133 | } |
134 | |
135 | void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { |
136 | |
137 | if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) |
138 | return; |
139 | |
140 | DebugLoc DL = From.getDebugLoc(); |
141 | BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) |
142 | .addOperand(To) |
143 | .addReg(AMDGPU::EXEC); |
144 | } |
145 | |
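| // Pixel shaders only: insert an early-exit sequence after MI and branch over |
| // it while EXEC is still non-zero. Once every lane has been killed, export |
| // to the NULL target and terminate the wavefront instead of running the rest |
| // of the shader with no live lanes. |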
146 | void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { |
147 | |
148 | MachineBasicBlock &MBB = *MI.getParent(); |
149 | DebugLoc DL = MI.getDebugLoc(); |
150 | |
151 | if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() != |
152 | ShaderType::PIXEL || |
153 | !shouldSkip(&MBB, &MBB.getParent()->back())) |
154 | return; |
155 | |
156 | MachineBasicBlock::iterator Insert = &MI; |
157 | ++Insert; |
158 | |
159 | // If the exec mask is non-zero, skip the next two instructions |
160 | BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) |
161 | .addImm(3) |
162 | .addReg(AMDGPU::EXEC); |
163 | |
164 | // Exec mask is zero: Export to NULL target... |
165 | BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) |
166 | .addImm(0) |
167 | .addImm(0x09) // V_008DFC_SQ_EXP_NULL |
168 | .addImm(0) |
169 | .addImm(1) |
170 | .addImm(1) |
171 | .addReg(AMDGPU::VGPR0) |
172 | .addReg(AMDGPU::VGPR0) |
173 | .addReg(AMDGPU::VGPR0) |
174 | .addReg(AMDGPU::VGPR0); |
175 | |
176 | // ... and terminate wavefront |
177 | BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); |
178 | } |
179 | |
180 | void SILowerControlFlowPass::If(MachineInstr &MI) { |
181 | MachineBasicBlock &MBB = *MI.getParent(); |
182 | DebugLoc DL = MI.getDebugLoc(); |
183 | unsigned Reg = MI.getOperand(0).getReg(); |
184 | unsigned Vcc = MI.getOperand(1).getReg(); |
185 | |
186 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg) |
187 | .addReg(Vcc); |
188 | |
189 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg) |
190 | .addReg(AMDGPU::EXEC) |
191 | .addReg(Reg); |
192 | |
193 | Skip(MI, MI.getOperand(2)); |
194 | |
195 | MI.eraseFromParent(); |
196 | } |
197 | |
198 | void SILowerControlFlowPass::Else(MachineInstr &MI) { |
199 | MachineBasicBlock &MBB = *MI.getParent(); |
200 | DebugLoc DL = MI.getDebugLoc(); |
201 | unsigned Dst = MI.getOperand(0).getReg(); |
202 | unsigned Src = MI.getOperand(1).getReg(); |
203 | |
204 | BuildMI(MBB, MBB.getFirstNonPHI(), DL, |
205 | TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) |
206 | .addReg(Src); // Saved EXEC |
207 | |
208 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) |
209 | .addReg(AMDGPU::EXEC) |
210 | .addReg(Dst); |
211 | |
212 | Skip(MI, MI.getOperand(2)); |
213 | |
214 | MI.eraseFromParent(); |
215 | } |
216 | |
217 | void SILowerControlFlowPass::Break(MachineInstr &MI) { |
218 | MachineBasicBlock &MBB = *MI.getParent(); |
219 | DebugLoc DL = MI.getDebugLoc(); |
220 | |
221 | unsigned Dst = MI.getOperand(0).getReg(); |
222 | unsigned Src = MI.getOperand(1).getReg(); |
223 | |
224 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) |
225 | .addReg(AMDGPU::EXEC) |
226 | .addReg(Src); |
227 | |
228 | MI.eraseFromParent(); |
229 | } |
230 | |
231 | void SILowerControlFlowPass::IfBreak(MachineInstr &MI) { |
232 | MachineBasicBlock &MBB = *MI.getParent(); |
233 | DebugLoc DL = MI.getDebugLoc(); |
234 | |
235 | unsigned Dst = MI.getOperand(0).getReg(); |
236 | unsigned Vcc = MI.getOperand(1).getReg(); |
237 | unsigned Src = MI.getOperand(2).getReg(); |
238 | |
239 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) |
240 | .addReg(Vcc) |
241 | .addReg(Src); |
242 | |
243 | MI.eraseFromParent(); |
244 | } |
245 | |
246 | void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) { |
247 | MachineBasicBlock &MBB = *MI.getParent(); |
248 | DebugLoc DL = MI.getDebugLoc(); |
249 | |
250 | unsigned Dst = MI.getOperand(0).getReg(); |
251 | unsigned Saved = MI.getOperand(1).getReg(); |
252 | unsigned Src = MI.getOperand(2).getReg(); |
253 | |
254 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) |
255 | .addReg(Saved) |
256 | .addReg(Src); |
257 | |
258 | MI.eraseFromParent(); |
259 | } |
260 | |
261 | void SILowerControlFlowPass::Loop(MachineInstr &MI) { |
262 | MachineBasicBlock &MBB = *MI.getParent(); |
263 | DebugLoc DL = MI.getDebugLoc(); |
264 | unsigned Src = MI.getOperand(0).getReg(); |
265 | |
266 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC) |
267 | .addReg(AMDGPU::EXEC) |
268 | .addReg(Src); |
269 | |
270 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) |
271 | .addOperand(MI.getOperand(1)) |
272 | .addReg(AMDGPU::EXEC); |
273 | |
274 | MI.eraseFromParent(); |
275 | } |
276 | |
277 | void SILowerControlFlowPass::EndCf(MachineInstr &MI) { |
278 | MachineBasicBlock &MBB = *MI.getParent(); |
279 | DebugLoc DL = MI.getDebugLoc(); |
280 | unsigned Reg = MI.getOperand(0).getReg(); |
281 | |
282 | BuildMI(MBB, MBB.getFirstNonPHI(), DL, |
283 | TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) |
284 | .addReg(AMDGPU::EXEC) |
285 | .addReg(Reg); |
286 | |
287 | MI.eraseFromParent(); |
288 | } |
289 | |
290 | void SILowerControlFlowPass::Branch(MachineInstr &MI) { |
291 | if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode()) |
292 | MI.eraseFromParent(); |
293 | |
294 | // If these aren't equal, this is probably an infinite loop. |
295 | } |
296 | |
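| // Lower SI_KILL: a lane is killed when its operand is negative. A constant |
| // operand is resolved at compile time (clear all of EXEC, or do nothing); |
| // otherwise V_CMPX_LE_F32 tests 0 <= Op and, being a CMPX instruction, also |
| // clears the EXEC bit of every lane that fails the comparison. |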
297 | void SILowerControlFlowPass::Kill(MachineInstr &MI) { |
298 | MachineBasicBlock &MBB = *MI.getParent(); |
299 | DebugLoc DL = MI.getDebugLoc(); |
300 | const MachineOperand &Op = MI.getOperand(0); |
301 | |
302 | #ifndef NDEBUG |
303 | const SIMachineFunctionInfo *MFI |
304 | = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); |
305 | // Kill is only allowed in pixel / geometry shaders. |
306 | assert(MFI->getShaderType() == ShaderType::PIXEL || |
307 | MFI->getShaderType() == ShaderType::GEOMETRY); |
308 | #endif |
309 | |
310 | // Clear this thread from the exec mask if the operand is negative |
311 | if ((Op.isImm() || Op.isFPImm())) { |
312 | // Constant operand: Set exec mask to 0 or do nothing |
313 | if (Op.isImm() ? (Op.getImm() & 0x80000000) : |
314 | Op.getFPImm()->isNegative()) { |
315 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) |
316 | .addImm(0); |
317 | } |
318 | } else { |
319 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) |
320 | .addImm(0) |
321 | .addOperand(Op); |
322 | } |
323 | |
324 | MI.eraseFromParent(); |
325 | } |
326 | |
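| // Materialize the index in M0 and run MovRel under it. A uniform (SGPR) |
| // index needs only a single S_MOV_B32. A divergent (VGPR) index needs the |
| // "waterfall" loop emitted below: pick the first active lane's index with |
| // V_READFIRSTLANE_B32, execute MovRel for every lane sharing that index, |
| // disable those lanes, repeat until EXEC is empty, then restore the saved |
| // EXEC mask. |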
327 | void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { |
328 | |
329 | MachineBasicBlock &MBB = *MI.getParent(); |
330 | DebugLoc DL = MI.getDebugLoc(); |
331 | MachineBasicBlock::iterator I = MI; |
332 | |
333 | unsigned Save = MI.getOperand(1).getReg(); |
334 | unsigned Idx = MI.getOperand(3).getReg(); |
335 | |
336 | if (AMDGPU::SReg_32RegClass.contains(Idx)) { |
337 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) |
338 | .addReg(Idx); |
339 | MBB.insert(I, MovRel); |
340 | } else { |
341 | |
342 | assert(AMDGPU::SReg_64RegClass.contains(Save)); |
343 | assert(AMDGPU::VReg_32RegClass.contains(Idx)); |
344 | |
345 | // Save the EXEC mask |
346 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) |
347 | .addReg(AMDGPU::EXEC); |
348 | |
349 | // Read the next variant into VCC (lower 32 bits) <- also loop target |
350 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), |
351 | AMDGPU::VCC_LO) |
352 | .addReg(Idx); |
353 | |
354 | // Move index from VCC into M0 |
355 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) |
356 | .addReg(AMDGPU::VCC_LO); |
357 | |
358 | // Compare the just read M0 value to all possible Idx values |
359 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) |
360 | .addReg(AMDGPU::M0) |
361 | .addReg(Idx); |
362 | |
363 | // Update EXEC, save the original EXEC value to VCC |
364 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) |
365 | .addReg(AMDGPU::VCC); |
366 | |
367 | // Do the actual move |
368 | MBB.insert(I, MovRel); |
369 | |
370 | // Update EXEC, switch all done bits to 0 and all todo bits to 1 |
371 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) |
372 | .addReg(AMDGPU::EXEC) |
373 | .addReg(AMDGPU::VCC); |
374 | |
375 | // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover |
376 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) |
377 | .addImm(-7) |
378 | .addReg(AMDGPU::EXEC); |
379 | |
380 | // Restore EXEC |
381 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) |
382 | .addReg(Save); |
383 | |
384 | } |
385 | MI.eraseFromParent(); |
386 | } |
387 | |
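| // Indirect addressing is done with the V_MOVREL* instructions, which add M0 |
| // to the register index they access: V_MOVRELS_B32 offsets the source of a |
| // move, V_MOVRELD_B32 the destination. The constant part of the offset is |
| // folded into the starting subregister; the variable part goes through |
| // LoadM0(). |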
388 | void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { |
389 | |
390 | MachineBasicBlock &MBB = *MI.getParent(); |
391 | DebugLoc DL = MI.getDebugLoc(); |
392 | |
393 | unsigned Dst = MI.getOperand(0).getReg(); |
394 | unsigned Vec = MI.getOperand(2).getReg(); |
395 | unsigned Off = MI.getOperand(4).getImm(); |
396 | unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0); |
397 | if (!SubReg) |
398 | SubReg = Vec; |
399 | |
400 | MachineInstr *MovRel = |
401 | BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) |
402 | .addReg(SubReg + Off) |
403 | .addReg(AMDGPU::M0, RegState::Implicit) |
404 | .addReg(Vec, RegState::Implicit); |
405 | |
406 | LoadM0(MI, MovRel); |
407 | } |
408 | |
409 | void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { |
410 | |
411 | MachineBasicBlock &MBB = *MI.getParent(); |
412 | DebugLoc DL = MI.getDebugLoc(); |
413 | |
414 | unsigned Dst = MI.getOperand(0).getReg(); |
415 | unsigned Off = MI.getOperand(4).getImm(); |
416 | unsigned Val = MI.getOperand(5).getReg(); |
417 | unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0); |
418 | if (!SubReg) |
419 | SubReg = Dst; |
420 | |
421 | MachineInstr *MovRel = |
422 | BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) |
423 | .addReg(SubReg + Off, RegState::Define) |
424 | .addReg(Val) |
425 | .addReg(AMDGPU::M0, RegState::Implicit) |
426 | .addReg(Dst, RegState::Implicit); |
427 | |
428 | LoadM0(MI, MovRel); |
429 | } |
430 | |
431 | bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { |
432 | TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
433 | TRI = |
434 | static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); |
435 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
436 | |
437 | bool HaveKill = false; |
438 | bool NeedM0 = false; |
439 | bool NeedWQM = false; |
440 | bool NeedFlat = false; |
441 | unsigned Depth = 0; |
442 | |
443 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); |
444 | BI != BE; ++BI) { |
445 | |
446 | MachineBasicBlock &MBB = *BI; |
447 | MachineBasicBlock::iterator I, Next; |
448 | for (I = MBB.begin(); I != MBB.end(); I = Next) { |
449 | Next = std::next(I); |
450 | |
451 | MachineInstr &MI = *I; |
452 | if (TII->isDS(MI.getOpcode())) { |
453 | NeedWQM = true; |
454 | } |
455 | |
456 | // Flat uses m0 in case it needs to access LDS. |
457 | if (TII->isFLAT(MI.getOpcode())) { |
458 | NeedM0 = true; |
Value stored to 'NeedM0' is never read | |
459 | NeedFlat = true; |
460 | } |
461 | |
462 | switch (MI.getOpcode()) { |
463 | default: break; |
464 | case AMDGPU::SI_IF: |
465 | ++Depth; |
466 | If(MI); |
467 | break; |
468 | |
469 | case AMDGPU::SI_ELSE: |
470 | Else(MI); |
471 | break; |
472 | |
473 | case AMDGPU::SI_BREAK: |
474 | Break(MI); |
475 | break; |
476 | |
477 | case AMDGPU::SI_IF_BREAK: |
478 | IfBreak(MI); |
479 | break; |
480 | |
481 | case AMDGPU::SI_ELSE_BREAK: |
482 | ElseBreak(MI); |
483 | break; |
484 | |
485 | case AMDGPU::SI_LOOP: |
486 | ++Depth; |
487 | Loop(MI); |
488 | break; |
489 | |
490 | case AMDGPU::SI_END_CF: |
491 | if (--Depth == 0 && HaveKill) { |
492 | SkipIfDead(MI); |
493 | HaveKill = false; |
494 | } |
495 | EndCf(MI); |
496 | break; |
497 | |
498 | case AMDGPU::SI_KILL: |
499 | if (Depth == 0) |
500 | SkipIfDead(MI); |
501 | else |
502 | HaveKill = true; |
503 | Kill(MI); |
504 | break; |
505 | |
506 | case AMDGPU::S_BRANCH: |
507 | Branch(MI); |
508 | break; |
509 | |
510 | case AMDGPU::SI_INDIRECT_SRC: |
511 | IndirectSrc(MI); |
512 | break; |
513 | |
514 | case AMDGPU::SI_INDIRECT_DST_V1: |
515 | case AMDGPU::SI_INDIRECT_DST_V2: |
516 | case AMDGPU::SI_INDIRECT_DST_V4: |
517 | case AMDGPU::SI_INDIRECT_DST_V8: |
518 | case AMDGPU::SI_INDIRECT_DST_V16: |
519 | IndirectDst(MI); |
520 | break; |
521 | |
522 | case AMDGPU::V_INTERP_P1_F32: |
523 | case AMDGPU::V_INTERP_P2_F32: |
524 | case AMDGPU::V_INTERP_MOV_F32: |
525 | NeedWQM = true; |
526 | break; |
527 | } |
528 | } |
529 | } |
530 | |
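| // Pixel shaders that use LDS or interpolated values need whole quad mode: |
| // S_WQM_B64 turns on all four lanes of every quad that has at least one |
| // live lane, so helper pixels are available to the computation. |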
531 | if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) { |
532 | MachineBasicBlock &MBB = MF.front(); |
533 | BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), |
534 | AMDGPU::EXEC).addReg(AMDGPU::EXEC); |
535 | } |
536 | |
537 | // FIXME: This seems inappropriate to do here. |
538 | if (NeedFlat && MFI->IsKernel) { |
539 | // Insert the prologue initializing the SGPRs pointing to the scratch space |
540 | // for flat accesses. |
541 | const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); |
542 | |
543 | // TODO: What to use with function calls? |
544 | |
545 | // FIXME: This is reporting stack size that is used in a scratch buffer |
546 | // rather than registers as well. |
547 | uint64_t StackSizeBytes = FrameInfo->getStackSize(); |
548 | |
549 | int IndirectBegin |
550 | = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF); |
551 | // Convert register index to 256-byte unit. |
552 | uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256); |
553 | |
554 | assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff && |
555 | "Stack limits should be smaller than 16-bits"); |
556 | |
557 | // Initialize the flat scratch register pair. |
558 | // TODO: Can we use one s_mov_b64 here? |
559 | |
560 | // Offset is in units of 256-bytes. |
561 | MachineBasicBlock &MBB = MF.front(); |
562 | DebugLoc NoDL; |
563 | MachineBasicBlock::iterator Start = MBB.getFirstNonPHI(); |
564 | const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32); |
565 | |
566 | assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes)); |
567 | |
568 | BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO) |
569 | .addImm(StackOffset); |
570 | |
571 | // Documentation says size is "per-thread scratch size in bytes" |
572 | BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI) |
573 | .addImm(StackSizeBytes); |
574 | } |
575 | |
576 | return true; |
577 | } |