//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
/// %0 <sgpr> = SCALAR_INST
/// %1 <vsrc> = COPY %0 <sgpr>
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %2 <vgpr> = VECTOR_INST
/// %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
/// %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
/// %5 <vgpr> = VECTOR_INST %4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
/// %0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %2 <vgpr> = VECTOR_INST
/// %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
/// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
/// %0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %2 <vgpr> = VECTOR_INST
/// %3 <sgpr> = COPY %2 <vgpr>
/// BB2:
/// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// that define a <vsrc> register and constrains the definition's class to
/// <vgpr> if any user of the PHI's definition register is a vector
/// instruction. If the PHI's definition class is constrained to <vgpr>, the
/// coalescer is unable to perform the COPY removal from the example above,
/// which ultimately led to the creation of the illegal COPY.
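///
/// For example, with %4 constrained to <vgpr> (illustrative, using the
/// notation above), the coalescer keeps the legal SGPR => VGPR copy in BB0
/// and every remaining copy is legal:
///
/// BB0:
/// %0 <sgpr> = SCALAR_INST
/// %1 <vgpr> = COPY %0 <sgpr>
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
/// %2 <vgpr> = VECTOR_INST
/// BB2:
/// %4 <vgpr> = PHI %1 <vgpr>, <%bb.0>, %2 <vgpr>, <%bb.1>
/// %5 <vgpr> = VECTOR_INST %4 <vgpr>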
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <map>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "si-fix-sgpr-copies"

static cl::opt<bool> EnableM0Merge(
  "amdgpu-enable-merge-m0",
  cl::desc("Merge and hoist M0 initializations"),
  cl::init(false));

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
  MachineDominatorTree *MDT;

public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix SGPR copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
                      "SI Fix SGPR copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
                    "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

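// Returns true if MI has at least one virtual register operand whose register
// class contains VGPRs.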
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

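// Returns the source and destination register classes of Copy, mapping
// physical registers to their register classes via getPhysRegClass.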
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
      TargetRegisterInfo::isVirtualRegister(SrcReg) ?
      MRI.getRegClass(SrcReg) :
      TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
      TargetRegisterInfo::isVirtualRegister(DstReg) ?
      MRI.getRegClass(DstReg) :
      TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

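// Classify a copy by its register classes: the illegal VGPR->SGPR direction
// has an SGPR-only destination class and a VGPR-capable source class;
// isSGPRToVGPRCopy below checks the opposite, legal direction.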
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

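// Try to rewrite an SGPR->VGPR copy so that it produces an SGPR instead.
// This is only done when every other use of the destination is a target
// instruction in the same block that still accepts the SGPR source operand;
// on success the destination class is switched to the equivalent SGPR class.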
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
                                      const SIRegisterInfo *TRI,
                                      const SIInstrInfo *TII) {
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  auto &Src = MI.getOperand(1);
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
      !TargetRegisterInfo::isVirtualRegister(DstReg))
    return false;

  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
    const auto *UseMI = MO.getParent();
    if (UseMI == &MI)
      continue;
    if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
        UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
        !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
      return false;
  }
  // Change VGPR to SGPR destination.
  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
  return true;
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
//  SGPRx = ...
//  SGPRy = REG_SEQUENCE SGPRx, sub0 ...
//  VGPRz = COPY SGPRy
//
// ==>
//
//  VGPRx = COPY SGPRx
//  VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  // It is illegal to have vreg inputs to a physreg defining reg_sequence.
  if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
    return true;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
        .add(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

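// Returns true if any incoming value of the PHI lives in a VGPR-capable
// register class.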
static bool phiHasVGPROperands(const MachineInstr &PHI,
                               const MachineRegisterInfo &MRI,
                               const SIRegisterInfo *TRI,
                               const SIInstrInfo *TII) {
  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
    unsigned Reg = PHI.getOperand(i).getReg();
    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
      return true;
  }
  return false;
}

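// Returns true if any incoming value of the PHI is defined by one of the
// SI_*_BREAK pseudos, looking recursively through nested PHIs. Visited guards
// against revisiting registers on cyclic PHI chains.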
static bool phiHasBreakDef(const MachineInstr &PHI,
                           const MachineRegisterInfo &MRI,
                           SmallSet<unsigned, 8> &Visited) {
  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
    unsigned Reg = PHI.getOperand(i).getReg();
    if (Visited.count(Reg))
      continue;

    Visited.insert(Reg);

    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
    switch (DefInstr->getOpcode()) {
    default:
      break;
    case AMDGPU::SI_BREAK:
    case AMDGPU::SI_IF_BREAK:
    case AMDGPU::SI_ELSE_BREAK:
      return true;
    case AMDGPU::PHI:
      if (phiHasBreakDef(*DefInstr, MRI, Visited))
        return true;
    }
  }
  return false;
}

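// Returns true if a terminator of MBB writes EXEC, which is how divergent
// branches appear at this point in the pipeline.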
static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
                                          const TargetRegisterInfo &TRI) {
  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
       E = MBB.end(); I != E; ++I) {
    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
      return true;
  }
  return false;
}

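// Returns true if the immediate materialized by MoveImm can instead be folded
// directly into Copy as a scalar move; on success SMovOp and Imm receive the
// replacement opcode and immediate value.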
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp,
                                    int64_t &Imm) {
  if (Copy->getOpcode() != AMDGPU::COPY)
    return false;

  if (!MoveImm->isMoveImmediate())
    return false;

  const MachineOperand *ImmOp =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  if (!ImmOp->isImm())
    return false;

  // FIXME: Handle copies with sub-regs.
  if (Copy->getOperand(0).getSubReg())
    return false;

  switch (MoveImm->getOpcode()) {
  default:
    return false;
  case AMDGPU::V_MOV_B32_e32:
    SMovOp = AMDGPU::S_MOV_B32;
    break;
  case AMDGPU::V_MOV_B64_PSEUDO:
    SMovOp = AMDGPU::S_MOV_B64;
    break;
  }
  Imm = ImmOp->getImm();
  return true;
}

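// Worklist search over the transitive predecessors of MBB, stopping at CutOff
// (if given). Returns true if Predicate holds for any visited block.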
template <class UnaryPredicate>
bool searchPredecessors(const MachineBasicBlock *MBB,
                        const MachineBasicBlock *CutOff,
                        UnaryPredicate Predicate) {
  if (MBB == CutOff)
    return false;

  DenseSet<const MachineBasicBlock *> Visited;
  SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
                                               MBB->pred_end());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();

    if (!Visited.insert(MBB).second)
      continue;
    if (MBB == CutOff)
      continue;
    if (Predicate(MBB))
      return true;

    Worklist.append(MBB->pred_begin(), MBB->pred_end());
  }

  return false;
}

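// Returns true if MBB can be reached from a block whose terminator modifies
// EXEC, i.e. along divergent control flow.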
static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
                                        const TargetRegisterInfo *TRI) {
  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
}

// Checks if there is a potential path from instruction From to instruction
// To. If CutOff is specified and sits on that path, we ignore the portion of
// the path above it and report To as not reachable.
static bool isReachable(const MachineInstr *From,
                        const MachineInstr *To,
                        const MachineBasicBlock *CutOff,
                        MachineDominatorTree &MDT) {
  // If either the From block dominates the To block, or the instructions are
  // in the same block and From is higher.
  if (MDT.dominates(From, To))
    return true;

  const MachineBasicBlock *MBBFrom = From->getParent();
  const MachineBasicBlock *MBBTo = To->getParent();
  if (MBBFrom == MBBTo)
    return false;

  // Instructions are in different blocks, do predecessor search.
  // We should almost never get here since we do not usually produce M0 stores
  // other than -1.
  return searchPredecessors(MBBTo, CutOff, [MBBFrom]
                            (const MachineBasicBlock *MBB) {
                              return MBB == MBBFrom; });
}

// Hoist and merge identical SGPR initializations into a common predecessor.
// This is intended to combine M0 initializations, but can work with any
// SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
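//
// For example (illustrative; -1 is the usual M0 value, as noted in
// isReachable above): if two blocks each contain
//   $m0 = S_MOV_B32 -1
// and neither init can be clobbered in between, one of the moves is erased
// and, when neither block dominates the other, the survivor is spliced into
// their nearest common dominator.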
static bool hoistAndMergeSGPRInits(unsigned Reg,
                                   const MachineRegisterInfo &MRI,
                                   MachineDominatorTree &MDT) {
  // List of inits by immediate value.
  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
  InitListMap Inits;
  // List of clobbering instructions.
  SmallVector<MachineInstr*, 8> Clobbers;
  bool Changed = false;

  for (auto &MI : MRI.def_instructions(Reg)) {
    MachineOperand *Imm = nullptr;
    for (auto &MO: MI.operands()) {
      if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
          (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
        Imm = nullptr;
        break;
      } else if (MO.isImm())
        Imm = &MO;
    }
    if (Imm)
      Inits[Imm->getImm()].push_front(&MI);
    else
      Clobbers.push_back(&MI);
  }

  for (auto &Init : Inits) {
    auto &Defs = Init.second;

    for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
      MachineInstr *MI1 = *I1;

      for (auto I2 = std::next(I1); I2 != E; ) {
        MachineInstr *MI2 = *I2;

        // Check any possible interference
        auto intereferes = [&](MachineBasicBlock::iterator From,
                               MachineBasicBlock::iterator To) -> bool {

          assert(MDT.dominates(&*To, &*From));

          auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
            const MachineBasicBlock *MBBFrom = From->getParent();
            const MachineBasicBlock *MBBTo = To->getParent();
            bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
            bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
            if (!MayClobberFrom && !MayClobberTo)
              return false;
            if ((MayClobberFrom && !MayClobberTo) ||
                (!MayClobberFrom && MayClobberTo))
              return true;
            // Both can clobber, this is not an interference only if both are
            // dominated by Clobber and belong to the same block or if Clobber
            // properly dominates To, given that To >> From, so it dominates
            // both and is located in a common dominator.
            return !((MBBFrom == MBBTo &&
                      MDT.dominates(Clobber, &*From) &&
                      MDT.dominates(Clobber, &*To)) ||
                     MDT.properlyDominates(Clobber->getParent(), MBBTo));
          };

          return (llvm::any_of(Clobbers, interferes)) ||
                 (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
                    return C.first != Init.first &&
                           llvm::any_of(C.second, interferes);
                  }));
        };

        if (MDT.dominates(MI1, MI2)) {
          if (!intereferes(MI2, MI1)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI2->getParent()) << " " << *MI2);
            MI2->eraseFromParent();
            Defs.erase(I2++);
            Changed = true;
            continue;
          }
        } else if (MDT.dominates(MI2, MI1)) {
          if (!intereferes(MI1, MI2)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        } else {
          auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
                                                     MI2->getParent());
          if (!MBB) {
            ++I2;
            continue;
          }

          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1
                       << "and moving from "
                       << printMBBReference(*MI2->getParent()) << " to "
                       << printMBBReference(*I->getParent()) << " " << *MI2);
            I->getParent()->splice(I, MI2->getParent(), MI2);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        }
        ++I2;
      }
      ++I1;
    }
  }

  if (Changed)
    MRI.clearKillFlags(Reg);

  return Changed;
}

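// Walk every instruction in the function: COPY/WQM/WWM, PHI, REG_SEQUENCE and
// INSERT_SUBREG are inspected, and each is either rewritten in place (class
// constraint or immediate fold) or handed to SIInstrInfo::moveToVALU.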
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MDT = &getAnalysis<MachineDominatorTree>();

  SmallVector<MachineInstr *, 16> Worklist;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY:
      case AMDGPU::WQM:
      case AMDGPU::WWM: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          unsigned SrcReg = MI.getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
            TII->moveToVALU(MI, MDT);
            break;
          }

          MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
          unsigned SMovOp;
          int64_t Imm;
          // If we are just copying an immediate, we can replace the copy
          // with s_mov_b32.
          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
            MI.getOperand(1).ChangeToImmediate(Imm);
            MI.addImplicitDefUseOperands(MF);
            MI.setDesc(TII->get(SMovOp));
            break;
          }
          TII->moveToVALU(MI, MDT);
        } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
          tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
        }

        break;
      }
      case AMDGPU::PHI: {
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // We don't need to fix the PHI if the common dominator of the
        // two incoming blocks terminates with a uniform branch.
        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();

          if (!predsHasDivergentTerminator(MBB0, TRI) &&
              !predsHasDivergentTerminator(MBB1, TRI)) {
            LLVM_DEBUG(dbgs()
                       << "Not fixing PHI for uniform branch: " << MI << '\n');
            break;
          }
        }

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For Example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will Become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction. In this case, we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be over-written.

        SmallSet<unsigned, 8> Visited;
        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
          TII->moveToVALU(MI, MDT);
        }
        break;
      }
      case AMDGPU::REG_SEQUENCE:
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI, MDT);
        break;
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI, MDT);
        }
        break;
      }
      }
    }
  }

  if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);

  return true;
}