LLVM  3.7.0
SIFixSGPRCopies.cpp
Go to the documentation of this file.
1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
12 /// will sometimes generate these illegal copies in situations like this:
13 ///
14 /// Register Class <vsrc> is the union of <vgpr> and <sgpr>
15 ///
16 /// BB0:
17 /// %vreg0 <sgpr> = SCALAR_INST
18 /// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
19 /// ...
20 /// BRANCH %cond BB1, BB2
21 /// BB1:
22 /// %vreg2 <vgpr> = VECTOR_INST
23 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
24 /// BB2:
25 /// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
26 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
27 ///
28 ///
29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
30 /// code will look like this:
31 ///
32 /// BB0:
33 /// %vreg0 <sgpr> = SCALAR_INST
34 /// ...
35 /// BRANCH %cond BB1, BB2
36 /// BB1:
37 /// %vreg2 <vgpr> = VECTOR_INST
38 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
39 /// BB2:
40 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
41 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
42 ///
43 /// Now that the result of the PHI instruction is an SGPR, the register
44 /// allocator is forced to constrain the register class of %vreg3 to
45 /// <sgpr>, so we end up with final code like this:
46 ///
47 /// BB0:
48 /// %vreg0 <sgpr> = SCALAR_INST
49 /// ...
50 /// BRANCH %cond BB1, BB2
51 /// BB1:
52 /// %vreg2 <vgpr> = VECTOR_INST
53 /// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
54 /// BB2:
55 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
56 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
57 ///
58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
59 ///
60 /// In order to avoid this problem, this pass searches for PHI instructions
61 /// which define a <vsrc> register and constrains its definition class to
62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
64 /// will be unable to perform the COPY removal from the above example which
65 /// ultimately led to the creation of an illegal COPY.
66 //===----------------------------------------------------------------------===//
67 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "sgpr-copies"
81 
82 namespace {
83 
84 class SIFixSGPRCopies : public MachineFunctionPass {
85 
86 private:
87  static char ID;
88  const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
89  const MachineRegisterInfo &MRI,
90  unsigned Reg,
91  unsigned SubReg) const;
92  const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
93  const MachineRegisterInfo &MRI,
94  unsigned Reg,
95  unsigned SubReg) const;
96  bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
97  const MachineRegisterInfo &MRI) const;
98 
99 public:
100  SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
101 
102  bool runOnMachineFunction(MachineFunction &MF) override;
103 
104  const char *getPassName() const override {
105  return "SI Fix SGPR copies";
106  }
107 
108 };
109 
110 } // End anonymous namespace
111 
112 char SIFixSGPRCopies::ID = 0;
113 
115  return new SIFixSGPRCopies(tm);
116 }
117 
118 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
119  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
120  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
121  if (!MI.getOperand(i).isReg() ||
123  continue;
124 
125  if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
126  return true;
127  }
128  return false;
129 }
130 
131 /// This functions walks the use list of Reg until it finds an Instruction
132 /// that isn't a COPY returns the register class of that instruction.
133 /// \return The register defined by the first non-COPY instruction.
134 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
135  const SIRegisterInfo *TRI,
136  const MachineRegisterInfo &MRI,
137  unsigned Reg,
138  unsigned SubReg) const {
139 
140  const TargetRegisterClass *RC
142  MRI.getRegClass(Reg) :
143  TRI->getPhysRegClass(Reg);
144 
145  RC = TRI->getSubRegClass(RC, SubReg);
147  I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
148  switch (I->getOpcode()) {
149  case AMDGPU::COPY:
150  RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
151  I->getOperand(0).getReg(),
152  I->getOperand(0).getSubReg()));
153  break;
154  }
155  }
156 
157  return RC;
158 }
159 
160 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
161  const SIRegisterInfo *TRI,
162  const MachineRegisterInfo &MRI,
163  unsigned Reg,
164  unsigned SubReg) const {
166  const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
167  return TRI->getSubRegClass(RC, SubReg);
168  }
169  MachineInstr *Def = MRI.getVRegDef(Reg);
170  if (Def->getOpcode() != AMDGPU::COPY) {
171  return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
172  }
173 
174  return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
175  Def->getOperand(1).getSubReg());
176 }
177 
178 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
179  const SIRegisterInfo *TRI,
180  const MachineRegisterInfo &MRI) const {
181 
182  unsigned DstReg = Copy.getOperand(0).getReg();
183  unsigned SrcReg = Copy.getOperand(1).getReg();
184  unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
185 
187  // If the destination register is a physical register there isn't really
188  // much we can do to fix this.
189  return false;
190  }
191 
192  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
193 
194  const TargetRegisterClass *SrcRC;
195 
197  MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
198  return false;
199 
200  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
201  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
202 }
203 
204 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
205  MachineRegisterInfo &MRI = MF.getRegInfo();
206  const SIRegisterInfo *TRI =
207  static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
208  const SIInstrInfo *TII =
209  static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
210  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
211  BI != BE; ++BI) {
212 
213  MachineBasicBlock &MBB = *BI;
214  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
215  I != E; ++I) {
216  MachineInstr &MI = *I;
217  if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
218  DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
219  DEBUG(MI.print(dbgs()));
220  TII->moveToVALU(MI);
221 
222  }
223 
224  switch (MI.getOpcode()) {
225  default: continue;
226  case AMDGPU::PHI: {
227  DEBUG(dbgs() << "Fixing PHI: " << MI);
228 
229  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
230  const MachineOperand &Op = MI.getOperand(i);
231  unsigned Reg = Op.getReg();
232  const TargetRegisterClass *RC
233  = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
234 
235  MRI.constrainRegClass(Op.getReg(), RC);
236  }
237  unsigned Reg = MI.getOperand(0).getReg();
238  const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
239  MI.getOperand(0).getSubReg());
240  if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
241  MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
242  }
243 
244  if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
245  break;
246 
247  // If a PHI node defines an SGPR and any of its operands are VGPRs,
248  // then we need to move it to the VALU.
249  //
250  // Also, if a PHI node defines an SGPR and has all SGPR operands
251  // we must move it to the VALU, because the SGPR operands will
252  // all end up being assigned the same register, which means
253  // there is a potential for a conflict if different threads take
254  // different control flow paths.
255  //
256  // For Example:
257  //
258  // sgpr0 = def;
259  // ...
260  // sgpr1 = def;
261  // ...
262  // sgpr2 = PHI sgpr0, sgpr1
263  // use sgpr2;
264  //
265  // Will Become:
266  //
267  // sgpr2 = def;
268  // ...
269  // sgpr2 = def;
270  // ...
271  // use sgpr2
272  //
273  // FIXME: This is OK if the branching decision is made based on an
274  // SGPR value.
275  bool SGPRBranch = false;
276 
277  // The one exception to this rule is when one of the operands
278  // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
279  // instruction. In this case, there we know the program will
280  // never enter the second block (the loop) without entering
281  // the first block (where the condition is computed), so there
282  // is no chance for values to be over-written.
283 
284  bool HasBreakDef = false;
285  for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
286  unsigned Reg = MI.getOperand(i).getReg();
287  if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
288  TII->moveToVALU(MI);
289  break;
290  }
291  MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
292  assert(DefInstr);
293  switch(DefInstr->getOpcode()) {
294 
295  case AMDGPU::SI_BREAK:
296  case AMDGPU::SI_IF_BREAK:
297  case AMDGPU::SI_ELSE_BREAK:
298  // If we see a PHI instruction that defines an SGPR, then that PHI
299  // instruction has already been considered and should have
300  // a *_BREAK as an operand.
301  case AMDGPU::PHI:
302  HasBreakDef = true;
303  break;
304  }
305  }
306 
307  if (!SGPRBranch && !HasBreakDef)
308  TII->moveToVALU(MI);
309  break;
310  }
311  case AMDGPU::REG_SEQUENCE: {
312  if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
313  !hasVGPROperands(MI, TRI))
314  continue;
315 
316  DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
317 
318  TII->moveToVALU(MI);
319  break;
320  }
321  case AMDGPU::INSERT_SUBREG: {
322  const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
323  DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
324  Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
325  Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
326  if (TRI->isSGPRClass(DstRC) &&
327  (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
328  DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
329  TII->moveToVALU(MI);
330  }
331  break;
332  }
333  }
334  }
335  }
336 
337  return true;
338 }
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
AMDGPU specific subclass of TargetSubtarget.
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
void moveToVALU(MachineInstr &MI) const
Replace this instruction's opcode with the equivalent VALU opcode.
bool hasVGPRs(const TargetRegisterClass *RC) const
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI)
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const TargetRegisterClass * getRegClass(unsigned Reg) const
getRegClass - Return the register class of the specified virtual register.
bool isSGPRClass(const TargetRegisterClass *RC) const
Reg
All possible values of the reg field in the ModR/M byte.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
defusechain_iterator - This class provides iterator support for machine operands in the function that...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
FunctionPass * createSIFixSGPRCopiesPass(TargetMachine &tm)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
bundle_iterator< MachineInstr, instr_iterator > iterator
void print(raw_ostream &OS, bool SkipOpers=false) const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
INSERT_SUBREG - This instruction takes three operands: a register that has subregisters, a register providing an insert value, and a subregister index.
Definition: TargetOpcodes.h:49
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
unsigned getSubReg() const
REG_SEQUENCE - This variadic instruction is used to form a register that represents a consecutive seq...
Definition: TargetOpcodes.h:82
MachineOperand class - Representation of each machine instruction operand.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
Representation of each machine instruction.
Definition: MachineInstr.h:51
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:54
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
static use_instr_iterator use_instr_end()
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
Primary interface to the complete machine description for the target machine.
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.