LLVM  10.0.0svn
SIFixupVectorISel.cpp
Go to the documentation of this file.
1 //===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 /// SIFixupVectorISel pass cleans up post ISEL Vector issues.
9 /// Currently this will convert GLOBAL_{LOAD|STORE}_*
10 /// and GLOBAL_ATOMIC_* instructions into their _SADDR variants,
11 /// feeding the sreg into the saddr field of the new instruction.
12 /// We currently handle a REG_SEQUENCE feeding the vaddr
13 /// and decompose it into a base and index.
14 ///
15 /// Transform:
16 /// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
17 /// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
18 /// %24:vgpr_32, %19:sreg_64_xexec
19 /// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
20 /// %11:vreg_64 = COPY %16:vreg_64
21 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
22 /// Into:
23 /// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
24 /// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
25 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
26 ///
27 //===----------------------------------------------------------------------===//
28 //
29 
30 #include "AMDGPU.h"
31 #include "AMDGPUSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/Support/Debug.h"
41 #define DEBUG_TYPE "si-fixup-vector-isel"
42 
43 using namespace llvm;
44 
46  "amdgpu-enable-global-sgpr-addr",
47  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
48  cl::init(false));
49 
50 STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
51 STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
52 
53 namespace {
54 
55 class SIFixupVectorISel : public MachineFunctionPass {
56 public:
57  static char ID;
58 
59 public:
60  SIFixupVectorISel() : MachineFunctionPass(ID) {
62  }
63 
64  bool runOnMachineFunction(MachineFunction &MF) override;
65 
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
67  AU.setPreservesCFG();
69  }
70 };
71 
72 } // End anonymous namespace.
73 
74 INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
75  "SI Fixup Vector ISel", false, false)
76 
77 char SIFixupVectorISel::ID = 0;
78 
79 char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
80 
82  return new SIFixupVectorISel();
83 }
84 
86  unsigned &BaseReg,
87  unsigned &IndexReg,
89  const SIRegisterInfo *TRI) {
91  Worklist.push_back(Op);
92  while (!Worklist.empty()) {
93  MachineOperand *WOp = Worklist.pop_back_val();
94  if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg()))
95  continue;
96  MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
97  switch (DefInst->getOpcode()) {
98  default:
99  continue;
100  case AMDGPU::COPY:
101  Worklist.push_back(&DefInst->getOperand(1));
102  break;
103  case AMDGPU::REG_SEQUENCE:
104  if (DefInst->getNumOperands() != 5)
105  continue;
106  Worklist.push_back(&DefInst->getOperand(1));
107  Worklist.push_back(&DefInst->getOperand(3));
108  break;
109  case AMDGPU::V_ADD_I32_e64:
110  // The V_ADD_* and its analogous V_ADDC_* are generated by
111  // a previous pass which lowered from an ADD_64_PSEUDO,
112  // which generates subregs to break up the 64 bit args.
113  if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
114  continue;
115  BaseReg = DefInst->getOperand(2).getReg();
116  if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
117  continue;
118  IndexReg = DefInst->getOperand(3).getReg();
119  // Chase the IndexReg.
120  MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
121  if (!MI || !MI->isCopy())
122  continue;
123  // Make sure the reg class is 64 bit for Index.
124  // If the Index register is a subreg, we want it to reference
125  // a 64 bit register which we will use as the Index reg.
126  const TargetRegisterClass *IdxRC, *BaseRC;
127  IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
128  if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
129  continue;
130  IndexReg = MI->getOperand(1).getReg();
131  // Chase the BaseReg.
132  MI = MRI.getUniqueVRegDef(BaseReg);
133  if (!MI || !MI->isCopy())
134  continue;
135  // Make sure the register class is 64 bit for Base.
136  BaseReg = MI->getOperand(1).getReg();
137  BaseRC = MRI.getRegClass(BaseReg);
138  if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
139  continue;
140  // Make sure Base is SReg and Index is VReg.
141  if (!TRI->isSGPRReg(MRI, BaseReg))
142  return false;
143  if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
144  return false;
145  // Clear any kill flags on the Index and Base regs; they are used later.
146  MRI.clearKillFlags(IndexReg);
147  MRI.clearKillFlags(BaseReg);
148  return true;
149  }
150  }
151  return false;
152 }
153 
154 // Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
156  MachineFunction &MF,
158  const GCNSubtarget &ST,
159  const SIInstrInfo *TII,
160  const SIRegisterInfo *TRI) {
162  return false;
163  bool FuncModified = false;
165  for (I = MBB.begin(); I != MBB.end(); I = Next) {
166  Next = std::next(I);
167  MachineInstr &MI = *I;
168  int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
169  if (NewOpcd < 0)
170  continue;
171  // Update our statistics on opportunities seen.
172  ++NumSGPRGlobalOccurs;
173  LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
174  // Need a Base and Index or we can't transform to _SADDR.
175  unsigned BaseReg = 0;
176  unsigned IndexReg = 0;
177  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
178  if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
179  continue;
180  ++NumSGPRGlobalSaddrs;
181  FuncModified = true;
182  // Create the new _SADDR Memory instruction.
183  bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
184  MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
185  MachineInstr *NewGlob = nullptr;
186  NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
187  if (HasVdst)
188  NewGlob->addOperand(MF, MI.getOperand(0));
189  NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
190  if (VData)
191  NewGlob->addOperand(MF, *VData);
192  NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
193  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
194 
195  MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
196  // Atomics don't have a GLC, so omit the field if not there.
197  if (Glc)
198  NewGlob->addOperand(MF, *Glc);
199 
200  MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
201  if (DLC)
202  NewGlob->addOperand(MF, *DLC);
203 
204  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
205  // _D16 variants have a vdst_in operand; copy it in.
206  MachineOperand *VDstInOp = TII->getNamedOperand(MI,
207  AMDGPU::OpName::vdst_in);
208  if (VDstInOp)
209  NewGlob->addOperand(MF, *VDstInOp);
210  NewGlob->copyImplicitOps(MF, MI);
211  NewGlob->cloneMemRefs(MF, MI);
212  // Remove the old Global Memop instruction.
213  MI.eraseFromParent();
214  LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
215  }
216  return FuncModified;
217 }
218 
219 bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
220  if (skipFunction(MF.getFunction()))
221  return false;
222 
224  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
225  const SIInstrInfo *TII = ST.getInstrInfo();
226  const SIRegisterInfo *TRI = ST.getRegisterInfo();
227 
228  bool FuncModified = false;
229  for (MachineBasicBlock &MBB : MF) {
230  // Clean up missed Saddr opportunities from ISel.
231  FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
232  }
233  return FuncModified;
234 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:385
unsigned getSubReg() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
const SIInstrInfo * getInstrInfo() const override
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
char & SIFixupVectorISelID
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:414
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
unsigned getID() const
Return the register class ID number.
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
FunctionPass * createSIFixupVectorISelPass()
void initializeSIFixupVectorISelPass(PassRegistry &)
#define DEBUG_TYPE
static bool fixupGlobalSaddr(MachineBasicBlock &MBB, MachineFunction &MF, MachineRegisterInfo &MRI, const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
bool hasVGPRs(const TargetRegisterClass *RC) const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isCopy() const
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static cl::opt< bool > EnableGlobalSGPRAddr("amdgpu-enable-global-sgpr-addr", cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), cl::init(false))
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool findSRegBaseAndIndex(MachineOperand *Op, unsigned &BaseReg, unsigned &IndexReg, MachineRegisterInfo &MRI, const SIRegisterInfo *TRI)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
IRTranslator LLVM IR MI
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
const SIRegisterInfo * getRegisterInfo() const override