LLVM  9.0.0svn
SIFixupVectorISel.cpp
Go to the documentation of this file.
1 //===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 /// SIFixupVectorISel pass cleans up post ISEL Vector issues.
9 /// Currently this will convert GLOBAL_{LOAD|STORE}_*
10 /// and GLOBAL_Atomic_* instructions into their _SADDR variants,
11 /// feeding the sreg into the saddr field of the new instruction.
12 /// We currently handle a REG_SEQUENCE feeding the vaddr
13 /// and decompose it into a base and index.
14 ///
15 /// Transform:
16 /// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
17 /// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
18 /// %24:vgpr_32, %19:sreg_64_xexec
19 /// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
20 /// %11:vreg_64 = COPY %16:vreg_64
21 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
22 /// Into:
23 /// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
24 /// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
25 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
26 ///
27 //===----------------------------------------------------------------------===//
28 //
29 
30 #include "AMDGPU.h"
31 #include "AMDGPUSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/Support/Debug.h"
41 #define DEBUG_TYPE "si-fixup-vector-isel"
42 
43 using namespace llvm;
44 
// Arguments of the boolean command-line option that gates this pass's
// GLOBAL -> _SADDR rewrite: the flag name, its help text, and a default of
// false.
// NOTE(review): the line that opens this declaration is not present in this
// listing; presumably `static cl::opt<bool> EnableGlobalSGPRAddr(` - confirm
// against the original file.
46  "amdgpu-enable-global-sgpr-addr",
47  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
48  cl::init(false));
49 
// Pass statistics. NumSGPRGlobalOccurs is bumped for every instruction whose
// opcode has an _SADDR counterpart; NumSGPRGlobalSaddrs is bumped only for
// those for which a base/index pair was found and the rewrite is performed.
50 STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
51 STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
52 
53 namespace {
54 
/// Machine function pass that applies the post-ISel vector fixups described
/// in the file header to each machine function it is run on.
55 class SIFixupVectorISel : public MachineFunctionPass {
56 public:
  // Pass identification object; defined after the anonymous namespace and
  // also exposed there through llvm::SIFixupVectorISelID.
57  static char ID;
58 
59 public:
  // Constructs the pass, handing ID to the MachineFunctionPass base.
  // NOTE(review): one line of the constructor body is absent from this
  // listing; presumably the initializeSIFixupVectorISelPass(...) call - confirm.
60  SIFixupVectorISel() : MachineFunctionPass(ID) {
62  }
63 
  // Per-function entry point; returns true when the function was modified.
64  bool runOnMachineFunction(MachineFunction &MF) override;
65 
  // Declares that the pass preserves the CFG.
  // NOTE(review): one line of this body is absent from this listing;
  // presumably the forwarding call to the base class getAnalysisUsage - confirm.
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
67  AU.setPreservesCFG();
69  }
70 };
71 
72 } // End anonymous namespace.
73 
// Register the pass under the DEBUG_TYPE argument string with the display
// name "SI Fixup Vector ISel"; the two trailing `false` values are the macro's
// cfg and analysis arguments.
74 INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
75  "SI Fixup Vector ISel", false, false)
76 
// Storage for the pass identification object declared in the class.
77 char SIFixupVectorISel::ID = 0;
78 
// Reference to the pass ID that is visible in namespace llvm, so code outside
// the anonymous namespace can name this pass.
79 char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
80 
// Factory body: returns a newly allocated SIFixupVectorISel pass object.
// NOTE(review): the signature line is not present in this listing;
// presumably `FunctionPass *llvm::createSIFixupVectorISelPass() {` - confirm
// against the original file.
82  return new SIFixupVectorISel();
83 }
84 
// Walks the definitions feeding Op, looking for a V_ADD_I32_e64 whose two
// addends can serve as the scalar base and vector index of an address.
//
// Definitions are followed through a worklist: a COPY contributes its source
// operand, a REG_SEQUENCE with exactly five operands contributes both of its
// inputs, and any other opcode is dropped. A V_ADD_I32_e64 is considered only
// when its operands 2 and 3 carry no subregister index, each is defined by a
// COPY, and each COPY source has a 64-bit register class.
//
// Returns true once such an add is found whose base COPY source is an SGPR
// and whose index COPY source has VGPRs; BaseReg and IndexReg are then set to
// those COPY sources and the kill flags on both registers are cleared.
// Returns false when the worklist runs dry, or immediately when the
// SGPR/VGPR check fails. BaseReg and IndexReg may be overwritten even on
// paths that end up returning false, so they are only meaningful after a
// true result.
//
// NOTE(review): the first line of the signature, the MRI parameter and the
// Worklist declaration are not present in this listing.
86  unsigned &BaseReg,
87  unsigned &IndexReg,
89  const SIRegisterInfo *TRI) {
91  Worklist.push_back(Op);
92  while (!Worklist.empty()) {
93  MachineOperand *WOp = Worklist.pop_back_val();
  // Skip operands that cannot be chased to a defining instruction.
  // NOTE(review): the second half of this condition is missing from this
  // listing; presumably a virtual-register test on WOp->getReg() - confirm.
94  if (!WOp->isReg() ||
96  continue;
  // NOTE(review): getUniqueVRegDef can return null, and DefInst is
  // dereferenced on the next line without a check (the MI lookups further
  // down do test for null) - confirm a unique def is guaranteed here.
97  MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
  // Every `continue` inside this switch resumes the worklist loop.
98  switch (DefInst->getOpcode()) {
99  default:
100  continue;
101  case AMDGPU::COPY:
  // Look through the copy to its source operand.
102  Worklist.push_back(&DefInst->getOperand(1));
103  break;
104  case AMDGPU::REG_SEQUENCE:
  // Only the two-input form (def + 2 x (reg, subreg index)) is handled.
105  if (DefInst->getNumOperands() != 5)
106  continue;
107  Worklist.push_back(&DefInst->getOperand(1));
108  Worklist.push_back(&DefInst->getOperand(3));
109  break;
110  case AMDGPU::V_ADD_I32_e64:
111  // The V_ADD_* and its analogous V_ADDC_* are generated by
112  // a previous pass which lowered from an ADD_64_PSEUDO,
113  // which generates subregs to break up the 64 bit args.
114  if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
115  continue;
116  BaseReg = DefInst->getOperand(2).getReg();
117  if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
118  continue;
119  IndexReg = DefInst->getOperand(3).getReg();
120  // Chase the IndexReg.
121  MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
122  if (!MI || !MI->isCopy())
123  continue;
124  // Make sure the reg class is 64 bit for Index.
125  // If the Index register is a subreg, we want it to reference
126  // a 64 bit register which we will use as the Index reg.
127  const TargetRegisterClass *IdxRC, *BaseRC;
128  IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
129  if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
130  continue;
131  IndexReg = MI->getOperand(1).getReg();
132  // Chase the BaseReg.
133  MI = MRI.getUniqueVRegDef(BaseReg);
134  if (!MI || !MI->isCopy())
135  continue;
136  // Make sure the register class is 64 bit for Base.
137  BaseReg = MI->getOperand(1).getReg();
138  BaseRC = MRI.getRegClass(BaseReg);
139  if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
140  continue;
141  // Make sure Base is SReg and Index is VReg.
  // A mismatch here ends the whole search, even if the worklist still
  // holds unvisited operands.
142  if (!TRI->isSGPRReg(MRI, BaseReg))
143  return false;
144  if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
145  return false;
146  // Clear any killed flags on Index and Base regs, used later.
147  MRI.clearKillFlags(IndexReg);
148  MRI.clearKillFlags(BaseReg);
149  return true;
150  }
151  }
  // Worklist exhausted without finding a usable add.
152  return false;
153 }
154 
155 // Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
//
// For each instruction in MBB whose opcode has an _SADDR counterpart
// (getGlobalSaddrOp returns a non-negative opcode), tries to split the vaddr
// operand into a base and an index register via findSRegBaseAndIndex. On
// success a replacement instruction with the _SADDR opcode is built from the
// old instruction's operands and the old instruction is erased.
// Returns true if at least one instruction was replaced.
//
// NOTE(review): the first line of the signature and the MRI parameter are not
// present in this listing; ST is not referenced in the lines visible here.
157  MachineFunction &MF,
159  const GCNSubtarget &ST,
160  const SIInstrInfo *TII,
161  const SIRegisterInfo *TRI) {
  // NOTE(review): the condition guarding this early return is missing from
  // this listing; presumably it tests the amdgpu-enable-global-sgpr-addr
  // option - confirm against the original file.
163  return false;
164  bool FuncModified = false;
  // Next is captured before the body runs because the body may erase MI.
  // NOTE(review): the declaration of I and Next is missing from this listing.
166  for (I = MBB.begin(); I != MBB.end(); I = Next) {
167  Next = std::next(I);
168  MachineInstr &MI = *I;
169  int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
  // A negative result means this opcode has no _SADDR form.
170  if (NewOpcd < 0)
171  continue;
172  // Update our statistics on opportunities seen.
173  ++NumSGPRGlobalOccurs;
174  LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
175  // Need a Base and Index or we can't transform to _SADDR.
176  unsigned BaseReg = 0;
177  unsigned IndexReg = 0;
178  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
179  if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
180  continue;
181  ++NumSGPRGlobalSaddrs;
182  FuncModified = true;
183  // Create the new _SADDR Memory instruction.
  // Operands are appended in this order: [vdst], IndexReg, [vdata], BaseReg,
  // offset, [glc], slc, [vdst_in]; bracketed ones only when the old
  // instruction has them.
184  bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
185  MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
186  MachineInstr *NewGlob = nullptr;
187  NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
188  if (HasVdst)
189  NewGlob->addOperand(MF, MI.getOperand(0));
190  NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
191  if (VData)
192  NewGlob->addOperand(MF, *VData);
193  NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
  // NOTE(review): offset (and slc below) are dereferenced without a null
  // check, so both are assumed to exist on every opcode that has an _SADDR
  // form - TODO confirm.
194  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
195 
196  MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
197  // Atomics don't have a GLC, so omit the field if not there.
198  if (Glc)
199  NewGlob->addOperand(MF, *Glc);
200  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
201  // _D16 have a vdst_in operand, copy it in.
202  MachineOperand *VDstInOp = TII->getNamedOperand(MI,
203  AMDGPU::OpName::vdst_in);
204  if (VDstInOp)
205  NewGlob->addOperand(MF, *VDstInOp);
  // Carry over the implicit operands and memory references of the old
  // instruction.
206  NewGlob->copyImplicitOps(MF, MI);
207  NewGlob->cloneMemRefs(MF, MI);
208  // Remove the old Global Memop instruction.
209  MI.eraseFromParent();
210  LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
211  }
212  return FuncModified;
213 }
214 
// Pass entry point. Returns false without touching anything when
// skipFunction() asks for the function to be skipped; otherwise runs
// fixupGlobalSaddr over every basic block and returns true if any block was
// modified.
// NOTE(review): MRI is passed to fixupGlobalSaddr below, but its declaration
// is not visible in this listing; presumably `MF.getRegInfo()` - confirm.
215 bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
216  if (skipFunction(MF.getFunction()))
217  return false;
218 
220  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
221  const SIInstrInfo *TII = ST.getInstrInfo();
222  const SIRegisterInfo *TRI = ST.getRegisterInfo();
223 
224  bool FuncModified = false;
225  for (MachineBasicBlock &MBB : MF) {
226  // Clean up Saddr opportunities that ISel missed.
227  FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
228  }
229  return FuncModified;
230 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getSubReg() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
const SIInstrInfo * getInstrInfo() const override
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
char & SIFixupVectorISelID
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
unsigned getID() const
Return the register class ID number.
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
FunctionPass * createSIFixupVectorISelPass()
void initializeSIFixupVectorISelPass(PassRegistry &)
#define DEBUG_TYPE
static bool fixupGlobalSaddr(MachineBasicBlock &MBB, MachineFunction &MF, MachineRegisterInfo &MRI, const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
bool hasVGPRs(const TargetRegisterClass *RC) const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
bool isCopy() const
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:373
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static cl::opt< bool > EnableGlobalSGPRAddr("amdgpu-enable-global-sgpr-addr", cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), cl::init(false))
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
static bool findSRegBaseAndIndex(MachineOperand *Op, unsigned &BaseReg, unsigned &IndexReg, MachineRegisterInfo &MRI, const SIRegisterInfo *TRI)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
bool isReg() const
isReg - Tests if this is a MO_Register operand.
IRTranslator LLVM IR MI
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
const SIRegisterInfo * getRegisterInfo() const override