LLVM  14.0.0git
GCNPreRAOptimizations.cpp
Go to the documentation of this file.
1 //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass combines split register tuple initialization into a single pseudo:
11 ///
12 /// undef %0.sub1:sreg_64 = S_MOV_B32 1
13 /// %0.sub0:sreg_64 = S_MOV_B32 2
14 /// =>
15 /// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16 ///
17 /// This is to allow rematerialization of a value instead of spilling. It is
18 /// supposed to be done after register coalescer to allow it to do its job and
19 /// before actual register allocation to allow rematerialization.
20 ///
21 /// Right now the pass only handles 64 bit SGPRs with immediate initializers,
22 /// although the same shall be possible with other register classes and
23 /// instructions if necessary.
24 ///
25 //===----------------------------------------------------------------------===//
26 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37 
38 namespace {
39 
40 class GCNPreRAOptimizations : public MachineFunctionPass {
41 private:
42  const SIInstrInfo *TII;
43  const SIRegisterInfo *TRI;
45  LiveIntervals *LIS;
46 
47  bool processReg(Register Reg);
48 
49 public:
50  static char ID;
51 
52  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
54  }
55 
56  bool runOnMachineFunction(MachineFunction &MF) override;
57 
58  StringRef getPassName() const override {
59  return "AMDGPU Pre-RA optimizations";
60  }
61 
62  void getAnalysisUsage(AnalysisUsage &AU) const override {
64  AU.setPreservesAll();
66  }
67 };
68 
69 } // End anonymous namespace.
70 
71 INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72  "AMDGPU Pre-RA optimizations", false, false)
74 INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
76 
77 char GCNPreRAOptimizations::ID = 0;
78 
79 char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80 
82  return new GCNPreRAOptimizations();
83 }
84 
85 bool GCNPreRAOptimizations::processReg(Register Reg) {
86  MachineInstr *Def0 = nullptr;
87  MachineInstr *Def1 = nullptr;
88  uint64_t Init = 0;
89  bool Changed = false;
90  SmallSet<Register, 32> ModifiedRegs;
91  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92 
94  switch (I.getOpcode()) {
95  default:
96  return false;
97  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
98  break;
99  case AMDGPU::COPY: {
100  // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
101  // intermdiate temporary VGPR register. Try to find the defining
102  // accvgpr_write to avoid temporary registers.
103  if (!IsAGPRDst)
104  break;
105 
106  Register SrcReg = I.getOperand(1).getReg();
107 
108  if (!SrcReg.isVirtual())
109  break;
110 
111  // Check if source of copy is from another AGPR.
112  bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
113  if (!IsAGPRSrc)
114  break;
115 
116  // def_instructions() does not look at subregs so it may give us a
117  // different instruction that defines the same vreg but different subreg
118  // so we have to manually check subreg.
119  Register SrcSubReg = I.getOperand(1).getSubReg();
120  for (auto &Def : MRI->def_instructions(SrcReg)) {
121  if (SrcSubReg != Def.getOperand(0).getSubReg())
122  continue;
123 
124  if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
125  MachineOperand DefSrcMO = Def.getOperand(1);
126 
127  // Immediates are not an issue and can be propagated in
128  // postrapseudos pass. Only handle cases where defining
129  // accvgpr_write source is a vreg.
130  if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
131  // Propagate source reg of accvgpr write to this copy instruction
132  I.getOperand(1).setReg(DefSrcMO.getReg());
133  I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
134 
135  // Reg uses were changed, collect unique set of registers to update
136  // live intervals at the end.
137  ModifiedRegs.insert(DefSrcMO.getReg());
138  ModifiedRegs.insert(SrcReg);
139 
140  Changed = true;
141  }
142 
143  // Found the defining accvgpr_write, stop looking any further.
144  break;
145  }
146  }
147  break;
148  }
149  case AMDGPU::S_MOV_B32:
150  if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
151  I.getNumOperands() != 2)
152  return false;
153 
154  switch (I.getOperand(0).getSubReg()) {
155  default:
156  return false;
157  case AMDGPU::sub0:
158  if (Def0)
159  return false;
160  Def0 = &I;
161  Init |= I.getOperand(1).getImm() & 0xffffffff;
162  break;
163  case AMDGPU::sub1:
164  if (Def1)
165  return false;
166  Def1 = &I;
167  Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
168  break;
169  }
170  break;
171  }
172  }
173 
174  // For AGPR reg, check if live intervals need to be updated.
175  if (IsAGPRDst) {
176  if (Changed) {
177  for (Register RegToUpdate : ModifiedRegs) {
178  LIS->removeInterval(RegToUpdate);
179  LIS->createAndComputeVirtRegInterval(RegToUpdate);
180  }
181  }
182 
183  return Changed;
184  }
185 
186  // For SGPR reg, check if we can combine instructions.
187  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
188  return Changed;
189 
190  LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
191  << " =>\n");
192 
193  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
194  LIS->getInstructionIndex(*Def0)))
195  std::swap(Def0, Def1);
196 
197  LIS->RemoveMachineInstrFromMaps(*Def0);
198  LIS->RemoveMachineInstrFromMaps(*Def1);
199  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
200  TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
201  .addImm(Init);
202 
203  Def0->eraseFromParent();
204  Def1->eraseFromParent();
205  LIS->InsertMachineInstrInMaps(*NewI);
206  LIS->removeInterval(Reg);
207  LIS->createAndComputeVirtRegInterval(Reg);
208 
209  LLVM_DEBUG(dbgs() << " " << *NewI);
210 
211  return true;
212 }
213 
214 bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
215  if (skipFunction(MF.getFunction()))
216  return false;
217 
218  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
219  TII = ST.getInstrInfo();
220  MRI = &MF.getRegInfo();
221  LIS = &getAnalysis<LiveIntervals>();
222  TRI = ST.getRegisterInfo();
223 
224  bool Changed = false;
225 
226  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
228  if (!LIS->hasInterval(Reg))
229  continue;
230  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
231  if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
232  (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
233  continue;
234 
235  Changed |= processReg(Reg);
236  }
237 
238  return Changed;
239 }
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
llvm::MCRegisterClass::getSizeInBits
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
Definition: MCRegisterInfo.h:86
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, "AMDGPU Pre-RA optimizations", false, false) INITIALIZE_PASS_END(GCNPreRAOptimizations
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:640
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::createGCNPreRAOptimizationsPass
FunctionPass * createGCNPreRAOptimizationsPass()
Definition: GCNPreRAOptimizations.cpp:81
llvm::SlotIndex::isEarlierInstr
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:203
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
optimizations
Pre RA optimizations
Definition: GCNPreRAOptimizations.cpp:74
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
AMDGPUMCTargetDesc.h
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
uint64_t
LiveIntervals.h
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineRegisterInfo::def_instructions
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:405
I
#define I(x, y, z)
Definition: MD5.cpp:59
MachineFunctionPass.h
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
RA
SI optimize exec mask operations pre RA
Definition: SIOptimizeExecMaskingPreRA.cpp:71
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Init
Definition: Record.h:271
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNPreRAOptimizations.cpp:36
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::TargetRegisterClass::MC
const MCRegisterClass * MC
Definition: TargetRegisterInfo.h:53
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:79
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:680
InitializePasses.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37