LLVM  13.0.0git
SILateBranchLowering.cpp
Go to the documentation of this file.
1 //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass mainly lowers early terminate pseudo instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/InitializePasses.h"
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "si-late-branch-lowering"
24 
25 namespace {
26 
27 class SILateBranchLowering : public MachineFunctionPass {
28 private:
29  const SIRegisterInfo *TRI = nullptr;
30  const SIInstrInfo *TII = nullptr;
31  MachineDominatorTree *MDT = nullptr;
32 
33  void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
34 
35 public:
36  static char ID;
37 
38  unsigned MovOpc;
39  Register ExecReg;
40 
41  SILateBranchLowering() : MachineFunctionPass(ID) {}
42 
43  bool runOnMachineFunction(MachineFunction &MF) override;
44 
45  StringRef getPassName() const override {
46  return "SI Final Branch Preparation";
47  }
48 
49  void getAnalysisUsage(AnalysisUsage &AU) const override {
53  }
54 };
55 
56 } // end anonymous namespace
57 
59 
60 INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
61  "SI insert s_cbranch_execz instructions", false, false)
63 INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
64  "SI insert s_cbranch_execz instructions", false, false)
65 
66 char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
67 
69  MachineBasicBlock::iterator I, DebugLoc DL,
71  const Function &F = MF.getFunction();
72  bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
73 
74  // Check if hardware has been configured to expect color or depth exports.
75  bool HasExports =
77 
78  // Prior to GFX10, hardware always expects at least one export for PS.
79  bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
80 
81  if (IsPS && (HasExports || MustExport)) {
82  // Generate "null export" if hardware is expecting PS to export.
83  BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
85  .addReg(AMDGPU::VGPR0, RegState::Undef)
86  .addReg(AMDGPU::VGPR0, RegState::Undef)
87  .addReg(AMDGPU::VGPR0, RegState::Undef)
88  .addReg(AMDGPU::VGPR0, RegState::Undef)
89  .addImm(1) // vm
90  .addImm(0) // compr
91  .addImm(0); // en
92  }
93 
94  // s_endpgm
95  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
96 }
97 
99  MachineDominatorTree *MDT) {
100  MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
101 
102  // Update dominator tree
103  using DomTreeT = DomTreeBase<MachineBasicBlock>;
105  for (MachineBasicBlock *Succ : SplitBB->successors()) {
106  DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
107  DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
108  }
109  DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
110  MDT->getBase().applyUpdates(DTUpdates);
111 }
112 
113 void SILateBranchLowering::earlyTerm(MachineInstr &MI,
114  MachineBasicBlock *EarlyExitBlock) {
115  MachineBasicBlock &MBB = *MI.getParent();
116  const DebugLoc DL = MI.getDebugLoc();
117 
118  auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
119  .addMBB(EarlyExitBlock);
120  auto Next = std::next(MI.getIterator());
121 
122  if (Next != MBB.end() && !Next->isTerminator())
123  splitBlock(MBB, *BranchMI, MDT);
124 
125  MBB.addSuccessor(EarlyExitBlock);
126  MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
127 }
128 
129 bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
130  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
131  TII = ST.getInstrInfo();
132  TRI = &TII->getRegisterInfo();
133  MDT = &getAnalysis<MachineDominatorTree>();
134 
135  MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
136  ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
137 
138  SmallVector<MachineInstr *, 4> EarlyTermInstrs;
139  SmallVector<MachineInstr *, 1> EpilogInstrs;
140  bool MadeChange = false;
141 
142  for (MachineBasicBlock &MBB : MF) {
144  for (I = MBB.begin(); I != MBB.end(); I = Next) {
145  Next = std::next(I);
146  MachineInstr &MI = *I;
147 
148  switch (MI.getOpcode()) {
149  case AMDGPU::S_BRANCH:
150  // Optimize out branches to the next block.
151  // This only occurs in -O0 when BranchFolding is not executed.
152  if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
153  assert(&MI == &MBB.back());
154  MI.eraseFromParent();
155  MadeChange = true;
156  }
157  break;
158 
159  case AMDGPU::SI_EARLY_TERMINATE_SCC0:
160  EarlyTermInstrs.push_back(&MI);
161  break;
162 
163  case AMDGPU::SI_RETURN_TO_EPILOG:
164  EpilogInstrs.push_back(&MI);
165  break;
166 
167  default:
168  break;
169  }
170  }
171  }
172 
173  // Lower any early exit branches first
174  if (!EarlyTermInstrs.empty()) {
175  MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
176  DebugLoc DL;
177 
178  MF.insert(MF.end(), EarlyExitBlock);
179  BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
180  ExecReg)
181  .addImm(0);
182  generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
183 
184  for (MachineInstr *Instr : EarlyTermInstrs) {
185  // Early termination in GS does nothing
186  if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
187  earlyTerm(*Instr, EarlyExitBlock);
188  Instr->eraseFromParent();
189  }
190 
191  EarlyTermInstrs.clear();
192  MadeChange = true;
193  }
194 
195  // Now check return to epilog instructions occur at function end
196  if (!EpilogInstrs.empty()) {
197  MachineBasicBlock *EmptyMBBAtEnd = nullptr;
198  assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
199 
200  // If there are multiple returns to epilog then all will
201  // become jumps to new empty end block.
202  if (EpilogInstrs.size() > 1) {
203  EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
204  MF.insert(MF.end(), EmptyMBBAtEnd);
205  }
206 
207  for (auto MI : EpilogInstrs) {
208  auto MBB = MI->getParent();
209  if (MBB == &MF.back() && MI == &MBB->back())
210  continue;
211 
212  // SI_RETURN_TO_EPILOG is not the last instruction.
213  // Jump to empty block at function end.
214  if (!EmptyMBBAtEnd) {
215  EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
216  MF.insert(MF.end(), EmptyMBBAtEnd);
217  }
218 
219  MBB->addSuccessor(EmptyMBBAtEnd);
220  MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd);
221  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
222  .addMBB(EmptyMBBAtEnd);
223  MI->eraseFromParent();
224  MadeChange = true;
225  }
226 
227  EpilogInstrs.clear();
228  }
229 
230  return MadeChange;
231 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
SIMachineFunctionInfo.h
llvm::Function
Definition: Function.h:61
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::AMDGPU::Exp::ET_NULL
@ ET_NULL
Definition: SIDefines.h:736
splitBlock
static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, MachineDominatorTree *MDT)
Definition: SILateBranchLowering.cpp:98
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:746
llvm::SIMachineFunctionInfo::returnsVoid
bool returnsVoid() const
Definition: SIMachineFunctionInfo.h:844
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
GCNSubtarget.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineBasicBlock::splitAt
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Definition: MachineBasicBlock.cpp:965
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:1455
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SILateBranchLowering.cpp:23
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:622
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
AMDGPUMCTargetDesc.h
llvm::MachineDominatorTree::getBase
DomTreeT & getBase()
Definition: MachineDominators.h:87
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
I
#define I(x, y, z)
Definition: MD5.cpp:59
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::DominatorTreeBase
Core dominator tree base class.
Definition: LoopInfo.h:65
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1312
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) INITIALIZE_PASS_END(SILateBranchLowering
llvm::MachineBasicBlock::isLayoutSuccessor
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Definition: MachineBasicBlock.cpp:912
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
llvm::MachineInstrBundleIterator< MachineInstr >
generateEndPgm
static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, const SIInstrInfo *TII, MachineFunction &MF)
Definition: SILateBranchLowering.cpp:68
InitializePasses.h
llvm::AMDGPU::getHasColorExport
bool getHasColorExport(const Function &F)
Definition: AMDGPUBaseInfo.cpp:1347
llvm::AMDGPU::getHasDepthExport
bool getHasDepthExport(const Function &F)
Definition: AMDGPUBaseInfo.cpp:1354
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38