LLVM  3.7.0
R600ClauseMergePass.cpp
Go to the documentation of this file.
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
25 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "r600mergeclause"
31 
32 namespace {
33 
34 static bool isCFAlu(const MachineInstr *MI) {
35  switch (MI->getOpcode()) {
36  case AMDGPU::CF_ALU:
37  case AMDGPU::CF_ALU_PUSH_BEFORE:
38  return true;
39  default:
40  return false;
41  }
42 }
43 
44 class R600ClauseMergePass : public MachineFunctionPass {
45 
46 private:
47  static char ID;
48  const R600InstrInfo *TII;
49 
50  unsigned getCFAluSize(const MachineInstr *MI) const;
51  bool isCFAluEnabled(const MachineInstr *MI) const;
52 
53  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54  /// removed and their content affected to the previous alu clause.
55  /// This function parse instructions after CFAlu until it find a disabled
56  /// CFAlu and merge the content, or an enabled CFAlu.
57  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
58 
59  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60  /// it is the case.
61  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
62  const;
63 
64 public:
65  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66 
67  bool runOnMachineFunction(MachineFunction &MF) override;
68 
69  const char *getPassName() const override;
70 };
71 
73 
74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
75  assert(isCFAlu(MI));
76  return MI->getOperand(
77  TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
78 }
79 
80 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
81  assert(isCFAlu(MI));
82  return MI->getOperand(
83  TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
84 }
85 
86 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
87  const {
88  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
89  MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
90  I++;
91  do {
92  while (I!= E && !isCFAlu(I))
93  I++;
94  if (I == E)
95  return;
96  MachineInstr *MI = I++;
97  if (isCFAluEnabled(MI))
98  break;
99  CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
100  MI->eraseFromParent();
101  } while (I != E);
102 }
103 
104 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
105  const MachineInstr *LatrCFAlu) const {
106  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
107  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
108  unsigned RootInstCount = getCFAluSize(RootCFAlu),
109  LaterInstCount = getCFAluSize(LatrCFAlu);
110  unsigned CumuledInsts = RootInstCount + LaterInstCount;
111  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
112  DEBUG(dbgs() << "Excess inst counts\n");
113  return false;
114  }
115  if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
116  return false;
117  // Is KCache Bank 0 compatible ?
118  int Mode0Idx =
119  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
120  int KBank0Idx =
121  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
122  int KBank0LineIdx =
123  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
124  if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
125  RootCFAlu->getOperand(Mode0Idx).getImm() &&
126  (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
127  RootCFAlu->getOperand(KBank0Idx).getImm() ||
128  LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
129  RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
130  DEBUG(dbgs() << "Wrong KC0\n");
131  return false;
132  }
133  // Is KCache Bank 1 compatible ?
134  int Mode1Idx =
135  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
136  int KBank1Idx =
137  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
138  int KBank1LineIdx =
139  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
140  if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
141  RootCFAlu->getOperand(Mode1Idx).getImm() &&
142  (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
143  RootCFAlu->getOperand(KBank1Idx).getImm() ||
144  LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
145  RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
146  DEBUG(dbgs() << "Wrong KC0\n");
147  return false;
148  }
149  if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
150  RootCFAlu->getOperand(Mode0Idx).setImm(
151  LatrCFAlu->getOperand(Mode0Idx).getImm());
152  RootCFAlu->getOperand(KBank0Idx).setImm(
153  LatrCFAlu->getOperand(KBank0Idx).getImm());
154  RootCFAlu->getOperand(KBank0LineIdx).setImm(
155  LatrCFAlu->getOperand(KBank0LineIdx).getImm());
156  }
157  if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
158  RootCFAlu->getOperand(Mode1Idx).setImm(
159  LatrCFAlu->getOperand(Mode1Idx).getImm());
160  RootCFAlu->getOperand(KBank1Idx).setImm(
161  LatrCFAlu->getOperand(KBank1Idx).getImm());
162  RootCFAlu->getOperand(KBank1LineIdx).setImm(
163  LatrCFAlu->getOperand(KBank1LineIdx).getImm());
164  }
165  RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
166  RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
167  return true;
168 }
169 
170 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
171  TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
172  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
173  BB != BB_E; ++BB) {
174  MachineBasicBlock &MBB = *BB;
175  MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
176  MachineBasicBlock::iterator LatestCFAlu = E;
177  while (I != E) {
178  MachineInstr *MI = I++;
179  if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
180  TII->mustBeLastInClause(MI->getOpcode()))
181  LatestCFAlu = E;
182  if (!isCFAlu(MI))
183  continue;
184  cleanPotentialDisabledCFAlu(MI);
185 
186  if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
187  MI->eraseFromParent();
188  } else {
189  assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
190  LatestCFAlu = MI;
191  }
192  }
193  }
194  return false;
195 }
196 
197 const char *R600ClauseMergePass::getPassName() const {
198  return "R600 Merge Clause Markers Pass";
199 }
200 
201 } // end anonymous namespace
202 
203 
205  return new R600ClauseMergePass(TM);
206 }
AMDGPU specific subclass of TargetSubtarget.
Interface definition for R600InstrInfo.
Interface definition for R600RegisterInfo.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static cl::opt< bool > Enabled("stats", cl::desc("Enable statistics output from program (available with Asserts)"))
-stats - Command line option to cause transformations to emit stats about what they did...
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
int64_t getImm() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
Representation of each machine instruction.
Definition: MachineInstr.h:51
#define I(x, y, z)
Definition: MD5.cpp:54
virtual const TargetInstrInfo * getInstrInfo() const
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
Primary interface to the complete machine description for the target machine.