LLVM  8.0.0svn
R600ClauseMergePass.cpp
Go to the documentation of this file.
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
26 #include "llvm/Support/Debug.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "r600mergeclause"
32 
33 namespace {
34 
35 static bool isCFAlu(const MachineInstr &MI) {
36  switch (MI.getOpcode()) {
37  case R600::CF_ALU:
38  case R600::CF_ALU_PUSH_BEFORE:
39  return true;
40  default:
41  return false;
42  }
43 }
44 
45 class R600ClauseMergePass : public MachineFunctionPass {
46 
47 private:
48  const R600InstrInfo *TII;
49 
50  unsigned getCFAluSize(const MachineInstr &MI) const;
51  bool isCFAluEnabled(const MachineInstr &MI) const;
52 
53  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54  /// removed and their content affected to the previous alu clause.
55  /// This function parse instructions after CFAlu until it find a disabled
56  /// CFAlu and merge the content, or an enabled CFAlu.
57  void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58 
59  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60  /// it is the case.
61  bool mergeIfPossible(MachineInstr &RootCFAlu,
62  const MachineInstr &LatrCFAlu) const;
63 
64 public:
65  static char ID;
66 
67  R600ClauseMergePass() : MachineFunctionPass(ID) { }
68 
69  bool runOnMachineFunction(MachineFunction &MF) override;
70 
71  StringRef getPassName() const override;
72 };
73 
74 } // end anonymous namespace
75 
76 INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
77  "R600 Clause Merge", false, false)
78 INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
79  "R600 Clause Merge", false, false)
80 
81 char R600ClauseMergePass::ID = 0;
82 
83 char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
84 
85 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
86  assert(isCFAlu(MI));
87  return MI
88  .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
89  .getImm();
90 }
91 
92 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
93  assert(isCFAlu(MI));
94  return MI
95  .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
96  .getImm();
97 }
98 
99 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
100  MachineInstr &CFAlu) const {
101  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
102  MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
103  I++;
104  do {
105  while (I != E && !isCFAlu(*I))
106  I++;
107  if (I == E)
108  return;
109  MachineInstr &MI = *I++;
110  if (isCFAluEnabled(MI))
111  break;
112  CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
113  MI.eraseFromParent();
114  } while (I != E);
115 }
116 
117 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
118  const MachineInstr &LatrCFAlu) const {
119  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
120  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
121  unsigned RootInstCount = getCFAluSize(RootCFAlu),
122  LaterInstCount = getCFAluSize(LatrCFAlu);
123  unsigned CumuledInsts = RootInstCount + LaterInstCount;
124  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
125  LLVM_DEBUG(dbgs() << "Excess inst counts\n");
126  return false;
127  }
128  if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
129  return false;
130  // Is KCache Bank 0 compatible ?
131  int Mode0Idx =
132  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
133  int KBank0Idx =
134  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
135  int KBank0LineIdx =
136  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
137  if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
138  RootCFAlu.getOperand(Mode0Idx).getImm() &&
139  (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
140  RootCFAlu.getOperand(KBank0Idx).getImm() ||
141  LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
142  RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
143  LLVM_DEBUG(dbgs() << "Wrong KC0\n");
144  return false;
145  }
146  // Is KCache Bank 1 compatible ?
147  int Mode1Idx =
148  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
149  int KBank1Idx =
150  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
151  int KBank1LineIdx =
152  TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
153  if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
154  RootCFAlu.getOperand(Mode1Idx).getImm() &&
155  (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
156  RootCFAlu.getOperand(KBank1Idx).getImm() ||
157  LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
158  RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
159  LLVM_DEBUG(dbgs() << "Wrong KC0\n");
160  return false;
161  }
162  if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
163  RootCFAlu.getOperand(Mode0Idx).setImm(
164  LatrCFAlu.getOperand(Mode0Idx).getImm());
165  RootCFAlu.getOperand(KBank0Idx).setImm(
166  LatrCFAlu.getOperand(KBank0Idx).getImm());
167  RootCFAlu.getOperand(KBank0LineIdx)
168  .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
169  }
170  if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
171  RootCFAlu.getOperand(Mode1Idx).setImm(
172  LatrCFAlu.getOperand(Mode1Idx).getImm());
173  RootCFAlu.getOperand(KBank1Idx).setImm(
174  LatrCFAlu.getOperand(KBank1Idx).getImm());
175  RootCFAlu.getOperand(KBank1LineIdx)
176  .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
177  }
178  RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
179  RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
180  return true;
181 }
182 
/// Pass entry point: walks every basic block of \p MF and merges CF_ALU
/// clause markers.
///
/// For each clause marker met, the disabled markers that follow it are first
/// folded into it (cleanPotentialDisabledCFAlu); then the marker is merged
/// into the latest remembered marker when mergeIfPossible() accepts it, or
/// becomes the new remembered marker otherwise.
///
/// \returns false in every path visible here.
// NOTE(review): the function returns false even after instructions were
// merged or erased — confirm that reporting "unchanged" is intended.
183 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
184  if (skipFunction(MF.getFunction()))
185  return false;
186 
// NOTE(review): `ST` is not declared in the visible text; the listing jumps
// from line 186 to 188, so its declaration was presumably lost — restore it
// from the original file.
188  TII = ST.getInstrInfo();
189 
190  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
191  BB != BB_E; ++BB) {
192  MachineBasicBlock &MBB = *BB;
193  MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
// LatestCFAlu == E means "no marker currently available to merge into".
194  MachineBasicBlock::iterator LatestCFAlu = E;
195  while (I != E) {
// I is advanced before MI is processed, so erasing MI below keeps I valid.
196  MachineInstr &MI = *I++;
// Forget the remembered marker when MI is neither an ALU candidate nor a
// clause marker, or when MI must be the last instruction of its clause.
197  if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
198  TII->mustBeLastInClause(MI.getOpcode()))
199  LatestCFAlu = E;
200  if (!isCFAlu(MI))
201  continue;
202  cleanPotentialDisabledCFAlu(MI);
203 
204  if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
// MI's content now lives in *LatestCFAlu; drop the redundant marker.
205  MI.eraseFromParent();
206  } else {
// NOTE(review): operand index 8 is hard-coded; presumably it is the same
// operand isCFAluEnabled() looks up by name — confirm.
207  assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
208  LatestCFAlu = MI;
209  }
210  }
211  }
212  return false;
213 }
214 
215 StringRef R600ClauseMergePass::getPassName() const {
216  return "R600 Merge Clause Markers Pass";
217 }
218 
220  return new R600ClauseMergePass();
221 }
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
Interface definition for R600RegisterInfo.
#define DEBUG_TYPE
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE, "R600 Clause Merge", false, false) INITIALIZE_PASS_END(R600ClauseMergePass
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const R600InstrInfo * getInstrInfo() const override
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
FunctionPass * createR600ClauseMergePass()
R600 Clause Merge
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
int64_t getImm() const
static bool Enabled
Definition: Statistic.cpp:51
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
aarch64 promote const
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
char & R600ClauseMergePassID