LLVM  6.0.0svn
R600ClauseMergePass.cpp
Go to the documentation of this file.
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
25 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "r600mergeclause"
31 
32 namespace {
33 
34 static bool isCFAlu(const MachineInstr &MI) {
35  switch (MI.getOpcode()) {
36  case AMDGPU::CF_ALU:
37  case AMDGPU::CF_ALU_PUSH_BEFORE:
38  return true;
39  default:
40  return false;
41  }
42 }
43 
/// Pass that merges consecutive CF_ALU clause markers so the hardware sees
/// fewer, larger ALU clauses. Expected to run after if-conversion.
class R600ClauseMergePass : public MachineFunctionPass {

private:
  const R600InstrInfo *TII;

  /// Returns the COUNT operand of the CFAlu \p MI, i.e. the number of ALU
  /// instructions in the clause it opens.
  unsigned getCFAluSize(const MachineInstr &MI) const;

  /// Returns the Enabled operand of the CFAlu \p MI.
  bool isCFAluEnabled(const MachineInstr &MI) const;

  /// The IfCvt pass can generate "disabled" ALU clause markers that need to
  /// be removed, with their content folded into the previous ALU clause.
  /// This function parses the instructions following \p CFAlu; each disabled
  /// CFAlu found is merged into \p CFAlu (its COUNT added, the marker
  /// erased), and scanning stops at the first enabled CFAlu.
  void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;

  /// Checks whether \p LatrCFAlu can be merged into \p RootCFAlu and does it
  /// if it is the case; returns true when the merge happened.
  bool mergeIfPossible(MachineInstr &RootCFAlu,
                       const MachineInstr &LatrCFAlu) const;

public:
  static char ID;

  R600ClauseMergePass() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override;
};
72 
73 } // end anonymous namespace
74 
// Register the pass with the LLVM pass registry under DEBUG_TYPE.
INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
                      "R600 Clause Merge", false, false)
INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
                    "R600 Clause Merge", false, false)

// Pass identification: the address of ID is the pass's unique identity.
char R600ClauseMergePass::ID = 0;

// Exported handle so target code can reference this pass by its ID.
char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
83 
84 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
85  assert(isCFAlu(MI));
86  return MI
87  .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
88  .getImm();
89 }
90 
91 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
92  assert(isCFAlu(MI));
93  return MI
94  .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
95  .getImm();
96 }
97 
98 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
99  MachineInstr &CFAlu) const {
100  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
101  MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
102  I++;
103  do {
104  while (I != E && !isCFAlu(*I))
105  I++;
106  if (I == E)
107  return;
108  MachineInstr &MI = *I++;
109  if (isCFAluEnabled(MI))
110  break;
111  CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
112  MI.eraseFromParent();
113  } while (I != E);
114 }
115 
116 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
117  const MachineInstr &LatrCFAlu) const {
118  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
119  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
120  unsigned RootInstCount = getCFAluSize(RootCFAlu),
121  LaterInstCount = getCFAluSize(LatrCFAlu);
122  unsigned CumuledInsts = RootInstCount + LaterInstCount;
123  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
124  DEBUG(dbgs() << "Excess inst counts\n");
125  return false;
126  }
127  if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
128  return false;
129  // Is KCache Bank 0 compatible ?
130  int Mode0Idx =
131  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
132  int KBank0Idx =
133  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
134  int KBank0LineIdx =
135  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
136  if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
137  RootCFAlu.getOperand(Mode0Idx).getImm() &&
138  (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
139  RootCFAlu.getOperand(KBank0Idx).getImm() ||
140  LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
141  RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
142  DEBUG(dbgs() << "Wrong KC0\n");
143  return false;
144  }
145  // Is KCache Bank 1 compatible ?
146  int Mode1Idx =
147  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
148  int KBank1Idx =
149  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
150  int KBank1LineIdx =
151  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
152  if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
153  RootCFAlu.getOperand(Mode1Idx).getImm() &&
154  (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
155  RootCFAlu.getOperand(KBank1Idx).getImm() ||
156  LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
157  RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
158  DEBUG(dbgs() << "Wrong KC0\n");
159  return false;
160  }
161  if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
162  RootCFAlu.getOperand(Mode0Idx).setImm(
163  LatrCFAlu.getOperand(Mode0Idx).getImm());
164  RootCFAlu.getOperand(KBank0Idx).setImm(
165  LatrCFAlu.getOperand(KBank0Idx).getImm());
166  RootCFAlu.getOperand(KBank0LineIdx)
167  .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
168  }
169  if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
170  RootCFAlu.getOperand(Mode1Idx).setImm(
171  LatrCFAlu.getOperand(Mode1Idx).getImm());
172  RootCFAlu.getOperand(KBank1Idx).setImm(
173  LatrCFAlu.getOperand(KBank1Idx).getImm());
174  RootCFAlu.getOperand(KBank1LineIdx)
175  .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
176  }
177  RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
178  RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
179  return true;
180 }
181 
182 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
183  if (skipFunction(*MF.getFunction()))
184  return false;
185 
187  TII = ST.getInstrInfo();
188 
189  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
190  BB != BB_E; ++BB) {
191  MachineBasicBlock &MBB = *BB;
192  MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
193  MachineBasicBlock::iterator LatestCFAlu = E;
194  while (I != E) {
195  MachineInstr &MI = *I++;
196  if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
197  TII->mustBeLastInClause(MI.getOpcode()))
198  LatestCFAlu = E;
199  if (!isCFAlu(MI))
200  continue;
201  cleanPotentialDisabledCFAlu(MI);
202 
203  if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
204  MI.eraseFromParent();
205  } else {
206  assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
207  LatestCFAlu = MI;
208  }
209  }
210  }
211  return false;
212 }
213 
214 StringRef R600ClauseMergePass::getPassName() const {
215  return "R600 Merge Clause Markers Pass";
216 }
217 
219  return new R600ClauseMergePass();
220 }
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
Interface definition for R600RegisterInfo.
#define DEBUG_TYPE
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE, "R600 Clause Merge", false, false) INITIALIZE_PASS_END(R600ClauseMergePass
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const R600InstrInfo * getInstrInfo() const override
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
FunctionPass * createR600ClauseMergePass()
R600 Clause Merge
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
int64_t getImm() const
static bool Enabled
Definition: Statistic.cpp:49
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
Representation of each machine instruction.
Definition: MachineInstr.h:59
#define I(x, y, z)
Definition: MD5.cpp:58
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
aarch64 promote const
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
char & R600ClauseMergePassID