LLVM  4.0.0
R600ClauseMergePass.cpp
Go to the documentation of this file.
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
25 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "r600mergeclause"
31 
32 namespace {
33 
34 static bool isCFAlu(const MachineInstr &MI) {
35  switch (MI.getOpcode()) {
36  case AMDGPU::CF_ALU:
37  case AMDGPU::CF_ALU_PUSH_BEFORE:
38  return true;
39  default:
40  return false;
41  }
42 }
43 
44 class R600ClauseMergePass : public MachineFunctionPass {
45 
46 private:
47  static char ID;
48  const R600InstrInfo *TII;
49 
50  unsigned getCFAluSize(const MachineInstr &MI) const;
51  bool isCFAluEnabled(const MachineInstr &MI) const;
52 
53  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54  /// removed and their content affected to the previous alu clause.
55  /// This function parse instructions after CFAlu until it find a disabled
56  /// CFAlu and merge the content, or an enabled CFAlu.
57  void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58 
59  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60  /// it is the case.
61  bool mergeIfPossible(MachineInstr &RootCFAlu,
62  const MachineInstr &LatrCFAlu) const;
63 
64 public:
65  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66 
67  bool runOnMachineFunction(MachineFunction &MF) override;
68 
69  StringRef getPassName() const override;
70 };
71 
73 
74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
75  assert(isCFAlu(MI));
76  return MI
77  .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
78  .getImm();
79 }
80 
81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
82  assert(isCFAlu(MI));
83  return MI
84  .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
85  .getImm();
86 }
87 
88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89  MachineInstr &CFAlu) const {
90  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
91  MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
92  I++;
93  do {
94  while (I != E && !isCFAlu(*I))
95  I++;
96  if (I == E)
97  return;
98  MachineInstr &MI = *I++;
99  if (isCFAluEnabled(MI))
100  break;
101  CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102  MI.eraseFromParent();
103  } while (I != E);
104 }
105 
106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107  const MachineInstr &LatrCFAlu) const {
108  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
110  unsigned RootInstCount = getCFAluSize(RootCFAlu),
111  LaterInstCount = getCFAluSize(LatrCFAlu);
112  unsigned CumuledInsts = RootInstCount + LaterInstCount;
113  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114  DEBUG(dbgs() << "Excess inst counts\n");
115  return false;
116  }
117  if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
118  return false;
119  // Is KCache Bank 0 compatible ?
120  int Mode0Idx =
121  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
122  int KBank0Idx =
123  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
124  int KBank0LineIdx =
125  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
126  if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127  RootCFAlu.getOperand(Mode0Idx).getImm() &&
128  (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129  RootCFAlu.getOperand(KBank0Idx).getImm() ||
130  LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131  RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
132  DEBUG(dbgs() << "Wrong KC0\n");
133  return false;
134  }
135  // Is KCache Bank 1 compatible ?
136  int Mode1Idx =
137  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
138  int KBank1Idx =
139  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
140  int KBank1LineIdx =
141  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
142  if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143  RootCFAlu.getOperand(Mode1Idx).getImm() &&
144  (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145  RootCFAlu.getOperand(KBank1Idx).getImm() ||
146  LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147  RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
148  DEBUG(dbgs() << "Wrong KC0\n");
149  return false;
150  }
151  if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152  RootCFAlu.getOperand(Mode0Idx).setImm(
153  LatrCFAlu.getOperand(Mode0Idx).getImm());
154  RootCFAlu.getOperand(KBank0Idx).setImm(
155  LatrCFAlu.getOperand(KBank0Idx).getImm());
156  RootCFAlu.getOperand(KBank0LineIdx)
157  .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
158  }
159  if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160  RootCFAlu.getOperand(Mode1Idx).setImm(
161  LatrCFAlu.getOperand(Mode1Idx).getImm());
162  RootCFAlu.getOperand(KBank1Idx).setImm(
163  LatrCFAlu.getOperand(KBank1Idx).getImm());
164  RootCFAlu.getOperand(KBank1LineIdx)
165  .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
166  }
167  RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168  RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
169  return true;
170 }
171 
172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
173  if (skipFunction(*MF.getFunction()))
174  return false;
175 
177  TII = ST.getInstrInfo();
178 
179  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180  BB != BB_E; ++BB) {
181  MachineBasicBlock &MBB = *BB;
182  MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
183  MachineBasicBlock::iterator LatestCFAlu = E;
184  while (I != E) {
185  MachineInstr &MI = *I++;
186  if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187  TII->mustBeLastInClause(MI.getOpcode()))
188  LatestCFAlu = E;
189  if (!isCFAlu(MI))
190  continue;
191  cleanPotentialDisabledCFAlu(MI);
192 
193  if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194  MI.eraseFromParent();
195  } else {
196  assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197  LatestCFAlu = MI;
198  }
199  }
200  }
201  return false;
202 }
203 
204 StringRef R600ClauseMergePass::getPassName() const {
205  return "R600 Merge Clause Markers Pass";
206 }
207 
208 } // end anonymous namespace
209 
210 
212  return new R600ClauseMergePass(TM);
213 }
AMDGPU specific subclass of TargetSubtarget.
Interface definition for R600InstrInfo.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
Interface definition for R600RegisterInfo.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineBasicBlock * MBB
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
int64_t getImm() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
const R600InstrInfo * getInstrInfo() const override
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
static bool Enabled
Definition: Statistic.cpp:49
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Representation of each machine instruction.
Definition: MachineInstr.h:52
#define I(x, y, z)
Definition: MD5.cpp:54
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define DEBUG(X)
Definition: Debug.h:100
Primary interface to the complete machine description for the target machine.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47