LLVM  3.7.0
R600Packetizer.cpp
Go to the documentation of this file.
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass implements instruction packetization for R600. It unsets the
12 /// isLast bit of instructions inside a bundle and substitutes source registers
13 /// with PreviousVector registers when applicable.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Support/Debug.h"
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "R600InstrInfo.h"
25 #include "llvm/CodeGen/Passes.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "packets"
32 
33 namespace {
34 
35 class R600Packetizer : public MachineFunctionPass {
36 
37 public:
38  static char ID;
39  R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
40 
41  void getAnalysisUsage(AnalysisUsage &AU) const override {
42  AU.setPreservesCFG();
48  }
49 
50  const char *getPassName() const override {
51  return "R600 Packetizer";
52  }
53 
54  bool runOnMachineFunction(MachineFunction &Fn) override;
55 };
56 char R600Packetizer::ID = 0;
57 
58 class R600PacketizerList : public VLIWPacketizerList {
59 
60 private:
61  const R600InstrInfo *TII;
62  const R600RegisterInfo &TRI;
63  bool VLIW5;
64  bool ConsideredInstUsesAlreadyWrittenVectorElement;
65 
66  unsigned getSlot(const MachineInstr *MI) const {
67  return TRI.getHWRegChan(MI->getOperand(0).getReg());
68  }
69 
70  /// \returns register to PV chan mapping for bundle/single instructions that
71  /// immediately precedes I.
73  const {
75  I--;
76  if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
77  return Result;
78  MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
79  if (I->isBundle())
80  BI++;
81  int LastDstChan = -1;
82  do {
83  bool isTrans = false;
84  int BISlot = getSlot(BI);
85  if (LastDstChan >= BISlot)
86  isTrans = true;
87  LastDstChan = BISlot;
88  if (TII->isPredicated(BI))
89  continue;
90  int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
91  if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
92  continue;
93  int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
94  if (DstIdx == -1) {
95  continue;
96  }
97  unsigned Dst = BI->getOperand(DstIdx).getReg();
98  if (isTrans || TII->isTransOnly(BI)) {
99  Result[Dst] = AMDGPU::PS;
100  continue;
101  }
102  if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
103  BI->getOpcode() == AMDGPU::DOT4_eg) {
104  Result[Dst] = AMDGPU::PV_X;
105  continue;
106  }
107  if (Dst == AMDGPU::OQAP) {
108  continue;
109  }
110  unsigned PVReg = 0;
111  switch (TRI.getHWRegChan(Dst)) {
112  case 0:
113  PVReg = AMDGPU::PV_X;
114  break;
115  case 1:
116  PVReg = AMDGPU::PV_Y;
117  break;
118  case 2:
119  PVReg = AMDGPU::PV_Z;
120  break;
121  case 3:
122  PVReg = AMDGPU::PV_W;
123  break;
124  default:
125  llvm_unreachable("Invalid Chan");
126  }
127  Result[Dst] = PVReg;
128  } while ((++BI)->isBundledWithPred());
129  return Result;
130  }
131 
132  void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
133  const {
134  unsigned Ops[] = {
135  AMDGPU::OpName::src0,
136  AMDGPU::OpName::src1,
137  AMDGPU::OpName::src2
138  };
139  for (unsigned i = 0; i < 3; i++) {
140  int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
141  if (OperandIdx < 0)
142  continue;
143  unsigned Src = MI->getOperand(OperandIdx).getReg();
145  if (It != PVs.end())
146  MI->getOperand(OperandIdx).setReg(It->second);
147  }
148  }
149 public:
150  // Ctor.
151  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
152  : VLIWPacketizerList(MF, MLI, true),
153  TII(static_cast<const R600InstrInfo *>(
154  MF.getSubtarget().getInstrInfo())),
155  TRI(TII->getRegisterInfo()) {
156  VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
157  }
158 
159  // initPacketizerState - initialize some internal flags.
160  void initPacketizerState() override {
161  ConsideredInstUsesAlreadyWrittenVectorElement = false;
162  }
163 
164  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
165  bool ignorePseudoInstruction(MachineInstr *MI,
166  MachineBasicBlock *MBB) override {
167  return false;
168  }
169 
170  // isSoloInstruction - return true if instruction MI can not be packetized
171  // with any other instruction, which means that MI itself is a packet.
172  bool isSoloInstruction(MachineInstr *MI) override {
173  if (TII->isVector(*MI))
174  return true;
175  if (!TII->isALUInstr(MI->getOpcode()))
176  return true;
177  if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
178  return true;
179  // XXX: This can be removed once the packetizer properly handles all the
180  // LDS instruction group restrictions.
181  if (TII->isLDSInstr(MI->getOpcode()))
182  return true;
183  return false;
184  }
185 
186  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
187  // together.
188  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
189  MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
190  if (getSlot(MII) == getSlot(MIJ))
191  ConsideredInstUsesAlreadyWrittenVectorElement = true;
192  // Does MII and MIJ share the same pred_sel ?
193  int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
194  OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
195  unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
196  PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
197  if (PredI != PredJ)
198  return false;
199  if (SUJ->isSucc(SUI)) {
200  for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
201  const SDep &Dep = SUJ->Succs[i];
202  if (Dep.getSUnit() != SUI)
203  continue;
204  if (Dep.getKind() == SDep::Anti)
205  continue;
206  if (Dep.getKind() == SDep::Output)
207  if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
208  continue;
209  return false;
210  }
211  }
212 
213  bool ARDef = TII->definesAddressRegister(MII) ||
214  TII->definesAddressRegister(MIJ);
215  bool ARUse = TII->usesAddressRegister(MII) ||
216  TII->usesAddressRegister(MIJ);
217  if (ARDef && ARUse)
218  return false;
219 
220  return true;
221  }
222 
223  // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
224  // and SUJ.
225  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
226  return false;
227  }
228 
229  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
230  unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
231  MI->getOperand(LastOp).setImm(Bit);
232  }
233 
234  bool isBundlableWithCurrentPMI(MachineInstr *MI,
236  std::vector<R600InstrInfo::BankSwizzle> &BS,
237  bool &isTransSlot) {
238  isTransSlot = TII->isTransOnly(MI);
239  assert (!isTransSlot || VLIW5);
240 
241  // Is the dst reg sequence legal ?
242  if (!isTransSlot && !CurrentPacketMIs.empty()) {
243  if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
244  if (ConsideredInstUsesAlreadyWrittenVectorElement &&
245  !TII->isVectorOnly(MI) && VLIW5) {
246  isTransSlot = true;
247  DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
248  }
249  else
250  return false;
251  }
252  }
253 
254  // Are the Constants limitations met ?
255  CurrentPacketMIs.push_back(MI);
256  if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
257  DEBUG(
258  dbgs() << "Couldn't pack :\n";
259  MI->dump();
260  dbgs() << "with the following packets :\n";
261  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
262  CurrentPacketMIs[i]->dump();
263  dbgs() << "\n";
264  }
265  dbgs() << "because of Consts read limitations\n";
266  );
267  CurrentPacketMIs.pop_back();
268  return false;
269  }
270 
271  // Is there a BankSwizzle set that meet Read Port limitations ?
272  if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
273  PV, BS, isTransSlot)) {
274  DEBUG(
275  dbgs() << "Couldn't pack :\n";
276  MI->dump();
277  dbgs() << "with the following packets :\n";
278  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
279  CurrentPacketMIs[i]->dump();
280  dbgs() << "\n";
281  }
282  dbgs() << "because of Read port limitations\n";
283  );
284  CurrentPacketMIs.pop_back();
285  return false;
286  }
287 
288  // We cannot read LDS source registrs from the Trans slot.
289  if (isTransSlot && TII->readsLDSSrcReg(MI))
290  return false;
291 
292  CurrentPacketMIs.pop_back();
293  return true;
294  }
295 
296  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
297  MachineBasicBlock::iterator FirstInBundle =
298  CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
299  const DenseMap<unsigned, unsigned> &PV =
300  getPreviousVector(FirstInBundle);
301  std::vector<R600InstrInfo::BankSwizzle> BS;
302  bool isTransSlot;
303 
304  if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
305  for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
306  MachineInstr *MI = CurrentPacketMIs[i];
307  unsigned Op = TII->getOperandIdx(MI->getOpcode(),
308  AMDGPU::OpName::bank_swizzle);
309  MI->getOperand(Op).setImm(BS[i]);
310  }
311  unsigned Op = TII->getOperandIdx(MI->getOpcode(),
312  AMDGPU::OpName::bank_swizzle);
313  MI->getOperand(Op).setImm(BS.back());
314  if (!CurrentPacketMIs.empty())
315  setIsLastBit(CurrentPacketMIs.back(), 0);
316  substitutePV(MI, PV);
318  if (isTransSlot) {
319  endPacket(std::next(It)->getParent(), std::next(It));
320  }
321  return It;
322  }
323  endPacket(MI->getParent(), MI);
324  if (TII->isTransOnly(MI))
325  return MI;
327  }
328 };
329 
330 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
332  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
333 
334  // Instantiate the packetizer.
335  R600PacketizerList Packetizer(Fn, MLI);
336 
337  // DFA state table should not be empty.
338  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
339 
340  //
341  // Loop over all basic blocks and remove KILL pseudo-instructions
342  // These instructions confuse the dependence analysis. Consider:
343  // D0 = ... (Insn 0)
344  // R0 = KILL R0, D0 (Insn 1)
345  // R0 = ... (Insn 2)
346  // Here, Insn 1 will result in the dependence graph not emitting an output
347  // dependence between Insn 0 and Insn 2. This can lead to incorrect
348  // packetization
349  //
350  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
351  MBB != MBBe; ++MBB) {
352  MachineBasicBlock::iterator End = MBB->end();
353  MachineBasicBlock::iterator MI = MBB->begin();
354  while (MI != End) {
355  if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
356  (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
357  MachineBasicBlock::iterator DeleteMI = MI;
358  ++MI;
359  MBB->erase(DeleteMI);
360  End = MBB->end();
361  continue;
362  }
363  ++MI;
364  }
365  }
366 
367  // Loop over all of the basic blocks.
368  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
369  MBB != MBBe; ++MBB) {
370  // Find scheduling regions and schedule / packetize each region.
371  unsigned RemainingCount = MBB->size();
372  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
373  RegionEnd != MBB->begin();) {
374  // The next region starts above the previous region. Look backward in the
375  // instruction stream until we find the nearest boundary.
376  MachineBasicBlock::iterator I = RegionEnd;
377  for(;I != MBB->begin(); --I, --RemainingCount) {
378  if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
379  break;
380  }
381  I = MBB->begin();
382 
383  // Skip empty scheduling regions.
384  if (I == RegionEnd) {
385  RegionEnd = std::prev(RegionEnd);
386  --RemainingCount;
387  continue;
388  }
389  // Skip regions with one instruction.
390  if (I == std::prev(RegionEnd)) {
391  RegionEnd = std::prev(RegionEnd);
392  continue;
393  }
394 
395  Packetizer.PacketizeMIs(MBB, I, RegionEnd);
396  RegionEnd = I;
397  }
398  }
399 
400  return true;
401 
402 }
403 
404 } // end anonymous namespace
405 
407  return new R600Packetizer(tm);
408 }
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
bool isSucc(SUnit *N)
isSucc - Test if node N is a successor of this node.
Definition: ScheduleDAG.h:466
Interface definition for R600InstrInfo.
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
Hexagon Packetizer
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:406
virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Instructions::iterator instr_iterator
A register anti-dependedence (aka WAR).
Definition: ScheduleDAG.h:50
AnalysisUsage & addRequired()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isPredicated(const MachineInstr *MI) const override
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:51
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
TargetInstrInfo - Interface to description of machine instruction set.
SDep - Scheduling dependency.
Definition: ScheduleDAG.h:45
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
bundle_iterator< MachineInstr, instr_iterator > iterator
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
FunctionPass * createR600Packetizer(TargetMachine &tm)
Represent the analysis usage information of a pass.
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
void dump() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:160
void write(void *memory, value_type value)
Write a value to memory with a particular endianness.
Definition: Endian.h:73
Representation of each machine instruction.
Definition: MachineInstr.h:51
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:54
iterator end()
Definition: DenseMap.h:68
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:170
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
SmallVector< SDep, 4 > Succs
Definition: ScheduleDAG.h:276
static const Function * getParent(const Value *V)
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
Primary interface to the complete machine description for the target machine.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:261
virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI)