LLVM  10.0.0svn
R600Packetizer.cpp
Go to the documentation of this file.
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass implements instruction packetization for R600. It unsets the
11 /// isLast bit of instructions inside a bundle and substitutes src registers
12 /// with PreviousVector when applicable.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600InstrInfo.h"
24 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/Support/Debug.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "packets"
32 
33 namespace {
34 
// Machine-function pass wrapper for the R600 VLIW packetizer. The class only
// declares the pass identity, its analysis usage and its name; the work itself
// is done in runOnMachineFunction(), which is defined out of line.
35 class R600Packetizer : public MachineFunctionPass {
36 
37 public:
  // Pass identification; its address is handed to the MachineFunctionPass base.
38  static char ID;
39  R600Packetizer() : MachineFunctionPass(ID) {}
40 
  // Declares that this pass preserves the CFG.
  // NOTE(review): the embedded listing numbers jump from 42 to 48 here, so
  // further statements of this method appear to be missing from this copy.
41  void getAnalysisUsage(AnalysisUsage &AU) const override {
42  AU.setPreservesCFG();
48  }
49 
  // Human-readable pass name.
50  StringRef getPassName() const override { return "R600 Packetizer"; }
51 
  // Packetizes the instructions of Fn; see the out-of-line definition.
52  bool runOnMachineFunction(MachineFunction &Fn) override;
53 };
54 
55 class R600PacketizerList : public VLIWPacketizerList {
56 private:
57  const R600InstrInfo *TII;
58  const R600RegisterInfo &TRI;
59  bool VLIW5;
60  bool ConsideredInstUsesAlreadyWrittenVectorElement;
61 
62  unsigned getSlot(const MachineInstr &MI) const {
63  return TRI.getHWRegChan(MI.getOperand(0).getReg());
64  }
65 
66  /// \returns the register to PV channel mapping for the bundle or single
67  /// instruction that immediately precedes I.
  //
  // NOTE(review): the embedded listing numbers skip 68, 70 and 74, so the
  // first line of the signature and the declarations of `Result` and `BI` are
  // not visible in this copy of the file.
69  const {
  // Step back to the instruction (or bundle) just before I.
71  I--;
  // Anything that is neither an ALU instruction nor a bundle returns Result
  // as it stands at this point.
72  if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
73  return Result;
  // For a bundle, BI is advanced by one before the walk starts (presumably
  // past the bundle header -- confirm against the missing declaration of BI).
75  if (I->isBundle())
76  BI++;
  // Slot of the previously visited instruction; -1 before the first one.
77  int LastDstChan = -1;
78  do {
79  bool isTrans = false;
80  int BISlot = getSlot(*BI);
  // A slot that does not increase relative to the previous instruction makes
  // this instruction be treated as a Trans instruction below.
81  if (LastDstChan >= BISlot)
82  isTrans = true;
83  LastDstChan = BISlot;
  // Note: `continue` inside a do-while jumps to the loop condition, so BI is
  // still advanced for every instruction skipped below.
  // Predicated instructions contribute no mapping.
84  if (TII->isPredicated(*BI))
85  continue;
  // Instructions that have a `write` operand equal to 0 contribute no mapping.
86  int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
87  if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
88  continue;
  // Instructions without a `dst` operand contribute no mapping either.
89  int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
90  if (DstIdx == -1) {
91  continue;
92  }
93  Register Dst = BI->getOperand(DstIdx).getReg();
  // Results of Trans instructions are mapped to PS.
94  if (isTrans || TII->isTransOnly(*BI)) {
95  Result[Dst] = R600::PS;
96  continue;
97  }
  // DOT4 results are always mapped to PV_X.
98  if (BI->getOpcode() == R600::DOT4_r600 ||
99  BI->getOpcode() == R600::DOT4_eg) {
100  Result[Dst] = R600::PV_X;
101  continue;
102  }
  // An OQAP destination gets no mapping.
103  if (Dst == R600::OQAP) {
104  continue;
105  }
  // Otherwise the PV register is selected by the hardware channel of Dst.
106  unsigned PVReg = 0;
107  switch (TRI.getHWRegChan(Dst)) {
108  case 0:
109  PVReg = R600::PV_X;
110  break;
111  case 1:
112  PVReg = R600::PV_Y;
113  break;
114  case 2:
115  PVReg = R600::PV_Z;
116  break;
117  case 3:
118  PVReg = R600::PV_W;
119  break;
120  default:
121  llvm_unreachable("Invalid Chan");
122  }
123  Result[Dst] = PVReg;
  // Keep going while the next instruction is bundled with its predecessor.
124  } while ((++BI)->isBundledWithPred());
125  return Result;
126  }
127 
  // Rewrites the src0/src1/src2 register operands of MI: a source operand is
  // replaced with the register `It` maps to whenever `It` is not PVs.end().
  // NOTE(review): listing line 140, which declares `It`, is missing from this
  // copy; presumably it holds the result of looking Src up in PVs -- confirm.
128  void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
129  const {
  // The three source operand names that are candidates for substitution.
130  unsigned Ops[] = {
131  R600::OpName::src0,
132  R600::OpName::src1,
133  R600::OpName::src2
134  };
135  for (unsigned i = 0; i < 3; i++) {
136  int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
  // A negative index means this opcode has no such source operand.
137  if (OperandIdx < 0)
138  continue;
139  Register Src = MI.getOperand(OperandIdx).getReg();
141  if (It != PVs.end())
142  MI.getOperand(OperandIdx).setReg(It->second);
143  }
144  }
145 public:
146  // Ctor.
147  R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
148  MachineLoopInfo &MLI)
149  : VLIWPacketizerList(MF, MLI, nullptr),
150  TII(ST.getInstrInfo()),
151  TRI(TII->getRegisterInfo()) {
152  VLIW5 = !ST.hasCaymanISA();
153  }
154 
155  // initPacketizerState - initialize some internal flags.
156  void initPacketizerState() override {
157  ConsideredInstUsesAlreadyWrittenVectorElement = false;
158  }
159 
160  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
161  bool ignorePseudoInstruction(const MachineInstr &MI,
162  const MachineBasicBlock *MBB) override {
163  return false;
164  }
165 
166  // isSoloInstruction - return true if instruction MI can not be packetized
167  // with any other instruction, which means that MI itself is a packet.
168  bool isSoloInstruction(const MachineInstr &MI) override {
169  if (TII->isVector(MI))
170  return true;
171  if (!TII->isALUInstr(MI.getOpcode()))
172  return true;
173  if (MI.getOpcode() == R600::GROUP_BARRIER)
174  return true;
175  // XXX: This can be removed once the packetizer properly handles all the
176  // LDS instruction group restrictions.
177  return TII->isLDSInstr(MI.getOpcode());
178  }
179 
180  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
181  // together.
182  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
183  MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
184  if (getSlot(*MII) == getSlot(*MIJ))
185  ConsideredInstUsesAlreadyWrittenVectorElement = true;
186  // Does MII and MIJ share the same pred_sel ?
187  int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
188  OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
189  Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
190  PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
191  if (PredI != PredJ)
192  return false;
193  if (SUJ->isSucc(SUI)) {
194  for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
195  const SDep &Dep = SUJ->Succs[i];
196  if (Dep.getSUnit() != SUI)
197  continue;
198  if (Dep.getKind() == SDep::Anti)
199  continue;
200  if (Dep.getKind() == SDep::Output)
201  if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
202  continue;
203  return false;
204  }
205  }
206 
207  bool ARDef =
208  TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
209  bool ARUse =
210  TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
211 
212  return !ARDef || !ARUse;
213  }
214 
215  // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
216  // and SUJ.
217  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
218  return false;
219  }
220 
221  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
222  unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
223  MI->getOperand(LastOp).setImm(Bit);
224  }
225 
  // Checks whether MI can be added to the packet under construction
  // (CurrentPacketMIs).
  //
  // isTransSlot is an output: it is set when MI is trans-only or when MI is
  // re-classified below because of its destination slot. BS is forwarded to
  // fitsReadPortLimitations() together with PV.
  //
  // MI is pushed onto CurrentPacketMIs temporarily for the limitation checks
  // and popped again before the successful return and before the two
  // limitation-failure returns.
  //
  // NOTE(review): listing line 227 is missing from this copy; the body uses a
  // `PV` that is presumably the parameter declared on that line.
226  bool isBundlableWithCurrentPMI(MachineInstr &MI,
228  std::vector<R600InstrInfo::BankSwizzle> &BS,
229  bool &isTransSlot) {
230  isTransSlot = TII->isTransOnly(MI);
  // Trans-only instructions are expected only when VLIW5 is set.
231  assert (!isTransSlot || VLIW5);
232 
233  // Is the dst reg sequence legal?
  // MI's slot must be greater than the slot of the last packet member; when
  // it is not, MI is either re-classified as a Trans instruction or rejected.
234  if (!isTransSlot && !CurrentPacketMIs.empty()) {
235  if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
236  if (ConsideredInstUsesAlreadyWrittenVectorElement &&
237  !TII->isVectorOnly(MI) && VLIW5) {
238  isTransSlot = true;
239  LLVM_DEBUG({
240  dbgs() << "Considering as Trans Inst :";
241  MI.dump();
242  });
243  }
244  else
245  return false;
246  }
247  }
248 
249  // Are the constant read limitations met?
250  CurrentPacketMIs.push_back(&MI);
251  if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
252  LLVM_DEBUG({
253  dbgs() << "Couldn't pack :\n";
254  MI.dump();
255  dbgs() << "with the following packets :\n";
  // size() - 1 leaves out the just-pushed MI, which was dumped above.
256  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
257  CurrentPacketMIs[i]->dump();
258  dbgs() << "\n";
259  }
260  dbgs() << "because of Consts read limitations\n";
261  });
262  CurrentPacketMIs.pop_back();
263  return false;
264  }
265 
266  // Is there a BankSwizzle set that meets the read port limitations?
267  if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
268  PV, BS, isTransSlot)) {
269  LLVM_DEBUG({
270  dbgs() << "Couldn't pack :\n";
271  MI.dump();
272  dbgs() << "with the following packets :\n";
273  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
274  CurrentPacketMIs[i]->dump();
275  dbgs() << "\n";
276  }
277  dbgs() << "because of Read port limitations\n";
278  });
279  CurrentPacketMIs.pop_back();
280  return false;
281  }
282 
283  // We cannot read LDS source registers from the Trans slot.
  // NOTE(review): unlike the two rejections above, this return leaves MI in
  // CurrentPacketMIs (no pop_back) -- confirm whether that is intended.
284  if (isTransSlot && TII->readsLDSSrcReg(MI))
285  return false;
286 
287  CurrentPacketMIs.pop_back();
288  return true;
289  }
290 
  // Adds MI to the current packet when isBundlableWithCurrentPMI() accepts it;
  // otherwise the current packet is ended in front of MI first.
  //
  // NOTE(review): listing lines 312 and 321 are missing from this copy. The
  // first presumably declares `It`; the second presumably holds the final
  // return of the non-bundlable path -- confirm against the full file.
291  MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
  // The PV mapping is computed relative to the first instruction of the
  // bundle, which is MI itself when the packet is still empty.
292  MachineBasicBlock::iterator FirstInBundle =
293  CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
294  const DenseMap<unsigned, unsigned> &PV =
295  getPreviousVector(FirstInBundle);
296  std::vector<R600InstrInfo::BankSwizzle> BS;
297  bool isTransSlot;
298 
299  if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
  // Write BS[i] into the bank_swizzle operand of every instruction already
  // in the packet. The loop-local MI shadows the parameter of the same name.
300  for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
301  MachineInstr *MI = CurrentPacketMIs[i];
302  unsigned Op = TII->getOperandIdx(MI->getOpcode(),
303  R600::OpName::bank_swizzle);
304  MI->getOperand(Op).setImm(BS[i]);
305  }
  // The new instruction receives the last entry of BS.
306  unsigned Op =
307  TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
308  MI.getOperand(Op).setImm(BS.back());
  // The previous last packet member gets its `last` operand set to 0.
309  if (!CurrentPacketMIs.empty())
310  setIsLastBit(CurrentPacketMIs.back(), 0);
311  substitutePV(MI, PV);
  // When MI is in the Trans slot the packet is ended right after It.
313  if (isTransSlot) {
314  endPacket(std::next(It)->getParent(), std::next(It));
315  }
316  return It;
317  }
  // MI does not fit: end the current packet in front of MI.
318  endPacket(MI.getParent(), MI);
319  if (TII->isTransOnly(MI))
320  return MI;
322  }
323 };
324 
// Runs the packetizer over Fn.
//
// Returns false without touching Fn when the instruction itineraries of the
// packetizer's resource tracker are empty; otherwise it first erases a set of
// instructions from every block, then packetizes every block region by region
// and returns true.
//
// NOTE(review): listing line 326 is missing from this copy; `ST` is presumably
// declared there.
325 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
327  const R600InstrInfo *TII = ST.getInstrInfo();
328 
329  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
330 
331  // Instantiate the packetizer.
332  R600PacketizerList Packetizer(Fn, ST, MLI);
333 
334  // DFA state table should not be empty.
335  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
336  assert(Packetizer.getResourceTracker()->getInstrItins());
337 
338  if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
339  return false;
340 
341  //
342  // Loop over all basic blocks and remove KILL pseudo-instructions.
343  // These instructions confuse the dependence analysis. Consider:
344  // D0 = ... (Insn 0)
345  // R0 = KILL R0, D0 (Insn 1)
346  // R0 = ... (Insn 2)
347  // Here, Insn 1 will result in the dependence graph not emitting an output
348  // dependence between Insn 0 and Insn 2. This can lead to incorrect
349  // packetization.
350  //
  // Besides KILLs, the loop also erases IMPLICIT_DEF instructions and CF_ALU
  // instructions whose operand 8 has an immediate of 0.
351  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
352  MBB != MBBe; ++MBB) {
353  MachineBasicBlock::iterator End = MBB->end();
354  MachineBasicBlock::iterator MI = MBB->begin();
355  while (MI != End) {
356  if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
357  (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
  // Advance before erasing so that MI never refers to the erased instruction.
358  MachineBasicBlock::iterator DeleteMI = MI;
359  ++MI;
360  MBB->erase(DeleteMI);
361  End = MBB->end();
362  continue;
363  }
364  ++MI;
365  }
366  }
367 
368  // Loop over all of the basic blocks.
369  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
370  MBB != MBBe; ++MBB) {
371  // Find scheduling regions and schedule / packetize each region.
  // NOTE(review): RemainingCount is only decremented in the visible code and
  // is never read.
372  unsigned RemainingCount = MBB->size();
373  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
374  RegionEnd != MBB->begin();) {
375  // The next region starts above the previous region. Look backward in the
376  // instruction stream until we find the nearest boundary.
377  MachineBasicBlock::iterator I = RegionEnd;
378  for(;I != MBB->begin(); --I, --RemainingCount) {
379  if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
380  break;
381  }
  // NOTE(review): this assignment discards the position found by the scan
  // above, so every region starts at the beginning of the block -- confirm
  // that this is intended.
382  I = MBB->begin();
383 
384  // Skip empty scheduling regions.
385  if (I == RegionEnd) {
386  RegionEnd = std::prev(RegionEnd);
387  --RemainingCount;
388  continue;
389  }
390  // Skip regions with one instruction.
391  if (I == std::prev(RegionEnd)) {
392  RegionEnd = std::prev(RegionEnd);
393  continue;
394  }
395 
396  Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
397  RegionEnd = I;
398  }
399  }
400 
401  return true;
402 
403 }
404 
405 } // end anonymous namespace
406 
407 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
408  "R600 Packetizer", false, false)
410  "R600 Packetizer", false, false)
411 
412 char R600Packetizer::ID = 0;
413 
414 char &llvm::R600PacketizerID = R600Packetizer::ID;
415 
417  return new R600Packetizer();
418 }
bool usesAddressRegister(MachineInstr &MI) const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Interface definition for R600InstrInfo.
bool isPredicated(const MachineInstr &MI) const override
unsigned const TargetRegisterInfo * TRI
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
AnalysisUsage & addRequired()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
virtual MachineBasicBlock::iterator addToPacket(MachineInstr &MI)
char & R600PacketizerID
void setReg(Register Reg)
Change the register this operand corresponds to.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
R600 Packetizer
Scheduling dependency.
Definition: ScheduleDAG.h:49
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Represent the analysis usage information of a pass.
const R600InstrInfo * getInstrInfo() const override
bool isLDSInstr(unsigned Opcode) const
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool isVectorOnly(unsigned Opcode) const
static void write(bool isBE, void *P, T V)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
#define DEBUG_TYPE
bool fitsConstReadLimitations(const std::vector< MachineInstr *> &) const
An instruction group can only access 2 channel pair (either [XY] or [ZW]) from KCache bank on R700+...
bool isTransOnly(unsigned Opcode) const
INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, "R600 Packetizer", false, false) INITIALIZE_PASS_END(R600Packetizer
bool isVector(const MachineInstr &MI) const
Vector instructions are instructions that must fill all instruction slots within an instruction group...
Iterator for intrusive lists based on ilist_node.
Promote Memory to Register
Definition: Mem2Reg.cpp:109
bool hasCaymanISA() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isALUInstr(unsigned Opcode) const
bool readsLDSSrcReg(const MachineInstr &MI) const
FunctionPass * createR600Packetizer()
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
bool fitsReadPortLimitations(const std::vector< MachineInstr *> &MIs, const DenseMap< unsigned, unsigned > &PV, std::vector< BankSwizzle > &BS, bool isLastAluTrans) const
Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210 returns true and the first ...
#define I(x, y, z)
Definition: MD5.cpp:58
iterator end()
Definition: DenseMap.h:108
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
static const Function * getParent(const Value *V)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:439
bool definesAddressRegister(MachineInstr &MI) const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19