Line data Source code
1 : //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
/// This pass implements instruction packetization for R600. It unsets the
/// isLast bit of instructions inside a bundle and substitutes the src register
/// with PreviousVector when applicable.
14 : //
15 : //===----------------------------------------------------------------------===//
16 :
17 : #include "AMDGPU.h"
18 : #include "AMDGPUSubtarget.h"
19 : #include "R600InstrInfo.h"
20 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 : #include "llvm/CodeGen/DFAPacketizer.h"
22 : #include "llvm/CodeGen/MachineDominators.h"
23 : #include "llvm/CodeGen/MachineFunctionPass.h"
24 : #include "llvm/CodeGen/MachineLoopInfo.h"
25 : #include "llvm/CodeGen/Passes.h"
26 : #include "llvm/CodeGen/ScheduleDAG.h"
27 : #include "llvm/Support/Debug.h"
28 : #include "llvm/Support/raw_ostream.h"
29 :
30 : using namespace llvm;
31 :
32 : #define DEBUG_TYPE "packets"
33 :
34 : namespace {
35 :
36 : class R600Packetizer : public MachineFunctionPass {
37 :
38 : public:
39 : static char ID;
40 282 : R600Packetizer() : MachineFunctionPass(ID) {}
41 :
42 282 : void getAnalysisUsage(AnalysisUsage &AU) const override {
43 282 : AU.setPreservesCFG();
44 : AU.addRequired<MachineDominatorTree>();
45 : AU.addPreserved<MachineDominatorTree>();
46 : AU.addRequired<MachineLoopInfo>();
47 : AU.addPreserved<MachineLoopInfo>();
48 282 : MachineFunctionPass::getAnalysisUsage(AU);
49 282 : }
50 :
51 282 : StringRef getPassName() const override { return "R600 Packetizer"; }
52 :
53 : bool runOnMachineFunction(MachineFunction &Fn) override;
54 : };
55 :
56 2297 : class R600PacketizerList : public VLIWPacketizerList {
57 : private:
58 : const R600InstrInfo *TII;
59 : const R600RegisterInfo &TRI;
60 : bool VLIW5;
61 : bool ConsideredInstUsesAlreadyWrittenVectorElement;
62 :
63 0 : unsigned getSlot(const MachineInstr &MI) const {
64 0 : return TRI.getHWRegChan(MI.getOperand(0).getReg());
65 : }
66 :
67 : /// \returns register to PV chan mapping for bundle/single instructions that
68 : /// immediately precedes I.
69 46396 : DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
70 : const {
71 : DenseMap<unsigned, unsigned> Result;
72 : I--;
73 139188 : if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
74 : return Result;
75 : MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
76 40574 : if (I->isBundle())
77 : BI++;
78 : int LastDstChan = -1;
79 : do {
80 : bool isTrans = false;
81 99365 : int BISlot = getSlot(*BI);
82 99365 : if (LastDstChan >= BISlot)
83 : isTrans = true;
84 : LastDstChan = BISlot;
85 99365 : if (TII->isPredicated(*BI))
86 25424 : continue;
87 197846 : int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
88 98923 : if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
89 : continue;
90 196646 : int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
91 98323 : if (DstIdx == -1) {
92 : continue;
93 : }
94 94141 : unsigned Dst = BI->getOperand(DstIdx).getReg();
95 94141 : if (isTrans || TII->isTransOnly(*BI)) {
96 18516 : Result[Dst] = R600::PS;
97 18516 : continue;
98 : }
99 151250 : if (BI->getOpcode() == R600::DOT4_r600 ||
100 : BI->getOpcode() == R600::DOT4_eg) {
101 24 : Result[Dst] = R600::PV_X;
102 24 : continue;
103 : }
104 75601 : if (Dst == R600::OQAP) {
105 : continue;
106 : }
107 : unsigned PVReg = 0;
108 73941 : switch (TRI.getHWRegChan(Dst)) {
109 : case 0:
110 : PVReg = R600::PV_X;
111 : break;
112 : case 1:
113 : PVReg = R600::PV_Y;
114 : break;
115 : case 2:
116 : PVReg = R600::PV_Z;
117 : break;
118 : case 3:
119 : PVReg = R600::PV_W;
120 : break;
121 0 : default:
122 0 : llvm_unreachable("Invalid Chan");
123 : }
124 73941 : Result[Dst] = PVReg;
125 99365 : } while ((++BI)->isBundledWithPred());
126 : return Result;
127 : }
128 :
129 0 : void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
130 : const {
131 0 : unsigned Ops[] = {
132 : R600::OpName::src0,
133 : R600::OpName::src1,
134 : R600::OpName::src2
135 : };
136 0 : for (unsigned i = 0; i < 3; i++) {
137 0 : int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
138 0 : if (OperandIdx < 0)
139 0 : continue;
140 0 : unsigned Src = MI.getOperand(OperandIdx).getReg();
141 0 : const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
142 0 : if (It != PVs.end())
143 0 : MI.getOperand(OperandIdx).setReg(It->second);
144 : }
145 0 : }
146 : public:
147 : // Ctor.
148 : R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
149 : MachineLoopInfo &MLI)
150 2297 : : VLIWPacketizerList(MF, MLI, nullptr),
151 : TII(ST.getInstrInfo()),
152 2297 : TRI(TII->getRegisterInfo()) {
153 2297 : VLIW5 = !ST.hasCaymanISA();
154 : }
155 :
156 : // initPacketizerState - initialize some internal flags.
157 61510 : void initPacketizerState() override {
158 61510 : ConsideredInstUsesAlreadyWrittenVectorElement = false;
159 61510 : }
160 :
161 : // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
162 46396 : bool ignorePseudoInstruction(const MachineInstr &MI,
163 : const MachineBasicBlock *MBB) override {
164 46396 : return false;
165 : }
166 :
167 : // isSoloInstruction - return true if instruction MI can not be packetized
168 : // with any other instruction, which means that MI itself is a packet.
169 61510 : bool isSoloInstruction(const MachineInstr &MI) override {
170 61510 : if (TII->isVector(MI))
171 : return true;
172 123020 : if (!TII->isALUInstr(MI.getOpcode()))
173 : return true;
174 99746 : if (MI.getOpcode() == R600::GROUP_BARRIER)
175 : return true;
176 : // XXX: This can be removed once the packetizer properly handles all the
177 : // LDS instruction group restrictions.
178 49869 : return TII->isLDSInstr(MI.getOpcode());
179 : }
180 :
181 : // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
182 : // together.
183 53553 : bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
184 53553 : MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
185 53553 : if (getSlot(*MII) == getSlot(*MIJ))
186 12006 : ConsideredInstUsesAlreadyWrittenVectorElement = true;
187 : // Does MII and MIJ share the same pred_sel ?
188 107106 : int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
189 107106 : OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
190 53553 : unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
191 53553 : PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
192 53553 : if (PredI != PredJ)
193 : return false;
194 53517 : if (SUJ->isSucc(SUI)) {
195 37977 : for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
196 32176 : const SDep &Dep = SUJ->Succs[i];
197 32176 : if (Dep.getSUnit() != SUI)
198 : continue;
199 11214 : if (Dep.getKind() == SDep::Anti)
200 : continue;
201 5415 : if (Dep.getKind() == SDep::Output)
202 856 : if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
203 : continue;
204 : return false;
205 : }
206 : }
207 :
208 : bool ARDef =
209 48110 : TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
210 : bool ARUse =
211 48110 : TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
212 :
213 48110 : return !ARDef || !ARUse;
214 : }
215 :
216 : // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
217 : // and SUJ.
218 5580 : bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
219 5580 : return false;
220 : }
221 :
222 0 : void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
223 23509 : unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
224 23509 : MI->getOperand(LastOp).setImm(Bit);
225 0 : }
226 :
227 46396 : bool isBundlableWithCurrentPMI(MachineInstr &MI,
228 : const DenseMap<unsigned, unsigned> &PV,
229 : std::vector<R600InstrInfo::BankSwizzle> &BS,
230 : bool &isTransSlot) {
231 46396 : isTransSlot = TII->isTransOnly(MI);
232 : assert (!isTransSlot || VLIW5);
233 :
234 : // Is the dst reg sequence legal ?
235 46396 : if (!isTransSlot && !CurrentPacketMIs.empty()) {
236 26774 : if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
237 9383 : if (ConsideredInstUsesAlreadyWrittenVectorElement &&
238 20581 : !TII->isVectorOnly(MI) && VLIW5) {
239 8650 : isTransSlot = true;
240 : LLVM_DEBUG({
241 : dbgs() << "Considering as Trans Inst :";
242 : MI.dump();
243 : });
244 : }
245 : else
246 2857 : return false;
247 : }
248 : }
249 :
250 : // Are the Constants limitations met ?
251 43539 : CurrentPacketMIs.push_back(&MI);
252 43539 : if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
253 : LLVM_DEBUG({
254 : dbgs() << "Couldn't pack :\n";
255 : MI.dump();
256 : dbgs() << "with the following packets :\n";
257 : for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
258 : CurrentPacketMIs[i]->dump();
259 : dbgs() << "\n";
260 : }
261 : dbgs() << "because of Consts read limitations\n";
262 : });
263 : CurrentPacketMIs.pop_back();
264 346 : return false;
265 : }
266 :
267 : // Is there a BankSwizzle set that meet Read Port limitations ?
268 43193 : if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
269 43193 : PV, BS, isTransSlot)) {
270 : LLVM_DEBUG({
271 : dbgs() << "Couldn't pack :\n";
272 : MI.dump();
273 : dbgs() << "with the following packets :\n";
274 : for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
275 : CurrentPacketMIs[i]->dump();
276 : dbgs() << "\n";
277 : }
278 : dbgs() << "because of Read port limitations\n";
279 : });
280 : CurrentPacketMIs.pop_back();
281 521 : return false;
282 : }
283 :
284 : // We cannot read LDS source registers from the Trans slot.
285 42672 : if (isTransSlot && TII->readsLDSSrcReg(MI))
286 : return false;
287 :
288 : CurrentPacketMIs.pop_back();
289 42672 : return true;
290 : }
291 :
292 46396 : MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
293 : MachineBasicBlock::iterator FirstInBundle =
294 46396 : CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
295 : const DenseMap<unsigned, unsigned> &PV =
296 46396 : getPreviousVector(FirstInBundle);
297 : std::vector<R600InstrInfo::BankSwizzle> BS;
298 : bool isTransSlot;
299 :
300 46396 : if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
301 125276 : for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
302 39932 : MachineInstr *MI = CurrentPacketMIs[i];
303 79864 : unsigned Op = TII->getOperandIdx(MI->getOpcode(),
304 39932 : R600::OpName::bank_swizzle);
305 119796 : MI->getOperand(Op).setImm(BS[i]);
306 : }
307 : unsigned Op =
308 85344 : TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
309 85344 : MI.getOperand(Op).setImm(BS.back());
310 42672 : if (!CurrentPacketMIs.empty())
311 23509 : setIsLastBit(CurrentPacketMIs.back(), 0);
312 42672 : substitutePV(MI, PV);
313 42672 : MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
314 42672 : if (isTransSlot) {
315 9235 : endPacket(std::next(It)->getParent(), std::next(It));
316 : }
317 42672 : return It;
318 : }
319 7448 : endPacket(MI.getParent(), MI);
320 3724 : if (TII->isTransOnly(MI))
321 11 : return MI;
322 3713 : return VLIWPacketizerList::addToPacket(MI);
323 : }
324 : };
325 :
326 2297 : bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
327 2297 : const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
328 : const R600InstrInfo *TII = ST.getInstrInfo();
329 :
330 2297 : MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
331 :
332 : // Instantiate the packetizer.
333 : R600PacketizerList Packetizer(Fn, ST, MLI);
334 :
335 : // DFA state table should not be empty.
336 : assert(Packetizer.getResourceTracker() && "Empty DFA table!");
337 : assert(Packetizer.getResourceTracker()->getInstrItins());
338 :
339 2297 : if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
340 : return false;
341 :
342 : //
343 : // Loop over all basic blocks and remove KILL pseudo-instructions
344 : // These instructions confuse the dependence analysis. Consider:
345 : // D0 = ... (Insn 0)
346 : // R0 = KILL R0, D0 (Insn 1)
347 : // R0 = ... (Insn 2)
348 : // Here, Insn 1 will result in the dependence graph not emitting an output
349 : // dependence between Insn 0 and Insn 2. This can lead to incorrect
350 : // packetization
351 : //
352 : for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
353 4592 : MBB != MBBe; ++MBB) {
354 : MachineBasicBlock::iterator End = MBB->end();
355 : MachineBasicBlock::iterator MI = MBB->begin();
356 63914 : while (MI != End) {
357 61618 : if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
358 3781 : (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
359 17 : MachineBasicBlock::iterator DeleteMI = MI;
360 : ++MI;
361 17 : MBB->erase(DeleteMI);
362 : End = MBB->end();
363 : continue;
364 : }
365 : ++MI;
366 : }
367 : }
368 :
369 : // Loop over all of the basic blocks.
370 : for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
371 4592 : MBB != MBBe; ++MBB) {
372 : // Find scheduling regions and schedule / packetize each region.
373 : unsigned RemainingCount = MBB->size();
374 : for(MachineBasicBlock::iterator RegionEnd = MBB->end();
375 4590 : RegionEnd != MBB->begin();) {
376 : // The next region starts above the previous region. Look backward in the
377 : // instruction stream until we find the nearest boundary.
378 2294 : MachineBasicBlock::iterator I = RegionEnd;
379 2294 : for(;I != MBB->begin(); --I, --RemainingCount) {
380 2294 : if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
381 : break;
382 : }
383 2294 : I = MBB->begin();
384 :
385 : // Skip empty scheduling regions.
386 2294 : if (I == RegionEnd) {
387 0 : RegionEnd = std::prev(RegionEnd);
388 : --RemainingCount;
389 91 : continue;
390 : }
391 : // Skip regions with one instruction.
392 4588 : if (I == std::prev(RegionEnd)) {
393 91 : RegionEnd = std::prev(RegionEnd);
394 91 : continue;
395 : }
396 :
397 2203 : Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
398 2203 : RegionEnd = I;
399 : }
400 : }
401 :
402 : return true;
403 :
404 : }
405 :
406 : } // end anonymous namespace
407 :
408 85105 : INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
409 : "R600 Packetizer", false, false)
410 199024 : INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
411 : "R600 Packetizer", false, false)
412 :
413 : char R600Packetizer::ID = 0;
414 :
415 : char &llvm::R600PacketizerID = R600Packetizer::ID;
416 :
417 282 : llvm::FunctionPass *llvm::createR600Packetizer() {
418 282 : return new R600Packetizer();
419 : }
|