Bug Summary

File:lib/Target/AMDGPU/SIInstrInfo.cpp
Location:line 1991, column 33
Description:Called C++ object pointer is null

Annotated Source Code

1//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "SIInstrInfo.h"
17#include "AMDGPUTargetMachine.h"
18#include "GCNHazardRecognizer.h"
19#include "SIDefines.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/ScheduleDAG.h"
25#include "llvm/IR/Function.h"
26#include "llvm/CodeGen/RegisterScavenging.h"
27#include "llvm/MC/MCInstrDesc.h"
28#include "llvm/Support/Debug.h"
29
30using namespace llvm;
31
32SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
33 : AMDGPUInstrInfo(st), RI() {}
34
35//===----------------------------------------------------------------------===//
36// TargetInstrInfo callbacks
37//===----------------------------------------------------------------------===//
38
39static unsigned getNumOperandsNoGlue(SDNode *Node) {
40 unsigned N = Node->getNumOperands();
41 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
42 --N;
43 return N;
44}
45
46static SDValue findChainOperand(SDNode *Load) {
47 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
48 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node")((LastOp.getValueType() == MVT::Other && "Chain missing from load node"
) ? static_cast<void> (0) : __assert_fail ("LastOp.getValueType() == MVT::Other && \"Chain missing from load node\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 48, __PRETTY_FUNCTION__))
;
49 return LastOp;
50}
51
52/// \brief Returns true if both nodes have the same value for the given
53/// operand \p Op, or if both nodes do not have this operand.
54static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
55 unsigned Opc0 = N0->getMachineOpcode();
56 unsigned Opc1 = N1->getMachineOpcode();
57
58 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
59 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
60
61 if (Op0Idx == -1 && Op1Idx == -1)
62 return true;
63
64
65 if ((Op0Idx == -1 && Op1Idx != -1) ||
66 (Op1Idx == -1 && Op0Idx != -1))
67 return false;
68
69 // getNamedOperandIdx returns the index for the MachineInstr's operands,
70 // which includes the result as the first operand. We are indexing into the
71 // MachineSDNode's operands, so we need to skip the result operand to get
72 // the real index.
73 --Op0Idx;
74 --Op1Idx;
75
76 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
77}
78
79bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
80 AliasAnalysis *AA) const {
81 // TODO: The generic check fails for VALU instructions that should be
82 // rematerializable due to implicit reads of exec. We really want all of the
83 // generic logic for this except for this.
84 switch (MI->getOpcode()) {
85 case AMDGPU::V_MOV_B32_e32:
86 case AMDGPU::V_MOV_B32_e64:
87 case AMDGPU::V_MOV_B64_PSEUDO:
88 return true;
89 default:
90 return false;
91 }
92}
93
94bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
95 int64_t &Offset0,
96 int64_t &Offset1) const {
97 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
98 return false;
99
100 unsigned Opc0 = Load0->getMachineOpcode();
101 unsigned Opc1 = Load1->getMachineOpcode();
102
103 // Make sure both are actually loads.
104 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
105 return false;
106
107 if (isDS(Opc0) && isDS(Opc1)) {
108
109 // FIXME: Handle this case:
110 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
111 return false;
112
113 // Check base reg.
114 if (Load0->getOperand(1) != Load1->getOperand(1))
115 return false;
116
117 // Check chain.
118 if (findChainOperand(Load0) != findChainOperand(Load1))
119 return false;
120
121 // Skip read2 / write2 variants for simplicity.
122 // TODO: We should report true if the used offsets are adjacent (excluded
123 // st64 versions).
124 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
125 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
126 return false;
127
128 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
129 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
130 return true;
131 }
132
133 if (isSMRD(Opc0) && isSMRD(Opc1)) {
134 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1))((getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1))
? static_cast<void> (0) : __assert_fail ("getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 134, __PRETTY_FUNCTION__))
;
135
136 // Check base reg.
137 if (Load0->getOperand(0) != Load1->getOperand(0))
138 return false;
139
140 const ConstantSDNode *Load0Offset =
141 dyn_cast<ConstantSDNode>(Load0->getOperand(1));
142 const ConstantSDNode *Load1Offset =
143 dyn_cast<ConstantSDNode>(Load1->getOperand(1));
144
145 if (!Load0Offset || !Load1Offset)
146 return false;
147
148 // Check chain.
149 if (findChainOperand(Load0) != findChainOperand(Load1))
150 return false;
151
152 Offset0 = Load0Offset->getZExtValue();
153 Offset1 = Load1Offset->getZExtValue();
154 return true;
155 }
156
157 // MUBUF and MTBUF can access the same addresses.
158 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
159
160 // MUBUF and MTBUF have vaddr at different indices.
161 if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
162 findChainOperand(Load0) != findChainOperand(Load1) ||
163 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
164 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
165 return false;
166
167 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
168 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
169
170 if (OffIdx0 == -1 || OffIdx1 == -1)
171 return false;
172
173 // getNamedOperandIdx returns the index for MachineInstrs. Since they
174 // inlcude the output in the operand list, but SDNodes don't, we need to
175 // subtract the index by one.
176 --OffIdx0;
177 --OffIdx1;
178
179 SDValue Off0 = Load0->getOperand(OffIdx0);
180 SDValue Off1 = Load1->getOperand(OffIdx1);
181
182 // The offset might be a FrameIndexSDNode.
183 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
184 return false;
185
186 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
187 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
188 return true;
189 }
190
191 return false;
192}
193
194static bool isStride64(unsigned Opc) {
195 switch (Opc) {
196 case AMDGPU::DS_READ2ST64_B32:
197 case AMDGPU::DS_READ2ST64_B64:
198 case AMDGPU::DS_WRITE2ST64_B32:
199 case AMDGPU::DS_WRITE2ST64_B64:
200 return true;
201 default:
202 return false;
203 }
204}
205
206bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
207 int64_t &Offset,
208 const TargetRegisterInfo *TRI) const {
209 unsigned Opc = LdSt->getOpcode();
210
211 if (isDS(*LdSt)) {
212 const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
213 AMDGPU::OpName::offset);
214 if (OffsetImm) {
215 // Normal, single offset LDS instruction.
216 const MachineOperand *AddrReg = getNamedOperand(*LdSt,
217 AMDGPU::OpName::addr);
218
219 BaseReg = AddrReg->getReg();
220 Offset = OffsetImm->getImm();
221 return true;
222 }
223
224 // The 2 offset instructions use offset0 and offset1 instead. We can treat
225 // these as a load with a single offset if the 2 offsets are consecutive. We
226 // will use this for some partially aligned loads.
227 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
228 AMDGPU::OpName::offset0);
229 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
230 AMDGPU::OpName::offset1);
231
232 uint8_t Offset0 = Offset0Imm->getImm();
233 uint8_t Offset1 = Offset1Imm->getImm();
234
235 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
236 // Each of these offsets is in element sized units, so we need to convert
237 // to bytes of the individual reads.
238
239 unsigned EltSize;
240 if (LdSt->mayLoad())
241 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
242 else {
243 assert(LdSt->mayStore())((LdSt->mayStore()) ? static_cast<void> (0) : __assert_fail
("LdSt->mayStore()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 243, __PRETTY_FUNCTION__))
;
244 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
245 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
246 }
247
248 if (isStride64(Opc))
249 EltSize *= 64;
250
251 const MachineOperand *AddrReg = getNamedOperand(*LdSt,
252 AMDGPU::OpName::addr);
253 BaseReg = AddrReg->getReg();
254 Offset = EltSize * Offset0;
255 return true;
256 }
257
258 return false;
259 }
260
261 if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
262 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
263 return false;
264
265 const MachineOperand *AddrReg = getNamedOperand(*LdSt,
266 AMDGPU::OpName::vaddr);
267 if (!AddrReg)
268 return false;
269
270 const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
271 AMDGPU::OpName::offset);
272 BaseReg = AddrReg->getReg();
273 Offset = OffsetImm->getImm();
274 return true;
275 }
276
277 if (isSMRD(*LdSt)) {
278 const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
279 AMDGPU::OpName::offset);
280 if (!OffsetImm)
281 return false;
282
283 const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
284 AMDGPU::OpName::sbase);
285 BaseReg = SBaseReg->getReg();
286 Offset = OffsetImm->getImm();
287 return true;
288 }
289
290 return false;
291}
292
293bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
294 MachineInstr *SecondLdSt,
295 unsigned NumLoads) const {
296 const MachineOperand *FirstDst = nullptr;
297 const MachineOperand *SecondDst = nullptr;
298
299 if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) {
300 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst);
301 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst);
302 }
303
304 if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) {
305 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst);
306 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst);
307 }
308
309 if ((isMUBUF(*FirstLdSt) && isMUBUF(*SecondLdSt)) ||
310 (isMTBUF(*FirstLdSt) && isMTBUF(*SecondLdSt))) {
311 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata);
312 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata);
313 }
314
315 if (!FirstDst || !SecondDst)
316 return false;
317
318 // Try to limit clustering based on the total number of bytes loaded
319 // rather than the number of instructions. This is done to help reduce
320 // register pressure. The method used is somewhat inexact, though,
321 // because it assumes that all loads in the cluster will load the
322 // same number of bytes as FirstLdSt.
323
324 // The unit of this value is bytes.
325 // FIXME: This needs finer tuning.
326 unsigned LoadClusterThreshold = 16;
327
328 const MachineRegisterInfo &MRI =
329 FirstLdSt->getParent()->getParent()->getRegInfo();
330 const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
331
332 return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
333}
334
335void
336SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
337 MachineBasicBlock::iterator MI, DebugLoc DL,
338 unsigned DestReg, unsigned SrcReg,
339 bool KillSrc) const {
340
341 // If we are trying to copy to or from SCC, there is a bug somewhere else in
342 // the backend. While it may be theoretically possible to do this, it should
343 // never be necessary.
344 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC)((DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC) ? static_cast
<void> (0) : __assert_fail ("DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 344, __PRETTY_FUNCTION__))
;
345
346 static const int16_t Sub0_15[] = {
347 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
348 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
349 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
350 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
351 };
352
353 static const int16_t Sub0_15_64[] = {
354 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
355 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
356 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
357 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
358 };
359
360 static const int16_t Sub0_7[] = {
361 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
362 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
363 };
364
365 static const int16_t Sub0_7_64[] = {
366 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
367 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
368 };
369
370 static const int16_t Sub0_3[] = {
371 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
372 };
373
374 static const int16_t Sub0_3_64[] = {
375 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
376 };
377
378 static const int16_t Sub0_2[] = {
379 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
380 };
381
382 static const int16_t Sub0_1[] = {
383 AMDGPU::sub0, AMDGPU::sub1,
384 };
385
386 unsigned Opcode;
387 ArrayRef<int16_t> SubIndices;
388 bool Forward;
389
390 if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
391 assert(AMDGPU::SReg_32RegClass.contains(SrcReg))((AMDGPU::SReg_32RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::SReg_32RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 391, __PRETTY_FUNCTION__))
;
392 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
393 .addReg(SrcReg, getKillRegState(KillSrc));
394 return;
395
396 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
397 if (DestReg == AMDGPU::VCC) {
398 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
399 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
400 .addReg(SrcReg, getKillRegState(KillSrc));
401 } else {
402 // FIXME: Hack until VReg_1 removed.
403 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg))((AMDGPU::VGPR_32RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::VGPR_32RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 403, __PRETTY_FUNCTION__))
;
404 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
405 .addImm(0)
406 .addReg(SrcReg, getKillRegState(KillSrc));
407 }
408
409 return;
410 }
411
412 assert(AMDGPU::SReg_64RegClass.contains(SrcReg))((AMDGPU::SReg_64RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::SReg_64RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 412, __PRETTY_FUNCTION__))
;
413 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
414 .addReg(SrcReg, getKillRegState(KillSrc));
415 return;
416
417 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
418 assert(AMDGPU::SReg_128RegClass.contains(SrcReg))((AMDGPU::SReg_128RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::SReg_128RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 418, __PRETTY_FUNCTION__))
;
419 Opcode = AMDGPU::S_MOV_B64;
420 SubIndices = Sub0_3_64;
421
422 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
423 assert(AMDGPU::SReg_256RegClass.contains(SrcReg))((AMDGPU::SReg_256RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::SReg_256RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 423, __PRETTY_FUNCTION__))
;
424 Opcode = AMDGPU::S_MOV_B64;
425 SubIndices = Sub0_7_64;
426
427 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
428 assert(AMDGPU::SReg_512RegClass.contains(SrcReg))((AMDGPU::SReg_512RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::SReg_512RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 428, __PRETTY_FUNCTION__))
;
429 Opcode = AMDGPU::S_MOV_B64;
430 SubIndices = Sub0_15_64;
431
432 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
433 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||((AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 434, __PRETTY_FUNCTION__))
434 AMDGPU::SReg_32RegClass.contains(SrcReg))((AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 434, __PRETTY_FUNCTION__))
;
435 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
436 .addReg(SrcReg, getKillRegState(KillSrc));
437 return;
438
439 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
440 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||((AMDGPU::VReg_64RegClass.contains(SrcReg) || AMDGPU::SReg_64RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_64RegClass.contains(SrcReg) || AMDGPU::SReg_64RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 441, __PRETTY_FUNCTION__))
441 AMDGPU::SReg_64RegClass.contains(SrcReg))((AMDGPU::VReg_64RegClass.contains(SrcReg) || AMDGPU::SReg_64RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_64RegClass.contains(SrcReg) || AMDGPU::SReg_64RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 441, __PRETTY_FUNCTION__))
;
442 Opcode = AMDGPU::V_MOV_B32_e32;
443 SubIndices = Sub0_1;
444
445 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
446 assert(AMDGPU::VReg_96RegClass.contains(SrcReg))((AMDGPU::VReg_96RegClass.contains(SrcReg)) ? static_cast<
void> (0) : __assert_fail ("AMDGPU::VReg_96RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 446, __PRETTY_FUNCTION__))
;
447 Opcode = AMDGPU::V_MOV_B32_e32;
448 SubIndices = Sub0_2;
449
450 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
451 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||((AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 452, __PRETTY_FUNCTION__))
452 AMDGPU::SReg_128RegClass.contains(SrcReg))((AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 452, __PRETTY_FUNCTION__))
;
453 Opcode = AMDGPU::V_MOV_B32_e32;
454 SubIndices = Sub0_3;
455
456 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
457 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||((AMDGPU::VReg_256RegClass.contains(SrcReg) || AMDGPU::SReg_256RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_256RegClass.contains(SrcReg) || AMDGPU::SReg_256RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 458, __PRETTY_FUNCTION__))
458 AMDGPU::SReg_256RegClass.contains(SrcReg))((AMDGPU::VReg_256RegClass.contains(SrcReg) || AMDGPU::SReg_256RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_256RegClass.contains(SrcReg) || AMDGPU::SReg_256RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 458, __PRETTY_FUNCTION__))
;
459 Opcode = AMDGPU::V_MOV_B32_e32;
460 SubIndices = Sub0_7;
461
462 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
463 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||((AMDGPU::VReg_512RegClass.contains(SrcReg) || AMDGPU::SReg_512RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_512RegClass.contains(SrcReg) || AMDGPU::SReg_512RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 464, __PRETTY_FUNCTION__))
464 AMDGPU::SReg_512RegClass.contains(SrcReg))((AMDGPU::VReg_512RegClass.contains(SrcReg) || AMDGPU::SReg_512RegClass
.contains(SrcReg)) ? static_cast<void> (0) : __assert_fail
("AMDGPU::VReg_512RegClass.contains(SrcReg) || AMDGPU::SReg_512RegClass.contains(SrcReg)"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 464, __PRETTY_FUNCTION__))
;
465 Opcode = AMDGPU::V_MOV_B32_e32;
466 SubIndices = Sub0_15;
467
468 } else {
469 llvm_unreachable("Can't copy register!")::llvm::llvm_unreachable_internal("Can't copy register!", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 469)
;
470 }
471
472 if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
473 Forward = true;
474 else
475 Forward = false;
476
477 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
478 unsigned SubIdx;
479 if (Forward)
480 SubIdx = SubIndices[Idx];
481 else
482 SubIdx = SubIndices[SubIndices.size() - Idx - 1];
483
484 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
485 get(Opcode), RI.getSubReg(DestReg, SubIdx));
486
487 Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
488
489 if (Idx == SubIndices.size() - 1)
490 Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);
491
492 if (Idx == 0)
493 Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
494 }
495}
496
497int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
498 const unsigned Opcode = MI.getOpcode();
499
500 int NewOpc;
501
502 // Try to map original to commuted opcode
503 NewOpc = AMDGPU::getCommuteRev(Opcode);
504 if (NewOpc != -1)
505 // Check if the commuted (REV) opcode exists on the target.
506 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
507
508 // Try to map commuted to original opcode
509 NewOpc = AMDGPU::getCommuteOrig(Opcode);
510 if (NewOpc != -1)
511 // Check if the original (non-REV) opcode exists on the target.
512 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
513
514 return Opcode;
515}
516
517unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
518
519 if (DstRC->getSize() == 4) {
520 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
521 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
522 return AMDGPU::S_MOV_B64;
523 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
524 return AMDGPU::V_MOV_B64_PSEUDO;
525 }
526 return AMDGPU::COPY;
527}
528
529static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
530 switch (Size) {
531 case 4:
532 return AMDGPU::SI_SPILL_S32_SAVE;
533 case 8:
534 return AMDGPU::SI_SPILL_S64_SAVE;
535 case 16:
536 return AMDGPU::SI_SPILL_S128_SAVE;
537 case 32:
538 return AMDGPU::SI_SPILL_S256_SAVE;
539 case 64:
540 return AMDGPU::SI_SPILL_S512_SAVE;
541 default:
542 llvm_unreachable("unknown register size")::llvm::llvm_unreachable_internal("unknown register size", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 542)
;
543 }
544}
545
546static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
547 switch (Size) {
548 case 4:
549 return AMDGPU::SI_SPILL_V32_SAVE;
550 case 8:
551 return AMDGPU::SI_SPILL_V64_SAVE;
552 case 12:
553 return AMDGPU::SI_SPILL_V96_SAVE;
554 case 16:
555 return AMDGPU::SI_SPILL_V128_SAVE;
556 case 32:
557 return AMDGPU::SI_SPILL_V256_SAVE;
558 case 64:
559 return AMDGPU::SI_SPILL_V512_SAVE;
560 default:
561 llvm_unreachable("unknown register size")::llvm::llvm_unreachable_internal("unknown register size", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 561)
;
562 }
563}
564
565void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
566 MachineBasicBlock::iterator MI,
567 unsigned SrcReg, bool isKill,
568 int FrameIndex,
569 const TargetRegisterClass *RC,
570 const TargetRegisterInfo *TRI) const {
571 MachineFunction *MF = MBB.getParent();
572 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
573 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
574 DebugLoc DL = MBB.findDebugLoc(MI);
575
576 unsigned Size = FrameInfo->getObjectSize(FrameIndex);
577 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
578 MachinePointerInfo PtrInfo
579 = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
580 MachineMemOperand *MMO
581 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
582 Size, Align);
583
584 if (RI.isSGPRClass(RC)) {
585 MFI->setHasSpilledSGPRs();
586
587 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) {
588 // m0 may not be allowed for readlane.
589 MachineRegisterInfo &MRI = MF->getRegInfo();
590 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
591 }
592
593 // We are only allowed to create one new instruction when spilling
594 // registers, so we need to use pseudo instruction for spilling
595 // SGPRs.
596 unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
597 BuildMI(MBB, MI, DL, get(Opcode))
598 .addReg(SrcReg) // src
599 .addFrameIndex(FrameIndex) // frame_idx
600 .addMemOperand(MMO);
601
602 return;
603 }
604
605 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
606 LLVMContext &Ctx = MF->getFunction()->getContext();
607 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
608 " spill register");
609 BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
610 .addReg(SrcReg);
611
612 return;
613 }
614
615 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected")((RI.hasVGPRs(RC) && "Only VGPR spilling expected") ?
static_cast<void> (0) : __assert_fail ("RI.hasVGPRs(RC) && \"Only VGPR spilling expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 615, __PRETTY_FUNCTION__))
;
616
617 unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
618 MFI->setHasSpilledVGPRs();
619 BuildMI(MBB, MI, DL, get(Opcode))
620 .addReg(SrcReg) // src
621 .addFrameIndex(FrameIndex) // frame_idx
622 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
623 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
624 .addImm(0) // offset
625 .addMemOperand(MMO);
626}
627
628static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
629 switch (Size) {
630 case 4:
631 return AMDGPU::SI_SPILL_S32_RESTORE;
632 case 8:
633 return AMDGPU::SI_SPILL_S64_RESTORE;
634 case 16:
635 return AMDGPU::SI_SPILL_S128_RESTORE;
636 case 32:
637 return AMDGPU::SI_SPILL_S256_RESTORE;
638 case 64:
639 return AMDGPU::SI_SPILL_S512_RESTORE;
640 default:
641 llvm_unreachable("unknown register size")::llvm::llvm_unreachable_internal("unknown register size", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 641)
;
642 }
643}
644
645static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
646 switch (Size) {
647 case 4:
648 return AMDGPU::SI_SPILL_V32_RESTORE;
649 case 8:
650 return AMDGPU::SI_SPILL_V64_RESTORE;
651 case 12:
652 return AMDGPU::SI_SPILL_V96_RESTORE;
653 case 16:
654 return AMDGPU::SI_SPILL_V128_RESTORE;
655 case 32:
656 return AMDGPU::SI_SPILL_V256_RESTORE;
657 case 64:
658 return AMDGPU::SI_SPILL_V512_RESTORE;
659 default:
660 llvm_unreachable("unknown register size")::llvm::llvm_unreachable_internal("unknown register size", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 660)
;
661 }
662}
663
664void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
665 MachineBasicBlock::iterator MI,
666 unsigned DestReg, int FrameIndex,
667 const TargetRegisterClass *RC,
668 const TargetRegisterInfo *TRI) const {
669 MachineFunction *MF = MBB.getParent();
670 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
671 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
672 DebugLoc DL = MBB.findDebugLoc(MI);
673 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
674 unsigned Size = FrameInfo->getObjectSize(FrameIndex);
675
676 MachinePointerInfo PtrInfo
677 = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
678
679 MachineMemOperand *MMO = MF->getMachineMemOperand(
680 PtrInfo, MachineMemOperand::MOLoad, Size, Align);
681
682 if (RI.isSGPRClass(RC)) {
683 // FIXME: Maybe this should not include a memoperand because it will be
684 // lowered to non-memory instructions.
685 unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
686
687 if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) {
688 // m0 may not be allowed for readlane.
689 MachineRegisterInfo &MRI = MF->getRegInfo();
690 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
691 }
692
693 BuildMI(MBB, MI, DL, get(Opcode), DestReg)
694 .addFrameIndex(FrameIndex) // frame_idx
695 .addMemOperand(MMO);
696
697 return;
698 }
699
700 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
701 LLVMContext &Ctx = MF->getFunction()->getContext();
702 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
703 " restore register");
704 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
705
706 return;
707 }
708
709 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected")((RI.hasVGPRs(RC) && "Only VGPR spilling expected") ?
static_cast<void> (0) : __assert_fail ("RI.hasVGPRs(RC) && \"Only VGPR spilling expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 709, __PRETTY_FUNCTION__))
;
710
711 unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
712 BuildMI(MBB, MI, DL, get(Opcode), DestReg)
713 .addFrameIndex(FrameIndex) // frame_idx
714 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
715 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
716 .addImm(0) // offset
717 .addMemOperand(MMO);
718}
719
720/// \param @Offset Offset in bytes of the FrameIndex being spilled
721unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
722 MachineBasicBlock::iterator MI,
723 RegScavenger *RS, unsigned TmpReg,
724 unsigned FrameOffset,
725 unsigned Size) const {
726 MachineFunction *MF = MBB.getParent();
727 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
728 const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
729 const SIRegisterInfo *TRI =
730 static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
731 DebugLoc DL = MBB.findDebugLoc(MI);
732 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
733 unsigned WavefrontSize = ST.getWavefrontSize();
734
735 unsigned TIDReg = MFI->getTIDReg();
736 if (!MFI->hasCalculatedTID()) {
737 MachineBasicBlock &Entry = MBB.getParent()->front();
738 MachineBasicBlock::iterator Insert = Entry.front();
739 DebugLoc DL = Insert->getDebugLoc();
740
741 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
742 if (TIDReg == AMDGPU::NoRegister)
743 return TIDReg;
744
745
746 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
747 WorkGroupSize > WavefrontSize) {
748
749 unsigned TIDIGXReg
750 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
751 unsigned TIDIGYReg
752 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
753 unsigned TIDIGZReg
754 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
755 unsigned InputPtrReg =
756 TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
757 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
758 if (!Entry.isLiveIn(Reg))
759 Entry.addLiveIn(Reg);
760 }
761
762 RS->enterBasicBlock(Entry);
763 // FIXME: Can we scavenge an SReg_64 and access the subregs?
764 unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
765 unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
766 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
767 .addReg(InputPtrReg)
768 .addImm(SI::KernelInputOffsets::NGROUPS_Z);
769 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
770 .addReg(InputPtrReg)
771 .addImm(SI::KernelInputOffsets::NGROUPS_Y);
772
773 // NGROUPS.X * NGROUPS.Y
774 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
775 .addReg(STmp1)
776 .addReg(STmp0);
777 // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
778 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
779 .addReg(STmp1)
780 .addReg(TIDIGXReg);
781 // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
782 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
783 .addReg(STmp0)
784 .addReg(TIDIGYReg)
785 .addReg(TIDReg);
786 // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
787 BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
788 .addReg(TIDReg)
789 .addReg(TIDIGZReg);
790 } else {
791 // Get the wave id
792 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
793 TIDReg)
794 .addImm(-1)
795 .addImm(0);
796
797 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
798 TIDReg)
799 .addImm(-1)
800 .addReg(TIDReg);
801 }
802
803 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
804 TIDReg)
805 .addImm(2)
806 .addReg(TIDReg);
807 MFI->setTIDReg(TIDReg);
808 }
809
810 // Add FrameIndex to LDS offset
811 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
812 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
813 .addImm(LDSOffset)
814 .addReg(TIDReg);
815
816 return TmpReg;
817}
818
819void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
820 MachineBasicBlock::iterator MI,
821 int Count) const {
822 DebugLoc DL = MBB.findDebugLoc(MI);
823 while (Count > 0) {
824 int Arg;
825 if (Count >= 8)
826 Arg = 7;
827 else
828 Arg = Count - 1;
829 Count -= 8;
830 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
831 .addImm(Arg);
832 }
833}
834
835void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
836 MachineBasicBlock::iterator MI) const {
837 insertWaitStates(MBB, MI, 1);
838}
839
840unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
841 switch (MI.getOpcode()) {
842 default: return 1; // FIXME: Do wait states equal cycles?
843
844 case AMDGPU::S_NOP:
845 return MI.getOperand(0).getImm() + 1;
846 }
847}
848
849bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
850 MachineBasicBlock &MBB = *MI->getParent();
851 DebugLoc DL = MBB.findDebugLoc(MI);
852 switch (MI->getOpcode()) {
853 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
854
855 case AMDGPU::SGPR_USE:
856 // This is just a placeholder for register allocation.
857 MI->eraseFromParent();
858 break;
859
860 case AMDGPU::V_MOV_B64_PSEUDO: {
861 unsigned Dst = MI->getOperand(0).getReg();
862 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
863 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
864
865 const MachineOperand &SrcOp = MI->getOperand(1);
866 // FIXME: Will this work for 64-bit floating point immediates?
867 assert(!SrcOp.isFPImm())((!SrcOp.isFPImm()) ? static_cast<void> (0) : __assert_fail
("!SrcOp.isFPImm()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 867, __PRETTY_FUNCTION__))
;
868 if (SrcOp.isImm()) {
869 APInt Imm(64, SrcOp.getImm());
870 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
871 .addImm(Imm.getLoBits(32).getZExtValue())
872 .addReg(Dst, RegState::Implicit);
873 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
874 .addImm(Imm.getHiBits(32).getZExtValue())
875 .addReg(Dst, RegState::Implicit);
876 } else {
877 assert(SrcOp.isReg())((SrcOp.isReg()) ? static_cast<void> (0) : __assert_fail
("SrcOp.isReg()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 877, __PRETTY_FUNCTION__))
;
878 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
879 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
880 .addReg(Dst, RegState::Implicit);
881 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
882 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
883 .addReg(Dst, RegState::Implicit);
884 }
885 MI->eraseFromParent();
886 break;
887 }
888
889 case AMDGPU::V_CNDMASK_B64_PSEUDO: {
890 unsigned Dst = MI->getOperand(0).getReg();
891 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
892 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
893 unsigned Src0 = MI->getOperand(1).getReg();
894 unsigned Src1 = MI->getOperand(2).getReg();
895 const MachineOperand &SrcCond = MI->getOperand(3);
896
897 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
898 .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
899 .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
900 .addOperand(SrcCond);
901 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
902 .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
903 .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
904 .addOperand(SrcCond);
905 MI->eraseFromParent();
906 break;
907 }
908
909 case AMDGPU::SI_CONSTDATA_PTR: {
910 const SIRegisterInfo *TRI =
911 static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
912 MachineFunction &MF = *MBB.getParent();
913 unsigned Reg = MI->getOperand(0).getReg();
914 unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
915 unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
916
917 // Create a bundle so these instructions won't be re-ordered by the
918 // post-RA scheduler.
919 MIBundleBuilder Bundler(MBB, MI);
920 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
921
922 // Add 32-bit offset from this instruction to the start of the
923 // constant data.
924 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
925 .addReg(RegLo)
926 .addOperand(MI->getOperand(1)));
927 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
928 .addReg(RegHi)
929 .addImm(0));
930
931 llvm::finalizeBundle(MBB, Bundler.begin());
932
933 MI->eraseFromParent();
934 break;
935 }
936 }
937 return true;
938}
939
940/// Commutes the operands in the given instruction.
941/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
942///
943/// Do not call this method for a non-commutable instruction or for
944/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
945/// Even though the instruction is commutable, the method may still
946/// fail to commute the operands, null pointer is returned in such cases.
947MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
948 bool NewMI,
949 unsigned OpIdx0,
950 unsigned OpIdx1) const {
951 int CommutedOpcode = commuteOpcode(*MI);
952 if (CommutedOpcode == -1)
953 return nullptr;
954
955 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
956 AMDGPU::OpName::src0);
957 MachineOperand &Src0 = MI->getOperand(Src0Idx);
958 if (!Src0.isReg())
959 return nullptr;
960
961 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
962 AMDGPU::OpName::src1);
963
964 if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
965 OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
966 (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
967 OpIdx1 != static_cast<unsigned>(Src0Idx)))
968 return nullptr;
969
970 MachineOperand &Src1 = MI->getOperand(Src1Idx);
971
972
973 if (isVOP2(*MI) || isVOPC(*MI)) {
974 const MCInstrDesc &InstrDesc = MI->getDesc();
975 // For VOP2 and VOPC instructions, any operand type is valid to use for
976 // src0. Make sure we can use the src0 as src1.
977 //
978 // We could be stricter here and only allow commuting if there is a reason
979 // to do so. i.e. if both operands are VGPRs there is no real benefit,
980 // although MachineCSE attempts to find matches by commuting.
981 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
982 if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
983 return nullptr;
984 }
985
986 if (!Src1.isReg()) {
987 // Allow commuting instructions with Imm operands.
988 if (NewMI || !Src1.isImm() ||
989 (!isVOP2(*MI) && !isVOP3(*MI))) {
990 return nullptr;
991 }
992 // Be sure to copy the source modifiers to the right place.
993 if (MachineOperand *Src0Mods
994 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
995 MachineOperand *Src1Mods
996 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
997
998 int Src0ModsVal = Src0Mods->getImm();
999 if (!Src1Mods && Src0ModsVal != 0)
1000 return nullptr;
1001
1002 // XXX - This assert might be a lie. It might be useful to have a neg
1003 // modifier with 0.0.
1004 int Src1ModsVal = Src1Mods->getImm();
1005 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates")(((Src1ModsVal == 0) && "Not expecting modifiers with immediates"
) ? static_cast<void> (0) : __assert_fail ("(Src1ModsVal == 0) && \"Not expecting modifiers with immediates\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1005, __PRETTY_FUNCTION__))
;
1006
1007 Src1Mods->setImm(Src0ModsVal);
1008 Src0Mods->setImm(Src1ModsVal);
1009 }
1010
1011 unsigned Reg = Src0.getReg();
1012 unsigned SubReg = Src0.getSubReg();
1013 if (Src1.isImm())
1014 Src0.ChangeToImmediate(Src1.getImm());
1015 else
1016 llvm_unreachable("Should only have immediates")::llvm::llvm_unreachable_internal("Should only have immediates"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1016)
;
1017
1018 Src1.ChangeToRegister(Reg, false);
1019 Src1.setSubReg(SubReg);
1020 } else {
1021 MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
1022 }
1023
1024 if (MI)
1025 MI->setDesc(get(CommutedOpcode));
1026
1027 return MI;
1028}
1029
1030// This needs to be implemented because the source modifiers may be inserted
1031// between the true commutable operands, and the base
1032// TargetInstrInfo::commuteInstruction uses it.
1033bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
1034 unsigned &SrcOpIdx0,
1035 unsigned &SrcOpIdx1) const {
1036 const MCInstrDesc &MCID = MI->getDesc();
1037 if (!MCID.isCommutable())
1038 return false;
1039
1040 unsigned Opc = MI->getOpcode();
1041 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1042 if (Src0Idx == -1)
1043 return false;
1044
1045 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
1046 // immediate. Also, immediate src0 operand is not handled in
1047 // SIInstrInfo::commuteInstruction();
1048 if (!MI->getOperand(Src0Idx).isReg())
1049 return false;
1050
1051 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1052 if (Src1Idx == -1)
1053 return false;
1054
1055 MachineOperand &Src1 = MI->getOperand(Src1Idx);
1056 if (Src1.isImm()) {
1057 // SIInstrInfo::commuteInstruction() does support commuting the immediate
1058 // operand src1 in 2 and 3 operand instructions.
1059 if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
1060 return false;
1061 } else if (Src1.isReg()) {
1062 // If any source modifiers are set, the generic instruction commuting won't
1063 // understand how to copy the source modifiers.
1064 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
1065 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
1066 return false;
1067 } else
1068 return false;
1069
1070 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
1071}
1072
1073unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1074 switch (Cond) {
1075 case SIInstrInfo::SCC_TRUE:
1076 return AMDGPU::S_CBRANCH_SCC1;
1077 case SIInstrInfo::SCC_FALSE:
1078 return AMDGPU::S_CBRANCH_SCC0;
1079 case SIInstrInfo::VCCNZ:
1080 return AMDGPU::S_CBRANCH_VCCNZ;
1081 case SIInstrInfo::VCCZ:
1082 return AMDGPU::S_CBRANCH_VCCZ;
1083 case SIInstrInfo::EXECNZ:
1084 return AMDGPU::S_CBRANCH_EXECNZ;
1085 case SIInstrInfo::EXECZ:
1086 return AMDGPU::S_CBRANCH_EXECZ;
1087 default:
1088 llvm_unreachable("invalid branch predicate")::llvm::llvm_unreachable_internal("invalid branch predicate",
"/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1088)
;
1089 }
1090}
1091
1092SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1093 switch (Opcode) {
1094 case AMDGPU::S_CBRANCH_SCC0:
1095 return SCC_FALSE;
1096 case AMDGPU::S_CBRANCH_SCC1:
1097 return SCC_TRUE;
1098 case AMDGPU::S_CBRANCH_VCCNZ:
1099 return VCCNZ;
1100 case AMDGPU::S_CBRANCH_VCCZ:
1101 return VCCZ;
1102 case AMDGPU::S_CBRANCH_EXECNZ:
1103 return EXECNZ;
1104 case AMDGPU::S_CBRANCH_EXECZ:
1105 return EXECZ;
1106 default:
1107 return INVALID_BR;
1108 }
1109}
1110
1111bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
1112 MachineBasicBlock *&TBB,
1113 MachineBasicBlock *&FBB,
1114 SmallVectorImpl<MachineOperand> &Cond,
1115 bool AllowModify) const {
1116 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1117
1118 if (I == MBB.end())
1119 return false;
1120
1121 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1122 // Unconditional Branch
1123 TBB = I->getOperand(0).getMBB();
1124 return false;
1125 }
1126
1127 BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1128 if (Pred == INVALID_BR)
1129 return true;
1130
1131 MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
1132 Cond.push_back(MachineOperand::CreateImm(Pred));
1133
1134 ++I;
1135
1136 if (I == MBB.end()) {
1137 // Conditional branch followed by fall-through.
1138 TBB = CondBB;
1139 return false;
1140 }
1141
1142 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1143 TBB = CondBB;
1144 FBB = I->getOperand(0).getMBB();
1145 return false;
1146 }
1147
1148 return true;
1149}
1150
1151unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
1152 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1153
1154 unsigned Count = 0;
1155 while (I != MBB.end()) {
1156 MachineBasicBlock::iterator Next = std::next(I);
1157 I->eraseFromParent();
1158 ++Count;
1159 I = Next;
1160 }
1161
1162 return Count;
1163}
1164
1165unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
1166 MachineBasicBlock *TBB,
1167 MachineBasicBlock *FBB,
1168 ArrayRef<MachineOperand> Cond,
1169 DebugLoc DL) const {
1170
1171 if (!FBB && Cond.empty()) {
1172 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1173 .addMBB(TBB);
1174 return 1;
1175 }
1176
1177 assert(TBB && Cond[0].isImm())((TBB && Cond[0].isImm()) ? static_cast<void> (
0) : __assert_fail ("TBB && Cond[0].isImm()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1177, __PRETTY_FUNCTION__))
;
1178
1179 unsigned Opcode
1180 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1181
1182 if (!FBB) {
1183 BuildMI(&MBB, DL, get(Opcode))
1184 .addMBB(TBB);
1185 return 1;
1186 }
1187
1188 assert(TBB && FBB)((TBB && FBB) ? static_cast<void> (0) : __assert_fail
("TBB && FBB", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1188, __PRETTY_FUNCTION__))
;
1189
1190 BuildMI(&MBB, DL, get(Opcode))
1191 .addMBB(TBB);
1192 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1193 .addMBB(FBB);
1194
1195 return 2;
1196}
1197
1198bool SIInstrInfo::ReverseBranchCondition(
1199 SmallVectorImpl<MachineOperand> &Cond) const {
1200 assert(Cond.size() == 1)((Cond.size() == 1) ? static_cast<void> (0) : __assert_fail
("Cond.size() == 1", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1200, __PRETTY_FUNCTION__))
;
1201 Cond[0].setImm(-Cond[0].getImm());
1202 return false;
1203}
1204
1205static void removeModOperands(MachineInstr &MI) {
1206 unsigned Opc = MI.getOpcode();
1207 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1208 AMDGPU::OpName::src0_modifiers);
1209 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1210 AMDGPU::OpName::src1_modifiers);
1211 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1212 AMDGPU::OpName::src2_modifiers);
1213
1214 MI.RemoveOperand(Src2ModIdx);
1215 MI.RemoveOperand(Src1ModIdx);
1216 MI.RemoveOperand(Src0ModIdx);
1217}
1218
1219// TODO: Maybe this should be removed this and custom fold everything in
1220// SIFoldOperands?
1221bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
1222 unsigned Reg, MachineRegisterInfo *MRI) const {
1223 if (!MRI->hasOneNonDBGUse(Reg))
1224 return false;
1225
1226 unsigned Opc = UseMI->getOpcode();
1227 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
1228 // Don't fold if we are using source modifiers. The new VOP2 instructions
1229 // don't have them.
1230 if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
1231 hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
1232 hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
1233 return false;
1234 }
1235
1236 const MachineOperand &ImmOp = DefMI->getOperand(1);
1237
1238 // If this is a free constant, there's no reason to do this.
1239 // TODO: We could fold this here instead of letting SIFoldOperands do it
1240 // later.
1241 if (isInlineConstant(ImmOp, 4))
1242 return false;
1243
1244 MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
1245 MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
1246 MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
1247
1248 // Multiplied part is the constant: Use v_madmk_f32
1249 // We should only expect these to be on src0 due to canonicalizations.
1250 if (Src0->isReg() && Src0->getReg() == Reg) {
1251 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
1252 return false;
1253
1254 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
1255 return false;
1256
1257 // We need to swap operands 0 and 1 since madmk constant is at operand 1.
1258
1259 const int64_t Imm = DefMI->getOperand(1).getImm();
1260
1261 // FIXME: This would be a lot easier if we could return a new instruction
1262 // instead of having to modify in place.
1263
1264 // Remove these first since they are at the end.
1265 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1266 AMDGPU::OpName::omod));
1267 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1268 AMDGPU::OpName::clamp));
1269
1270 unsigned Src1Reg = Src1->getReg();
1271 unsigned Src1SubReg = Src1->getSubReg();
1272 Src0->setReg(Src1Reg);
1273 Src0->setSubReg(Src1SubReg);
1274 Src0->setIsKill(Src1->isKill());
1275
1276 if (Opc == AMDGPU::V_MAC_F32_e64) {
1277 UseMI->untieRegOperand(
1278 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
1279 }
1280
1281 Src1->ChangeToImmediate(Imm);
1282
1283 removeModOperands(*UseMI);
1284 UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
1285
1286 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1287 if (DeleteDef)
1288 DefMI->eraseFromParent();
1289
1290 return true;
1291 }
1292
1293 // Added part is the constant: Use v_madak_f32
1294 if (Src2->isReg() && Src2->getReg() == Reg) {
1295 // Not allowed to use constant bus for another operand.
1296 // We can however allow an inline immediate as src0.
1297 if (!Src0->isImm() &&
1298 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
1299 return false;
1300
1301 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
1302 return false;
1303
1304 const int64_t Imm = DefMI->getOperand(1).getImm();
1305
1306 // FIXME: This would be a lot easier if we could return a new instruction
1307 // instead of having to modify in place.
1308
1309 // Remove these first since they are at the end.
1310 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1311 AMDGPU::OpName::omod));
1312 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
1313 AMDGPU::OpName::clamp));
1314
1315 if (Opc == AMDGPU::V_MAC_F32_e64) {
1316 UseMI->untieRegOperand(
1317 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
1318 }
1319
1320 // ChangingToImmediate adds Src2 back to the instruction.
1321 Src2->ChangeToImmediate(Imm);
1322
1323 // These come before src2.
1324 removeModOperands(*UseMI);
1325 UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
1326
1327 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1328 if (DeleteDef)
1329 DefMI->eraseFromParent();
1330
1331 return true;
1332 }
1333 }
1334
1335 return false;
1336}
1337
1338static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
1339 int WidthB, int OffsetB) {
1340 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1341 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1342 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1343 return LowOffset + LowWidth <= HighOffset;
1344}
1345
1346bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
1347 MachineInstr *MIb) const {
1348 unsigned BaseReg0, BaseReg1;
1349 int64_t Offset0, Offset1;
1350
1351 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
1352 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
1353
1354 if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) {
1355 // FIXME: Handle ds_read2 / ds_write2.
1356 return false;
1357 }
1358 unsigned Width0 = (*MIa->memoperands_begin())->getSize();
1359 unsigned Width1 = (*MIb->memoperands_begin())->getSize();
1360 if (BaseReg0 == BaseReg1 &&
1361 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
1362 return true;
1363 }
1364 }
1365
1366 return false;
1367}
1368
1369bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
1370 MachineInstr *MIb,
1371 AliasAnalysis *AA) const {
1372 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&((MIa && (MIa->mayLoad() || MIa->mayStore()) &&
"MIa must load from or modify a memory location") ? static_cast
<void> (0) : __assert_fail ("MIa && (MIa->mayLoad() || MIa->mayStore()) && \"MIa must load from or modify a memory location\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1373, __PRETTY_FUNCTION__))
1373 "MIa must load from or modify a memory location")((MIa && (MIa->mayLoad() || MIa->mayStore()) &&
"MIa must load from or modify a memory location") ? static_cast
<void> (0) : __assert_fail ("MIa && (MIa->mayLoad() || MIa->mayStore()) && \"MIa must load from or modify a memory location\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1373, __PRETTY_FUNCTION__))
;
1374 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&((MIb && (MIb->mayLoad() || MIb->mayStore()) &&
"MIb must load from or modify a memory location") ? static_cast
<void> (0) : __assert_fail ("MIb && (MIb->mayLoad() || MIb->mayStore()) && \"MIb must load from or modify a memory location\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1375, __PRETTY_FUNCTION__))
1375 "MIb must load from or modify a memory location")((MIb && (MIb->mayLoad() || MIb->mayStore()) &&
"MIb must load from or modify a memory location") ? static_cast
<void> (0) : __assert_fail ("MIb && (MIb->mayLoad() || MIb->mayStore()) && \"MIb must load from or modify a memory location\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1375, __PRETTY_FUNCTION__))
;
1376
1377 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
1378 return false;
1379
1380 // XXX - Can we relax this between address spaces?
1381 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
1382 return false;
1383
1384 // TODO: Should we check the address space from the MachineMemOperand? That
1385 // would allow us to distinguish objects we know don't alias based on the
1386 // underlying address space, even if it was lowered to a different one,
1387 // e.g. private accesses lowered to use MUBUF instructions on a scratch
1388 // buffer.
1389 if (isDS(*MIa)) {
1390 if (isDS(*MIb))
1391 return checkInstOffsetsDoNotOverlap(MIa, MIb);
1392
1393 return !isFLAT(*MIb);
1394 }
1395
1396 if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
1397 if (isMUBUF(*MIb) || isMTBUF(*MIb))
1398 return checkInstOffsetsDoNotOverlap(MIa, MIb);
1399
1400 return !isFLAT(*MIb) && !isSMRD(*MIb);
1401 }
1402
1403 if (isSMRD(*MIa)) {
1404 if (isSMRD(*MIb))
1405 return checkInstOffsetsDoNotOverlap(MIa, MIb);
1406
1407 return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa);
1408 }
1409
1410 if (isFLAT(*MIa)) {
1411 if (isFLAT(*MIb))
1412 return checkInstOffsetsDoNotOverlap(MIa, MIb);
1413
1414 return false;
1415 }
1416
1417 return false;
1418}
1419
1420MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
1421 MachineBasicBlock::iterator &MI,
1422 LiveVariables *LV) const {
1423
1424 switch (MI->getOpcode()) {
1425 default: return nullptr;
1426 case AMDGPU::V_MAC_F32_e64: break;
1427 case AMDGPU::V_MAC_F32_e32: {
1428 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
1429 if (Src0->isImm() && !isInlineConstant(*Src0, 4))
1430 return nullptr;
1431 break;
1432 }
1433 }
1434
1435 const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst);
1436 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
1437 const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
1438 const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
1439
1440 return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
1441 .addOperand(*Dst)
1442 .addImm(0) // Src0 mods
1443 .addOperand(*Src0)
1444 .addImm(0) // Src1 mods
1445 .addOperand(*Src1)
1446 .addImm(0) // Src mods
1447 .addOperand(*Src2)
1448 .addImm(0) // clamp
1449 .addImm(0); // omod
1450}
1451
1452bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1453 const MachineBasicBlock *MBB,
1454 const MachineFunction &MF) const {
1455 // Target-independent instructions do not have an implicit-use of EXEC, even
1456 // when they operate on VGPRs. Treating EXEC modifications as scheduling
1457 // boundaries prevents incorrect movements of such instructions.
1458 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1459 if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
1460 return true;
1461
1462 return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF);
1463}
1464
1465bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
1466 int64_t SVal = Imm.getSExtValue();
1467 if (SVal >= -16 && SVal <= 64)
1468 return true;
1469
1470 if (Imm.getBitWidth() == 64) {
1471 uint64_t Val = Imm.getZExtValue();
1472 return (DoubleToBits(0.0) == Val) ||
1473 (DoubleToBits(1.0) == Val) ||
1474 (DoubleToBits(-1.0) == Val) ||
1475 (DoubleToBits(0.5) == Val) ||
1476 (DoubleToBits(-0.5) == Val) ||
1477 (DoubleToBits(2.0) == Val) ||
1478 (DoubleToBits(-2.0) == Val) ||
1479 (DoubleToBits(4.0) == Val) ||
1480 (DoubleToBits(-4.0) == Val);
1481 }
1482
1483 // The actual type of the operand does not seem to matter as long
1484 // as the bits match one of the inline immediate values. For example:
1485 //
1486 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1487 // so it is a legal inline immediate.
1488 //
1489 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1490 // floating-point, so it is a legal inline immediate.
1491 uint32_t Val = Imm.getZExtValue();
1492
1493 return (FloatToBits(0.0f) == Val) ||
1494 (FloatToBits(1.0f) == Val) ||
1495 (FloatToBits(-1.0f) == Val) ||
1496 (FloatToBits(0.5f) == Val) ||
1497 (FloatToBits(-0.5f) == Val) ||
1498 (FloatToBits(2.0f) == Val) ||
1499 (FloatToBits(-2.0f) == Val) ||
1500 (FloatToBits(4.0f) == Val) ||
1501 (FloatToBits(-4.0f) == Val);
1502}
1503
1504bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
1505 unsigned OpSize) const {
1506 if (MO.isImm()) {
1507 // MachineOperand provides no way to tell the true operand size, since it
1508 // only records a 64-bit value. We need to know the size to determine if a
1509 // 32-bit floating point immediate bit pattern is legal for an integer
1510 // immediate. It would be for any 32-bit integer operand, but would not be
1511 // for a 64-bit one.
1512
1513 unsigned BitSize = 8 * OpSize;
1514 return isInlineConstant(APInt(BitSize, MO.getImm(), true));
1515 }
1516
1517 return false;
1518}
1519
1520bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
1521 unsigned OpSize) const {
1522 return MO.isImm() && !isInlineConstant(MO, OpSize);
1523}
1524
1525static bool compareMachineOp(const MachineOperand &Op0,
1526 const MachineOperand &Op1) {
1527 if (Op0.getType() != Op1.getType())
1528 return false;
1529
1530 switch (Op0.getType()) {
1531 case MachineOperand::MO_Register:
1532 return Op0.getReg() == Op1.getReg();
1533 case MachineOperand::MO_Immediate:
1534 return Op0.getImm() == Op1.getImm();
1535 default:
1536 llvm_unreachable("Didn't expect to be comparing these operand types")::llvm::llvm_unreachable_internal("Didn't expect to be comparing these operand types"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1536)
;
1537 }
1538}
1539
1540bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
1541 const MachineOperand &MO) const {
1542 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
1543
1544 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI())((MO.isImm() || MO.isTargetIndex() || MO.isFI()) ? static_cast
<void> (0) : __assert_fail ("MO.isImm() || MO.isTargetIndex() || MO.isFI()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1544, __PRETTY_FUNCTION__))
;
1545
1546 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
1547 return true;
1548
1549 if (OpInfo.RegClass < 0)
1550 return false;
1551
1552 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
1553 if (isLiteralConstant(MO, OpSize))
1554 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
1555
1556 return RI.opCanUseInlineConstant(OpInfo.OperandType);
1557}
1558
1559bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
1560 int Op32 = AMDGPU::getVOPe32(Opcode);
1561 if (Op32 == -1)
1562 return false;
1563
1564 return pseudoToMCOpcode(Op32) != -1;
1565}
1566
1567bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
1568 // The src0_modifier operand is present on all instructions
1569 // that have modifiers.
1570
1571 return AMDGPU::getNamedOperandIdx(Opcode,
1572 AMDGPU::OpName::src0_modifiers) != -1;
1573}
1574
1575bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
1576 unsigned OpName) const {
1577 const MachineOperand *Mods = getNamedOperand(MI, OpName);
1578 return Mods && Mods->getImm();
1579}
1580
1581bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
1582 const MachineOperand &MO,
1583 unsigned OpSize) const {
1584 // Literal constants use the constant bus.
1585 if (isLiteralConstant(MO, OpSize))
1586 return true;
1587
1588 if (!MO.isReg() || !MO.isUse())
1589 return false;
1590
1591 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
1592 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1593
1594 // FLAT_SCR is just an SGPR pair.
1595 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1596 return true;
1597
1598 // EXEC register uses the constant bus.
1599 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1600 return true;
1601
1602 // SGPRs use the constant bus
1603 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
1604 (!MO.isImplicit() &&
1605 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1606 AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
1607}
1608
1609static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
1610 for (const MachineOperand &MO : MI.implicit_operands()) {
1611 // We only care about reads.
1612 if (MO.isDef())
1613 continue;
1614
1615 switch (MO.getReg()) {
1616 case AMDGPU::VCC:
1617 case AMDGPU::M0:
1618 case AMDGPU::FLAT_SCR:
1619 return MO.getReg();
1620
1621 default:
1622 break;
1623 }
1624 }
1625
1626 return AMDGPU::NoRegister;
1627}
1628
1629bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
1630 StringRef &ErrInfo) const {
1631 uint16_t Opcode = MI->getOpcode();
1632 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1633 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
1634 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
1635 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
1636
1637 // Make sure the number of operands is correct.
1638 const MCInstrDesc &Desc = get(Opcode);
1639 if (!Desc.isVariadic() &&
1640 Desc.getNumOperands() != MI->getNumExplicitOperands()) {
1641 ErrInfo = "Instruction has wrong number of operands.";
1642 return false;
1643 }
1644
1645 // Make sure the register classes are correct.
1646 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
1647 if (MI->getOperand(i).isFPImm()) {
1648 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1649 "all fp values to integers.";
1650 return false;
1651 }
1652
1653 int RegClass = Desc.OpInfo[i].RegClass;
1654
1655 switch (Desc.OpInfo[i].OperandType) {
1656 case MCOI::OPERAND_REGISTER:
1657 if (MI->getOperand(i).isImm()) {
1658 ErrInfo = "Illegal immediate value for operand.";
1659 return false;
1660 }
1661 break;
1662 case AMDGPU::OPERAND_REG_IMM32:
1663 break;
1664 case AMDGPU::OPERAND_REG_INLINE_C:
1665 if (isLiteralConstant(MI->getOperand(i),
1666 RI.getRegClass(RegClass)->getSize())) {
1667 ErrInfo = "Illegal immediate value for operand.";
1668 return false;
1669 }
1670 break;
1671 case MCOI::OPERAND_IMMEDIATE:
1672 // Check if this operand is an immediate.
1673 // FrameIndex operands will be replaced by immediates, so they are
1674 // allowed.
1675 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
1676 ErrInfo = "Expected immediate, but got non-immediate";
1677 return false;
1678 }
1679 // Fall-through
1680 default:
1681 continue;
1682 }
1683
1684 if (!MI->getOperand(i).isReg())
1685 continue;
1686
1687 if (RegClass != -1) {
1688 unsigned Reg = MI->getOperand(i).getReg();
1689 if (TargetRegisterInfo::isVirtualRegister(Reg))
1690 continue;
1691
1692 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1693 if (!RC->contains(Reg)) {
1694 ErrInfo = "Operand has incorrect register class.";
1695 return false;
1696 }
1697 }
1698 }
1699
1700
1701 // Verify VOP*
1702 if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
1703 // Only look at the true operands. Only a real operand can use the constant
1704 // bus, and we don't want to check pseudo-operands like the source modifier
1705 // flags.
1706 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1707
1708 unsigned ConstantBusCount = 0;
1709 unsigned SGPRUsed = findImplicitSGPRRead(*MI);
1710 if (SGPRUsed != AMDGPU::NoRegister)
1711 ++ConstantBusCount;
1712
1713 for (int OpIdx : OpIndices) {
1714 if (OpIdx == -1)
1715 break;
1716 const MachineOperand &MO = MI->getOperand(OpIdx);
1717 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
1718 if (MO.isReg()) {
1719 if (MO.getReg() != SGPRUsed)
1720 ++ConstantBusCount;
1721 SGPRUsed = MO.getReg();
1722 } else {
1723 ++ConstantBusCount;
1724 }
1725 }
1726 }
1727 if (ConstantBusCount > 1) {
1728 ErrInfo = "VOP* instruction uses the constant bus more than once";
1729 return false;
1730 }
1731 }
1732
1733 // Verify misc. restrictions on specific instructions.
1734 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
1735 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
1736 const MachineOperand &Src0 = MI->getOperand(Src0Idx);
1737 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
1738 const MachineOperand &Src2 = MI->getOperand(Src2Idx);
1739 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
1740 if (!compareMachineOp(Src0, Src1) &&
1741 !compareMachineOp(Src0, Src2)) {
1742 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
1743 return false;
1744 }
1745 }
1746 }
1747
1748 // Make sure we aren't losing exec uses in the td files. This mostly requires
1749 // being careful when using let Uses to try to add other use registers.
1750 if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
1751 if (!MI->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
1752 ErrInfo = "VALU instruction does not implicitly read exec mask";
1753 return false;
1754 }
1755 }
1756
1757 return true;
1758}
1759
1760unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
1761 switch (MI.getOpcode()) {
1762 default: return AMDGPU::INSTRUCTION_LIST_END;
1763 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
1764 case AMDGPU::COPY: return AMDGPU::COPY;
1765 case AMDGPU::PHI: return AMDGPU::PHI;
1766 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
1767 case AMDGPU::S_MOV_B32:
1768 return MI.getOperand(1).isReg() ?
1769 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
1770 case AMDGPU::S_ADD_I32:
1771 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
1772 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
1773 case AMDGPU::S_SUB_I32:
1774 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
1775 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
1776 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
1777 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
1778 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
1779 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
1780 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
1781 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
1782 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
1783 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
1784 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
1785 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
1786 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
1787 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
1788 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
1789 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
1790 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
1791 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
1792 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
1793 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
1794 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
1795 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
1796 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
1797 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
1798 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
1799 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
1800 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
1801 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
1802 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
1803 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
1804 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
1805 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
1806 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
1807 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
1808 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
1809 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
1810 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
1811 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
1812 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
1813 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
1814 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
1815 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
1816 }
1817}
1818
1819bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
1820 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
1821}
1822
1823const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
1824 unsigned OpNo) const {
1825 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1826 const MCInstrDesc &Desc = get(MI.getOpcode());
1827 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
1828 Desc.OpInfo[OpNo].RegClass == -1) {
1829 unsigned Reg = MI.getOperand(OpNo).getReg();
1830
1831 if (TargetRegisterInfo::isVirtualRegister(Reg))
1832 return MRI.getRegClass(Reg);
1833 return RI.getPhysRegClass(Reg);
1834 }
1835
1836 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1837 return RI.getRegClass(RCID);
1838}
1839
1840bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
1841 switch (MI.getOpcode()) {
1842 case AMDGPU::COPY:
1843 case AMDGPU::REG_SEQUENCE:
1844 case AMDGPU::PHI:
1845 case AMDGPU::INSERT_SUBREG:
1846 return RI.hasVGPRs(getOpRegClass(MI, 0));
1847 default:
1848 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
1849 }
1850}
1851
1852void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
1853 MachineBasicBlock::iterator I = MI;
1854 MachineBasicBlock *MBB = MI->getParent();
1855 MachineOperand &MO = MI->getOperand(OpIdx);
1856 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1857 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
1858 const TargetRegisterClass *RC = RI.getRegClass(RCID);
1859 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1860 if (MO.isReg())
1861 Opcode = AMDGPU::COPY;
1862 else if (RI.isSGPRClass(RC))
1863 Opcode = AMDGPU::S_MOV_B32;
1864
1865
1866 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
1867 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
1868 VRC = &AMDGPU::VReg_64RegClass;
1869 else
1870 VRC = &AMDGPU::VGPR_32RegClass;
1871
1872 unsigned Reg = MRI.createVirtualRegister(VRC);
1873 DebugLoc DL = MBB->findDebugLoc(I);
1874 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
1875 .addOperand(MO);
1876 MO.ChangeToRegister(Reg, false);
1877}
1878
1879unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
1880 MachineRegisterInfo &MRI,
1881 MachineOperand &SuperReg,
1882 const TargetRegisterClass *SuperRC,
1883 unsigned SubIdx,
1884 const TargetRegisterClass *SubRC)
1885 const {
1886 MachineBasicBlock *MBB = MI->getParent();
1887 DebugLoc DL = MI->getDebugLoc();
1888 unsigned SubReg = MRI.createVirtualRegister(SubRC);
1889
1890 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
1891 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1892 .addReg(SuperReg.getReg(), 0, SubIdx);
1893 return SubReg;
1894 }
1895
1896 // Just in case the super register is itself a sub-register, copy it to a new
1897 // value so we don't need to worry about merging its subreg index with the
1898 // SubIdx passed to this function. The register coalescer should be able to
1899 // eliminate this extra copy.
1900 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
1901
1902 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
1903 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
1904
1905 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1906 .addReg(NewSuperReg, 0, SubIdx);
1907
1908 return SubReg;
1909}
1910
1911MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
1912 MachineBasicBlock::iterator MII,
1913 MachineRegisterInfo &MRI,
1914 MachineOperand &Op,
1915 const TargetRegisterClass *SuperRC,
1916 unsigned SubIdx,
1917 const TargetRegisterClass *SubRC) const {
1918 if (Op.isImm()) {
1919 // XXX - Is there a better way to do this?
1920 if (SubIdx == AMDGPU::sub0)
1921 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
1922 if (SubIdx == AMDGPU::sub1)
1923 return MachineOperand::CreateImm(Op.getImm() >> 32);
1924
1925 llvm_unreachable("Unhandled register index for immediate")::llvm::llvm_unreachable_internal("Unhandled register index for immediate"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1925)
;
1926 }
1927
1928 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
1929 SubIdx, SubRC);
1930 return MachineOperand::CreateReg(SubReg, false);
1931}
1932
1933// Change the order of operands from (0, 1, 2) to (0, 2, 1)
1934void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
1935 assert(Inst->getNumExplicitOperands() == 3)((Inst->getNumExplicitOperands() == 3) ? static_cast<void
> (0) : __assert_fail ("Inst->getNumExplicitOperands() == 3"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1935, __PRETTY_FUNCTION__))
;
1936 MachineOperand Op1 = Inst->getOperand(1);
1937 Inst->RemoveOperand(1);
1938 Inst->addOperand(Op1);
1939}
1940
1941bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
1942 const MCOperandInfo &OpInfo,
1943 const MachineOperand &MO) const {
1944 if (!MO.isReg())
1945 return false;
1946
1947 unsigned Reg = MO.getReg();
1948 const TargetRegisterClass *RC =
1949 TargetRegisterInfo::isVirtualRegister(Reg) ?
1950 MRI.getRegClass(Reg) :
1951 RI.getPhysRegClass(Reg);
1952
1953 const SIRegisterInfo *TRI =
1954 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1955 RC = TRI->getSubRegClass(RC, MO.getSubReg());
1956
1957 // In order to be legal, the common sub-class must be equal to the
1958 // class of the current operand. For example:
1959 //
1960 // v_mov_b32 s0 ; Operand defined as vsrc_32
1961 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
1962 //
1963 // s_sendmsg 0, s0 ; Operand defined as m0reg
1964 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
1965
1966 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
1967}
1968
1969bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1970 const MCOperandInfo &OpInfo,
1971 const MachineOperand &MO) const {
1972 if (MO.isReg())
1973 return isLegalRegOperand(MRI, OpInfo, MO);
1974
1975 // Handle non-register types that are treated like immediates.
1976 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI())((MO.isImm() || MO.isTargetIndex() || MO.isFI()) ? static_cast
<void> (0) : __assert_fail ("MO.isImm() || MO.isTargetIndex() || MO.isFI()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 1976, __PRETTY_FUNCTION__))
;
1977 return true;
1978}
1979
1980bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
1981 const MachineOperand *MO) const {
1982 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1983 const MCInstrDesc &InstDesc = MI->getDesc();
1984 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1985 const TargetRegisterClass *DefinedRC =
2
'DefinedRC' initialized to a null pointer value
1986 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
1
'?' condition is false
1987 if (!MO)
3
Assuming 'MO' is non-null
4
Taking false branch
1988 MO = &MI->getOperand(OpIdx);
1989
1990 if (isVALU(*MI) &&
1991 usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
5
Called C++ object pointer is null
1992
1993 RegSubRegPair SGPRUsed;
1994 if (MO->isReg())
1995 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
1996
1997 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1998 if (i == OpIdx)
1999 continue;
2000 const MachineOperand &Op = MI->getOperand(i);
2001 if (Op.isReg() &&
2002 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
2003 usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
2004 return false;
2005 }
2006 }
2007 }
2008
2009 if (MO->isReg()) {
2010 assert(DefinedRC)((DefinedRC) ? static_cast<void> (0) : __assert_fail ("DefinedRC"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2010, __PRETTY_FUNCTION__))
;
2011 return isLegalRegOperand(MRI, OpInfo, *MO);
2012 }
2013
2014
2015 // Handle non-register types that are treated like immediates.
2016 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI())((MO->isImm() || MO->isTargetIndex() || MO->isFI()) ?
static_cast<void> (0) : __assert_fail ("MO->isImm() || MO->isTargetIndex() || MO->isFI()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2016, __PRETTY_FUNCTION__))
;
2017
2018 if (!DefinedRC) {
2019 // This operand expects an immediate.
2020 return true;
2021 }
2022
2023 return isImmOperandLegal(MI, OpIdx, *MO);
2024}
2025
2026void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
2027 MachineInstr *MI) const {
2028 unsigned Opc = MI->getOpcode();
2029 const MCInstrDesc &InstrDesc = get(Opc);
2030
2031 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
2032 MachineOperand &Src1 = MI->getOperand(Src1Idx);
2033
2034 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
2035 // we need to only have one constant bus use.
2036 //
2037 // Note we do not need to worry about literal constants here. They are
2038 // disabled for the operand type for instructions because they will always
2039 // violate the one constant bus use rule.
2040 bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
2041 if (HasImplicitSGPR) {
2042 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
2043 MachineOperand &Src0 = MI->getOperand(Src0Idx);
2044
2045 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
2046 legalizeOpWithMove(MI, Src0Idx);
2047 }
2048
2049 // VOP2 src0 instructions support all operand types, so we don't need to check
2050 // their legality. If src1 is already legal, we don't need to do anything.
2051 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
2052 return;
2053
2054 // We do not use commuteInstruction here because it is too aggressive and will
2055 // commute if it is possible. We only want to commute here if it improves
2056 // legality. This can be called a fairly large number of times so don't waste
2057 // compile time pointlessly swapping and checking legality again.
2058 if (HasImplicitSGPR || !MI->isCommutable()) {
2059 legalizeOpWithMove(MI, Src1Idx);
2060 return;
2061 }
2062
2063 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
2064 MachineOperand &Src0 = MI->getOperand(Src0Idx);
2065
2066 // If src0 can be used as src1, commuting will make the operands legal.
2067 // Otherwise we have to give up and insert a move.
2068 //
2069 // TODO: Other immediate-like operand kinds could be commuted if there was a
2070 // MachineOperand::ChangeTo* for them.
2071 if ((!Src1.isImm() && !Src1.isReg()) ||
2072 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
2073 legalizeOpWithMove(MI, Src1Idx);
2074 return;
2075 }
2076
2077 int CommutedOpc = commuteOpcode(*MI);
2078 if (CommutedOpc == -1) {
2079 legalizeOpWithMove(MI, Src1Idx);
2080 return;
2081 }
2082
2083 MI->setDesc(get(CommutedOpc));
2084
2085 unsigned Src0Reg = Src0.getReg();
2086 unsigned Src0SubReg = Src0.getSubReg();
2087 bool Src0Kill = Src0.isKill();
2088
2089 if (Src1.isImm())
2090 Src0.ChangeToImmediate(Src1.getImm());
2091 else if (Src1.isReg()) {
2092 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
2093 Src0.setSubReg(Src1.getSubReg());
2094 } else
2095 llvm_unreachable("Should only have register or immediate operands")::llvm::llvm_unreachable_internal("Should only have register or immediate operands"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2095)
;
2096
2097 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
2098 Src1.setSubReg(Src0SubReg);
2099}
2100
2101// Legalize VOP3 operands. Because all operand types are supported for any
2102// operand, and since literal constants are not allowed and should never be
2103// seen, we only need to worry about inserting copies if we use multiple SGPR
2104// operands.
2105void SIInstrInfo::legalizeOperandsVOP3(
2106 MachineRegisterInfo &MRI,
2107 MachineInstr *MI) const {
2108 unsigned Opc = MI->getOpcode();
2109
2110 int VOP3Idx[3] = {
2111 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
2112 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
2113 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
2114 };
2115
2116 // Find the one SGPR operand we are allowed to use.
2117 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
2118
2119 for (unsigned i = 0; i < 3; ++i) {
2120 int Idx = VOP3Idx[i];
2121 if (Idx == -1)
2122 break;
2123 MachineOperand &MO = MI->getOperand(Idx);
2124
2125 // We should never see a VOP3 instruction with an illegal immediate operand.
2126 if (!MO.isReg())
2127 continue;
2128
2129 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2130 continue; // VGPRs are legal
2131
2132 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
2133 SGPRReg = MO.getReg();
2134 // We can use one SGPR in each VOP3 instruction.
2135 continue;
2136 }
2137
2138 // If we make it this far, then the operand is not legal and we must
2139 // legalize it.
2140 legalizeOpWithMove(MI, Idx);
2141 }
2142}
2143
2144unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
2145 MachineRegisterInfo &MRI) const {
2146 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
2147 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
2148 unsigned DstReg = MRI.createVirtualRegister(SRC);
2149 unsigned SubRegs = VRC->getSize() / 4;
2150
2151 SmallVector<unsigned, 8> SRegs;
2152 for (unsigned i = 0; i < SubRegs; ++i) {
2153 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2154 BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
2155 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
2156 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
2157 SRegs.push_back(SGPR);
2158 }
2159
2160 MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
2161 UseMI->getDebugLoc(),
2162 get(AMDGPU::REG_SEQUENCE), DstReg);
2163 for (unsigned i = 0; i < SubRegs; ++i) {
2164 MIB.addReg(SRegs[i]);
2165 MIB.addImm(RI.getSubRegFromChannel(i));
2166 }
2167 return DstReg;
2168}
2169
2170void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
2171 MachineInstr *MI) const {
2172
2173 // If the pointer is store in VGPRs, then we need to move them to
2174 // SGPRs using v_readfirstlane. This is safe because we only select
2175 // loads with uniform pointers to SMRD instruction so we know the
2176 // pointer value is uniform.
2177 MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
2178 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
2179 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
2180 SBase->setReg(SGPR);
2181 }
2182}
2183
2184void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
2185 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2186
2187 // Legalize VOP2
2188 if (isVOP2(*MI) || isVOPC(*MI)) {
2189 legalizeOperandsVOP2(MRI, MI);
2190 return;
2191 }
2192
2193 // Legalize VOP3
2194 if (isVOP3(*MI)) {
2195 legalizeOperandsVOP3(MRI, MI);
2196 return;
2197 }
2198
2199 // Legalize SMRD
2200 if (isSMRD(*MI)) {
2201 legalizeOperandsSMRD(MRI, MI);
2202 return;
2203 }
2204
2205 // Legalize REG_SEQUENCE and PHI
2206 // The register class of the operands much be the same type as the register
2207 // class of the output.
2208 if (MI->getOpcode() == AMDGPU::PHI) {
2209 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
2210 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
2211 if (!MI->getOperand(i).isReg() ||
2212 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
2213 continue;
2214 const TargetRegisterClass *OpRC =
2215 MRI.getRegClass(MI->getOperand(i).getReg());
2216 if (RI.hasVGPRs(OpRC)) {
2217 VRC = OpRC;
2218 } else {
2219 SRC = OpRC;
2220 }
2221 }
2222
2223 // If any of the operands are VGPR registers, then they all most be
2224 // otherwise we will create illegal VGPR->SGPR copies when legalizing
2225 // them.
2226 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
2227 if (!VRC) {
2228 assert(SRC)((SRC) ? static_cast<void> (0) : __assert_fail ("SRC", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2228, __PRETTY_FUNCTION__))
;
2229 VRC = RI.getEquivalentVGPRClass(SRC);
2230 }
2231 RC = VRC;
2232 } else {
2233 RC = SRC;
2234 }
2235
2236 // Update all the operands so they have the same type.
2237 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2238 MachineOperand &Op = MI->getOperand(I);
2239 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2240 continue;
2241 unsigned DstReg = MRI.createVirtualRegister(RC);
2242
2243 // MI is a PHI instruction.
2244 MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
2245 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2246
2247 BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2248 .addOperand(Op);
2249 Op.setReg(DstReg);
2250 }
2251 }
2252
2253 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2254 // VGPR dest type and SGPR sources, insert copies so all operands are
2255 // VGPRs. This seems to help operand folding / the register coalescer.
2256 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
2257 MachineBasicBlock *MBB = MI->getParent();
2258 const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
2259 if (RI.hasVGPRs(DstRC)) {
2260 // Update all the operands so they are VGPR register classes. These may
2261 // not be the same register class because REG_SEQUENCE supports mixing
2262 // subregister index types e.g. sub0_sub1 + sub2 + sub3
2263 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2264 MachineOperand &Op = MI->getOperand(I);
2265 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2266 continue;
2267
2268 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2269 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2270 if (VRC == OpRC)
2271 continue;
2272
2273 unsigned DstReg = MRI.createVirtualRegister(VRC);
2274
2275 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2276 .addOperand(Op);
2277
2278 Op.setReg(DstReg);
2279 Op.setIsKill();
2280 }
2281 }
2282
2283 return;
2284 }
2285
2286 // Legalize INSERT_SUBREG
2287 // src0 must have the same register class as dst
2288 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
2289 unsigned Dst = MI->getOperand(0).getReg();
2290 unsigned Src0 = MI->getOperand(1).getReg();
2291 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2292 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2293 if (DstRC != Src0RC) {
2294 MachineBasicBlock &MBB = *MI->getParent();
2295 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
2296 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
2297 .addReg(Src0);
2298 MI->getOperand(1).setReg(NewSrc0);
2299 }
2300 return;
2301 }
2302
2303 // Legalize MIMG
2304 if (isMIMG(*MI)) {
2305 MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
2306 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2307 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2308 SRsrc->setReg(SGPR);
2309 }
2310
2311 MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
2312 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2313 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2314 SSamp->setReg(SGPR);
2315 }
2316 return;
2317 }
2318
2319 // Legalize MUBUF* instructions
2320 // FIXME: If we start using the non-addr64 instructions for compute, we
2321 // may need to legalize them here.
2322 int SRsrcIdx =
2323 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
2324 if (SRsrcIdx != -1) {
2325 // We have an MUBUF instruction
2326 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
2327 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
2328 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2329 RI.getRegClass(SRsrcRC))) {
2330 // The operands are legal.
2331 // FIXME: We may need to legalize operands besided srsrc.
2332 return;
2333 }
2334
2335 MachineBasicBlock &MBB = *MI->getParent();
2336
2337 // Extract the ptr from the resource descriptor.
2338 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2339 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
2340
2341 // Create an empty resource descriptor
2342 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2343 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2344 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2345 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
2346 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
2347
2348 // Zero64 = 0
2349 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
2350 Zero64)
2351 .addImm(0);
2352
2353 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2354 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2355 SRsrcFormatLo)
2356 .addImm(RsrcDataFormat & 0xFFFFFFFF);
2357
2358 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2359 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2360 SRsrcFormatHi)
2361 .addImm(RsrcDataFormat >> 32);
2362
2363 // NewSRsrc = {Zero64, SRsrcFormat}
2364 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2365 .addReg(Zero64)
2366 .addImm(AMDGPU::sub0_sub1)
2367 .addReg(SRsrcFormatLo)
2368 .addImm(AMDGPU::sub2)
2369 .addReg(SRsrcFormatHi)
2370 .addImm(AMDGPU::sub3);
2371
2372 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2373 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2374 if (VAddr) {
2375 // This is already an ADDR64 instruction so we need to add the pointer
2376 // extracted from the resource descriptor to the current value of VAddr.
2377 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2378 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2379
2380 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
2381 DebugLoc DL = MI->getDebugLoc();
2382 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
2383 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2384 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
2385
2386 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
2387 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
2388 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2389 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
2390
2391 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2392 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2393 .addReg(NewVAddrLo)
2394 .addImm(AMDGPU::sub0)
2395 .addReg(NewVAddrHi)
2396 .addImm(AMDGPU::sub1);
2397 } else {
2398 // This instructions is the _OFFSET variant, so we need to convert it to
2399 // ADDR64.
2400 assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()((MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration
() < AMDGPUSubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"
) ? static_cast<void> (0) : __assert_fail ("MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && \"FIXME: Need to emit flat atomics here\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2402, __PRETTY_FUNCTION__))
2401 < AMDGPUSubtarget::VOLCANIC_ISLANDS &&((MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration
() < AMDGPUSubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"
) ? static_cast<void> (0) : __assert_fail ("MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && \"FIXME: Need to emit flat atomics here\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2402, __PRETTY_FUNCTION__))
2402 "FIXME: Need to emit flat atomics here")((MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration
() < AMDGPUSubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"
) ? static_cast<void> (0) : __assert_fail ("MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && \"FIXME: Need to emit flat atomics here\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2402, __PRETTY_FUNCTION__))
;
2403
2404 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
2405 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
2406 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
2407 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
2408
2409 // Atomics rith return have have an additional tied operand and are
2410 // missing some of the special bits.
2411 MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
2412 MachineInstr *Addr64;
2413
2414 if (!VDataIn) {
2415 // Regular buffer load / store.
2416 MachineInstrBuilder MIB
2417 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2418 .addOperand(*VData)
2419 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2420 // This will be replaced later
2421 // with the new value of vaddr.
2422 .addOperand(*SRsrc)
2423 .addOperand(*SOffset)
2424 .addOperand(*Offset);
2425
2426 // Atomics do not have this operand.
2427 if (const MachineOperand *GLC
2428 = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
2429 MIB.addImm(GLC->getImm());
2430 }
2431
2432 MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
2433
2434 if (const MachineOperand *TFE
2435 = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
2436 MIB.addImm(TFE->getImm());
2437 }
2438
2439 MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2440 Addr64 = MIB;
2441 } else {
2442 // Atomics with return.
2443 Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2444 .addOperand(*VData)
2445 .addOperand(*VDataIn)
2446 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2447 // This will be replaced later
2448 // with the new value of vaddr.
2449 .addOperand(*SRsrc)
2450 .addOperand(*SOffset)
2451 .addOperand(*Offset)
2452 .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
2453 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2454 }
2455
2456 MI->removeFromParent();
2457 MI = Addr64;
2458
2459 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2460 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2461 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2462 .addImm(AMDGPU::sub0)
2463 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2464 .addImm(AMDGPU::sub1);
2465
2466 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2467 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
2468 }
2469
2470 // Update the instruction to use NewVaddr
2471 VAddr->setReg(NewVAddr);
2472 // Update the instruction to use NewSRsrc
2473 SRsrc->setReg(NewSRsrc);
2474 }
2475}
2476
2477void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
2478 SmallVector<MachineInstr *, 128> Worklist;
2479 Worklist.push_back(&TopInst);
2480
2481 while (!Worklist.empty()) {
2482 MachineInstr *Inst = Worklist.pop_back_val();
2483 MachineBasicBlock *MBB = Inst->getParent();
2484 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2485
2486 unsigned Opcode = Inst->getOpcode();
2487 unsigned NewOpcode = getVALUOp(*Inst);
2488
2489 // Handle some special cases
2490 switch (Opcode) {
2491 default:
2492 break;
2493 case AMDGPU::S_AND_B64:
2494 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
2495 Inst->eraseFromParent();
2496 continue;
2497
2498 case AMDGPU::S_OR_B64:
2499 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
2500 Inst->eraseFromParent();
2501 continue;
2502
2503 case AMDGPU::S_XOR_B64:
2504 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
2505 Inst->eraseFromParent();
2506 continue;
2507
2508 case AMDGPU::S_NOT_B64:
2509 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
2510 Inst->eraseFromParent();
2511 continue;
2512
2513 case AMDGPU::S_BCNT1_I32_B64:
2514 splitScalar64BitBCNT(Worklist, Inst);
2515 Inst->eraseFromParent();
2516 continue;
2517
2518 case AMDGPU::S_BFE_I64: {
2519 splitScalar64BitBFE(Worklist, Inst);
2520 Inst->eraseFromParent();
2521 continue;
2522 }
2523
2524 case AMDGPU::S_LSHL_B32:
2525 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2526 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2527 swapOperands(Inst);
2528 }
2529 break;
2530 case AMDGPU::S_ASHR_I32:
2531 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2532 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2533 swapOperands(Inst);
2534 }
2535 break;
2536 case AMDGPU::S_LSHR_B32:
2537 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2538 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2539 swapOperands(Inst);
2540 }
2541 break;
2542 case AMDGPU::S_LSHL_B64:
2543 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2544 NewOpcode = AMDGPU::V_LSHLREV_B64;
2545 swapOperands(Inst);
2546 }
2547 break;
2548 case AMDGPU::S_ASHR_I64:
2549 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2550 NewOpcode = AMDGPU::V_ASHRREV_I64;
2551 swapOperands(Inst);
2552 }
2553 break;
2554 case AMDGPU::S_LSHR_B64:
2555 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2556 NewOpcode = AMDGPU::V_LSHRREV_B64;
2557 swapOperands(Inst);
2558 }
2559 break;
2560
2561 case AMDGPU::S_ABS_I32:
2562 lowerScalarAbs(Worklist, Inst);
2563 Inst->eraseFromParent();
2564 continue;
2565
2566 case AMDGPU::S_CBRANCH_SCC0:
2567 case AMDGPU::S_CBRANCH_SCC1:
2568 // Clear unused bits of vcc
2569 BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
2570 .addReg(AMDGPU::EXEC)
2571 .addReg(AMDGPU::VCC);
2572 break;
2573
2574 case AMDGPU::S_BFE_U64:
2575 case AMDGPU::S_BFM_B64:
2576 llvm_unreachable("Moving this op to VALU not implemented")::llvm::llvm_unreachable_internal("Moving this op to VALU not implemented"
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2576)
;
2577 }
2578
2579 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2580 // We cannot move this instruction to the VALU, so we should try to
2581 // legalize its operands instead.
2582 legalizeOperands(Inst);
2583 continue;
2584 }
2585
2586 // Use the new VALU Opcode.
2587 const MCInstrDesc &NewDesc = get(NewOpcode);
2588 Inst->setDesc(NewDesc);
2589
2590 // Remove any references to SCC. Vector instructions can't read from it, and
2591 // We're just about to add the implicit use / defs of VCC, and we don't want
2592 // both.
2593 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
2594 MachineOperand &Op = Inst->getOperand(i);
2595 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
2596 Inst->RemoveOperand(i);
2597 addSCCDefUsersToVALUWorklist(Inst, Worklist);
2598 }
2599 }
2600
2601 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2602 // We are converting these to a BFE, so we need to add the missing
2603 // operands for the size and offset.
2604 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
2605 Inst->addOperand(MachineOperand::CreateImm(0));
2606 Inst->addOperand(MachineOperand::CreateImm(Size));
2607
2608 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
2609 // The VALU version adds the second operand to the result, so insert an
2610 // extra 0 operand.
2611 Inst->addOperand(MachineOperand::CreateImm(0));
2612 }
2613
2614 Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
2615
2616 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
2617 const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
2618 // If we need to move this to VGPRs, we need to unpack the second operand
2619 // back into the 2 separate ones for bit offset and width.
2620 assert(OffsetWidthOp.isImm() &&((OffsetWidthOp.isImm() && "Scalar BFE is only implemented for constant width and offset"
) ? static_cast<void> (0) : __assert_fail ("OffsetWidthOp.isImm() && \"Scalar BFE is only implemented for constant width and offset\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2621, __PRETTY_FUNCTION__))
2621 "Scalar BFE is only implemented for constant width and offset")((OffsetWidthOp.isImm() && "Scalar BFE is only implemented for constant width and offset"
) ? static_cast<void> (0) : __assert_fail ("OffsetWidthOp.isImm() && \"Scalar BFE is only implemented for constant width and offset\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2621, __PRETTY_FUNCTION__))
;
2622 uint32_t Imm = OffsetWidthOp.getImm();
2623
2624 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2625 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2626 Inst->RemoveOperand(2); // Remove old immediate.
2627 Inst->addOperand(MachineOperand::CreateImm(Offset));
2628 Inst->addOperand(MachineOperand::CreateImm(BitWidth));
2629 }
2630
2631 bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef();
2632 unsigned NewDstReg = AMDGPU::NoRegister;
2633 if (HasDst) {
2634 // Update the destination register class.
2635 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
2636 if (!NewDstRC)
2637 continue;
2638
2639 unsigned DstReg = Inst->getOperand(0).getReg();
2640 NewDstReg = MRI.createVirtualRegister(NewDstRC);
2641 MRI.replaceRegWith(DstReg, NewDstReg);
2642 }
2643
2644 // Legalize the operands
2645 legalizeOperands(Inst);
2646
2647 if (HasDst)
2648 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
2649 }
2650}
2651
2652//===----------------------------------------------------------------------===//
2653// Indirect addressing callbacks
2654//===----------------------------------------------------------------------===//
2655
2656const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
2657 return &AMDGPU::VGPR_32RegClass;
2658}
2659
2660void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
2661 MachineInstr *Inst) const {
2662 MachineBasicBlock &MBB = *Inst->getParent();
2663 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2664 MachineBasicBlock::iterator MII = Inst;
2665 DebugLoc DL = Inst->getDebugLoc();
2666
2667 MachineOperand &Dest = Inst->getOperand(0);
2668 MachineOperand &Src = Inst->getOperand(1);
2669 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2670 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2671
2672 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
2673 .addImm(0)
2674 .addReg(Src.getReg());
2675
2676 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
2677 .addReg(Src.getReg())
2678 .addReg(TmpReg);
2679
2680 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2681 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2682}
2683
2684void SIInstrInfo::splitScalar64BitUnaryOp(
2685 SmallVectorImpl<MachineInstr *> &Worklist,
2686 MachineInstr *Inst,
2687 unsigned Opcode) const {
2688 MachineBasicBlock &MBB = *Inst->getParent();
2689 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2690
2691 MachineOperand &Dest = Inst->getOperand(0);
2692 MachineOperand &Src0 = Inst->getOperand(1);
2693 DebugLoc DL = Inst->getDebugLoc();
2694
2695 MachineBasicBlock::iterator MII = Inst;
2696
2697 const MCInstrDesc &InstDesc = get(Opcode);
2698 const TargetRegisterClass *Src0RC = Src0.isReg() ?
2699 MRI.getRegClass(Src0.getReg()) :
2700 &AMDGPU::SGPR_32RegClass;
2701
2702 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2703
2704 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2705 AMDGPU::sub0, Src0SubRC);
2706
2707 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2708 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2709 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
2710
2711 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
2712 BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2713 .addOperand(SrcReg0Sub0);
2714
2715 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2716 AMDGPU::sub1, Src0SubRC);
2717
2718 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
2719 BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2720 .addOperand(SrcReg0Sub1);
2721
2722 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
2723 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2724 .addReg(DestSub0)
2725 .addImm(AMDGPU::sub0)
2726 .addReg(DestSub1)
2727 .addImm(AMDGPU::sub1);
2728
2729 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2730
2731 // We don't need to legalizeOperands here because for a single operand, src0
2732 // will support any kind of input.
2733
2734 // Move all users of this moved value.
2735 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
2736}
2737
2738void SIInstrInfo::splitScalar64BitBinaryOp(
2739 SmallVectorImpl<MachineInstr *> &Worklist,
2740 MachineInstr *Inst,
2741 unsigned Opcode) const {
2742 MachineBasicBlock &MBB = *Inst->getParent();
2743 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2744
2745 MachineOperand &Dest = Inst->getOperand(0);
2746 MachineOperand &Src0 = Inst->getOperand(1);
2747 MachineOperand &Src1 = Inst->getOperand(2);
2748 DebugLoc DL = Inst->getDebugLoc();
2749
2750 MachineBasicBlock::iterator MII = Inst;
2751
2752 const MCInstrDesc &InstDesc = get(Opcode);
2753 const TargetRegisterClass *Src0RC = Src0.isReg() ?
2754 MRI.getRegClass(Src0.getReg()) :
2755 &AMDGPU::SGPR_32RegClass;
2756
2757 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2758 const TargetRegisterClass *Src1RC = Src1.isReg() ?
2759 MRI.getRegClass(Src1.getReg()) :
2760 &AMDGPU::SGPR_32RegClass;
2761
2762 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
2763
2764 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2765 AMDGPU::sub0, Src0SubRC);
2766 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2767 AMDGPU::sub0, Src1SubRC);
2768
2769 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2770 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2771 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
2772
2773 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
2774 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2775 .addOperand(SrcReg0Sub0)
2776 .addOperand(SrcReg1Sub0);
2777
2778 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2779 AMDGPU::sub1, Src0SubRC);
2780 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2781 AMDGPU::sub1, Src1SubRC);
2782
2783 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
2784 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2785 .addOperand(SrcReg0Sub1)
2786 .addOperand(SrcReg1Sub1);
2787
2788 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
2789 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2790 .addReg(DestSub0)
2791 .addImm(AMDGPU::sub0)
2792 .addReg(DestSub1)
2793 .addImm(AMDGPU::sub1);
2794
2795 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2796
2797 // Try to legalize the operands in case we need to swap the order to keep it
2798 // valid.
2799 legalizeOperands(LoHalf);
2800 legalizeOperands(HiHalf);
2801
2802 // Move all users of this moved vlaue.
2803 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
2804}
2805
2806void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
2807 MachineInstr *Inst) const {
2808 MachineBasicBlock &MBB = *Inst->getParent();
2809 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2810
2811 MachineBasicBlock::iterator MII = Inst;
2812 DebugLoc DL = Inst->getDebugLoc();
2813
2814 MachineOperand &Dest = Inst->getOperand(0);
2815 MachineOperand &Src = Inst->getOperand(1);
2816
2817 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
2818 const TargetRegisterClass *SrcRC = Src.isReg() ?
2819 MRI.getRegClass(Src.getReg()) :
2820 &AMDGPU::SGPR_32RegClass;
2821
2822 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2823 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2824
2825 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2826
2827 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2828 AMDGPU::sub0, SrcSubRC);
2829 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2830 AMDGPU::sub1, SrcSubRC);
2831
2832 BuildMI(MBB, MII, DL, InstDesc, MidReg)
2833 .addOperand(SrcRegSub0)
2834 .addImm(0);
2835
2836 BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2837 .addOperand(SrcRegSub1)
2838 .addReg(MidReg);
2839
2840 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2841
2842 // We don't need to legalize operands here. src0 for etiher instruction can be
2843 // an SGPR, and the second input is unused or determined here.
2844 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2845}
2846
2847void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2848 MachineInstr *Inst) const {
2849 MachineBasicBlock &MBB = *Inst->getParent();
2850 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2851 MachineBasicBlock::iterator MII = Inst;
2852 DebugLoc DL = Inst->getDebugLoc();
2853
2854 MachineOperand &Dest = Inst->getOperand(0);
2855 uint32_t Imm = Inst->getOperand(2).getImm();
2856 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2857 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2858
2859 (void) Offset;
2860
2861 // Only sext_inreg cases handled.
2862 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&((Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth
<= 32 && Offset == 0 && "Not implemented"
) ? static_cast<void> (0) : __assert_fail ("Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && \"Not implemented\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2865, __PRETTY_FUNCTION__))
2863 BitWidth <= 32 &&((Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth
<= 32 && Offset == 0 && "Not implemented"
) ? static_cast<void> (0) : __assert_fail ("Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && \"Not implemented\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2865, __PRETTY_FUNCTION__))
2864 Offset == 0 &&((Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth
<= 32 && Offset == 0 && "Not implemented"
) ? static_cast<void> (0) : __assert_fail ("Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && \"Not implemented\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2865, __PRETTY_FUNCTION__))
2865 "Not implemented")((Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth
<= 32 && Offset == 0 && "Not implemented"
) ? static_cast<void> (0) : __assert_fail ("Inst->getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && Offset == 0 && \"Not implemented\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Target/AMDGPU/SIInstrInfo.cpp"
, 2865, __PRETTY_FUNCTION__))
;
2866
2867 if (BitWidth < 32) {
2868 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2869 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2870 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2871
2872 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2873 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2874 .addImm(0)
2875 .addImm(BitWidth);
2876
2877 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2878 .addImm(31)
2879 .addReg(MidRegLo);
2880
2881 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2882 .addReg(MidRegLo)
2883 .addImm(AMDGPU::sub0)
2884 .addReg(MidRegHi)
2885 .addImm(AMDGPU::sub1);
2886
2887 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2888 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2889 return;
2890 }
2891
2892 MachineOperand &Src = Inst->getOperand(1);
2893 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2894 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2895
2896 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2897 .addImm(31)
2898 .addReg(Src.getReg(), 0, AMDGPU::sub0);
2899
2900 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2901 .addReg(Src.getReg(), 0, AMDGPU::sub0)
2902 .addImm(AMDGPU::sub0)
2903 .addReg(TmpReg)
2904 .addImm(AMDGPU::sub1);
2905
2906 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2907 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2908}
2909
2910void SIInstrInfo::addUsersToMoveToVALUWorklist(
2911 unsigned DstReg,
2912 MachineRegisterInfo &MRI,
2913 SmallVectorImpl<MachineInstr *> &Worklist) const {
2914 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
2915 E = MRI.use_end(); I != E; ++I) {
2916 MachineInstr &UseMI = *I->getParent();
2917 if (!canReadVGPR(UseMI, I.getOperandNo())) {
2918 Worklist.push_back(&UseMI);
2919 }
2920 }
2921}
2922
2923void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst,
2924 SmallVectorImpl<MachineInstr *> &Worklist) const {
2925 // This assumes that all the users of SCC are in the same block
2926 // as the SCC def.
2927 for (MachineBasicBlock::iterator I = SCCDefInst,
2928 E = SCCDefInst->getParent()->end(); I != E; ++I) {
2929
2930 // Exit if we find another SCC def.
2931 if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
2932 return;
2933
2934 if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
2935 Worklist.push_back(I);
2936 }
2937}
2938
2939const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
2940 const MachineInstr &Inst) const {
2941 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
2942
2943 switch (Inst.getOpcode()) {
2944 // For target instructions, getOpRegClass just returns the virtual register
2945 // class associated with the operand, so we need to find an equivalent VGPR
2946 // register class in order to move the instruction to the VALU.
2947 case AMDGPU::COPY:
2948 case AMDGPU::PHI:
2949 case AMDGPU::REG_SEQUENCE:
2950 case AMDGPU::INSERT_SUBREG:
2951 if (RI.hasVGPRs(NewDstRC))
2952 return nullptr;
2953
2954 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
2955 if (!NewDstRC)
2956 return nullptr;
2957 return NewDstRC;
2958 default:
2959 return NewDstRC;
2960 }
2961}
2962
2963// Find the one SGPR operand we are allowed to use.
2964unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
2965 int OpIndices[3]) const {
2966 const MCInstrDesc &Desc = MI->getDesc();
2967
2968 // Find the one SGPR operand we are allowed to use.
2969 //
2970 // First we need to consider the instruction's operand requirements before
2971 // legalizing. Some operands are required to be SGPRs, such as implicit uses
2972 // of VCC, but we are still bound by the constant bus requirement to only use
2973 // one.
2974 //
2975 // If the operand's class is an SGPR, we can never move it.
2976
2977 unsigned SGPRReg = findImplicitSGPRRead(*MI);
2978 if (SGPRReg != AMDGPU::NoRegister)
2979 return SGPRReg;
2980
2981 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
2982 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2983
2984 for (unsigned i = 0; i < 3; ++i) {
2985 int Idx = OpIndices[i];
2986 if (Idx == -1)
2987 break;
2988
2989 const MachineOperand &MO = MI->getOperand(Idx);
2990 if (!MO.isReg())
2991 continue;
2992
2993 // Is this operand statically required to be an SGPR based on the operand
2994 // constraints?
2995 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
2996 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
2997 if (IsRequiredSGPR)
2998 return MO.getReg();
2999
3000 // If this could be a VGPR or an SGPR, Check the dynamic register class.
3001 unsigned Reg = MO.getReg();
3002 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
3003 if (RI.isSGPRClass(RegRC))
3004 UsedSGPRs[i] = Reg;
3005 }
3006
3007 // We don't have a required SGPR operand, so we have a bit more freedom in
3008 // selecting operands to move.
3009
3010 // Try to select the most used SGPR. If an SGPR is equal to one of the
3011 // others, we choose that.
3012 //
3013 // e.g.
3014 // V_FMA_F32 v0, s0, s0, s0 -> No moves
3015 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
3016
3017 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
3018 // prefer those.
3019
3020 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
3021 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
3022 SGPRReg = UsedSGPRs[0];
3023 }
3024
3025 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
3026 if (UsedSGPRs[1] == UsedSGPRs[2])
3027 SGPRReg = UsedSGPRs[1];
3028 }
3029
3030 return SGPRReg;
3031}
3032
3033void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
3034 const MachineFunction &MF) const {
3035 int End = getIndirectIndexEnd(MF);
3036 int Begin = getIndirectIndexBegin(MF);
3037
3038 if (End == -1)
3039 return;
3040
3041
3042 for (int Index = Begin; Index <= End; ++Index)
3043 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
3044
3045 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
3046 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
3047
3048 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
3049 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
3050
3051 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
3052 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
3053
3054 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
3055 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
3056
3057 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
3058 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
3059}
3060
3061MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
3062 unsigned OperandName) const {
3063 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
3064 if (Idx == -1)
3065 return nullptr;
3066
3067 return &MI.getOperand(Idx);
3068}
3069
3070uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
3071 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
3072 if (ST.isAmdHsaOS()) {
3073 RsrcDataFormat |= (1ULL << 56);
3074
3075 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
3076 // Set MTYPE = 2
3077 RsrcDataFormat |= (2ULL << 59);
3078 }
3079
3080 return RsrcDataFormat;
3081}
3082
3083uint64_t SIInstrInfo::getScratchRsrcWords23() const {
3084 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
3085 AMDGPU::RSRC_TID_ENABLE |
3086 0xffffffff; // Size;
3087
3088 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
3089
3090 Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
3091
3092 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
3093 // Clear them unless we want a huge stride.
3094 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
3095 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
3096
3097 return Rsrc23;
3098}
3099
3100bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
3101 unsigned Opc = MI->getOpcode();
3102
3103 return isSMRD(Opc);
3104}
3105
3106bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
3107 unsigned Opc = MI->getOpcode();
3108
3109 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
3110}
3111
3112ArrayRef<std::pair<int, const char *>>
3113SIInstrInfo::getSerializableTargetIndices() const {
3114 static const std::pair<int, const char *> TargetIndices[] = {
3115 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
3116 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
3117 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
3118 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
3119 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
3120 return makeArrayRef(TargetIndices);
3121}
3122
3123/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
3124/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
3125ScheduleHazardRecognizer *
3126SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
3127 const ScheduleDAG *DAG) const {
3128 return new GCNHazardRecognizer(DAG->MF);
3129}
3130
3131/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
3132/// pass.
3133ScheduleHazardRecognizer *
3134SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
3135 return new GCNHazardRecognizer(MF);
3136}