Bug Summary

File: lib/Target/AMDGPU/R600InstrInfo.cpp
Warning: line 402, column 5
Undefined or garbage value returned to caller
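
The function flagged here is getTransSwizzle() (line 398): every swizzle case indexes a local three-element Cycles array with the caller-supplied slot Op, and nothing inside the function bounds Op. Along the reported path, isLegalUpTo() calls getTransSwizzle(TransSwz, i) from its trans-slot loop after the analyzer has assumed TransSrcs.size() is at least 4, so on the fourth iteration Op is 3 and Cycles[Op] reads past the end of the array, returning an indeterminate value. The snippet below is a minimal standalone sketch of the flagged pattern together with one possible guard; the name transCycleFor and the assertion are illustrative assumptions, not code taken from the file.

    #include <cassert>

    // Standalone model of the flagged pattern: a 3-entry table indexed by an
    // unchecked caller-supplied slot. With Op >= 3 the read is out of bounds
    // and the returned value is indeterminate, which is what the analyzer
    // reports at line 402.
    static unsigned transCycleFor(unsigned Op) {
      const unsigned Cycles[3] = {2, 1, 0}; // ALU_VEC_012_SCL_210 ordering
      assert(Op < 3 && "trans slot reads at most 3 sources"); // possible guard
      return Cycles[Op];
    }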

Annotated Source Code

1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600InstrInfo.h"
16#include "AMDGPU.h"
17#include "AMDGPUInstrInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "R600Defines.h"
20#include "R600FrameLowering.h"
21#include "R600RegisterInfo.h"
22#include "Utils/AMDGPUBaseInfo.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/CodeGen/MachineBasicBlock.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Target/TargetRegisterInfo.h"
35#include "llvm/Target/TargetSubtargetInfo.h"
36#include <algorithm>
37#include <cassert>
38#include <cstdint>
39#include <cstring>
40#include <iterator>
41#include <utility>
42#include <vector>
43
44using namespace llvm;
45
46#define GET_INSTRINFO_CTOR_DTOR
47#include "AMDGPUGenDFAPacketizer.inc"
48
49R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
50 : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
51
52bool R600InstrInfo::isVector(const MachineInstr &MI) const {
53 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
54}
55
56void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
57 MachineBasicBlock::iterator MI,
58 const DebugLoc &DL, unsigned DestReg,
59 unsigned SrcReg, bool KillSrc) const {
60 unsigned VectorComponents = 0;
61 if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
62 AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
63 (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
64 AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
65 VectorComponents = 4;
66 } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
67 AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
68 (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
69 AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
70 VectorComponents = 2;
71 }
72
73 if (VectorComponents > 0) {
74 for (unsigned I = 0; I < VectorComponents; I++) {
75 unsigned SubRegIndex = RI.getSubRegFromChannel(I);
76 buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
77 RI.getSubReg(DestReg, SubRegIndex),
78 RI.getSubReg(SrcReg, SubRegIndex))
79 .addReg(DestReg,
80 RegState::Define | RegState::Implicit);
81 }
82 } else {
83 MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
84 DestReg, SrcReg);
85 NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
86 .setIsKill(KillSrc);
87 }
88}
89
90/// \returns true if \p MBBI can be moved into a new basic block.
91bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
92 MachineBasicBlock::iterator MBBI) const {
93 for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
94 E = MBBI->operands_end(); I != E; ++I) {
95 if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
96 I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
97 return false;
98 }
99 return true;
100}
101
102bool R600InstrInfo::isMov(unsigned Opcode) const {
103 switch(Opcode) {
104 default:
105 return false;
106 case AMDGPU::MOV:
107 case AMDGPU::MOV_IMM_F32:
108 case AMDGPU::MOV_IMM_I32:
109 return true;
110 }
111}
112
113bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
114 return false;
115}
116
117bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
118 switch(Opcode) {
119 default: return false;
120 case AMDGPU::CUBE_r600_pseudo:
121 case AMDGPU::CUBE_r600_real:
122 case AMDGPU::CUBE_eg_pseudo:
123 case AMDGPU::CUBE_eg_real:
124 return true;
125 }
126}
127
128bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
129 unsigned TargetFlags = get(Opcode).TSFlags;
130
131 return (TargetFlags & R600_InstFlag::ALU_INST);
132}
133
134bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
135 unsigned TargetFlags = get(Opcode).TSFlags;
136
137 return ((TargetFlags & R600_InstFlag::OP1) |
138 (TargetFlags & R600_InstFlag::OP2) |
139 (TargetFlags & R600_InstFlag::OP3));
140}
141
142bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
143 unsigned TargetFlags = get(Opcode).TSFlags;
144
145 return ((TargetFlags & R600_InstFlag::LDS_1A) |
146 (TargetFlags & R600_InstFlag::LDS_1A1D) |
147 (TargetFlags & R600_InstFlag::LDS_1A2D));
148}
149
150bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
151 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
152}
153
154bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
155 if (isALUInstr(MI.getOpcode()))
156 return true;
157 if (isVector(MI) || isCubeOp(MI.getOpcode()))
158 return true;
159 switch (MI.getOpcode()) {
160 case AMDGPU::PRED_X:
161 case AMDGPU::INTERP_PAIR_XY:
162 case AMDGPU::INTERP_PAIR_ZW:
163 case AMDGPU::INTERP_VEC_LOAD:
164 case AMDGPU::COPY:
165 case AMDGPU::DOT_4:
166 return true;
167 default:
168 return false;
169 }
170}
171
172bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
173 if (ST.hasCaymanISA())
174 return false;
175 return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
176}
177
178bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
179 return isTransOnly(MI.getOpcode());
180}
181
182bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
183 return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
184}
185
186bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
187 return isVectorOnly(MI.getOpcode());
188}
189
190bool R600InstrInfo::isExport(unsigned Opcode) const {
191 return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
192}
193
194bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
195 return ST.hasVertexCache() && IS_VTX(get(Opcode));
196}
197
198bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
199 const MachineFunction *MF = MI.getParent()->getParent();
200 return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
201 usesVertexCache(MI.getOpcode());
202}
203
204bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
205 return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
206}
207
208bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
209 const MachineFunction *MF = MI.getParent()->getParent();
210 return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
211 usesVertexCache(MI.getOpcode())) ||
212 usesTextureCache(MI.getOpcode());
213}
214
215bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
216 switch (Opcode) {
217 case AMDGPU::KILLGT:
218 case AMDGPU::GROUP_BARRIER:
219 return true;
220 default:
221 return false;
222 }
223}
224
225bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
226 return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
227}
228
229bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
230 return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
231}
232
233bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
234 if (!isALUInstr(MI.getOpcode())) {
235 return false;
236 }
237 for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
238 E = MI.operands_end();
239 I != E; ++I) {
240 if (!I->isReg() || !I->isUse() ||
241 TargetRegisterInfo::isVirtualRegister(I->getReg()))
242 continue;
243
244 if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
245 return true;
246 }
247 return false;
248}
249
250int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
251 static const unsigned SrcSelTable[][2] = {
252 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
253 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
254 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
255 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
256 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
257 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
258 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
259 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
260 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
261 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
262 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
263 };
264
265 for (const auto &Row : SrcSelTable) {
266 if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
267 return getOperandIdx(Opcode, Row[1]);
268 }
269 }
270 return -1;
271}
272
273SmallVector<std::pair<MachineOperand *, int64_t>, 3>
274R600InstrInfo::getSrcs(MachineInstr &MI) const {
275 SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
276
277 if (MI.getOpcode() == AMDGPU::DOT_4) {
278 static const unsigned OpTable[8][2] = {
279 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
280 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
281 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
282 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
283 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
284 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
285 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
286 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
287 };
288
289 for (unsigned j = 0; j < 8; j++) {
290 MachineOperand &MO =
291 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
292 unsigned Reg = MO.getReg();
293 if (Reg == AMDGPU::ALU_CONST) {
294 MachineOperand &Sel =
295 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
296 Result.push_back(std::make_pair(&MO, Sel.getImm()));
297 continue;
298 }
299
300 }
301 return Result;
302 }
303
304 static const unsigned OpTable[3][2] = {
305 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
306 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
307 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
308 };
309
310 for (unsigned j = 0; j < 3; j++) {
311 int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
312 if (SrcIdx < 0)
313 break;
314 MachineOperand &MO = MI.getOperand(SrcIdx);
315 unsigned Reg = MO.getReg();
316 if (Reg == AMDGPU::ALU_CONST) {
317 MachineOperand &Sel =
318 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
319 Result.push_back(std::make_pair(&MO, Sel.getImm()));
320 continue;
321 }
322 if (Reg == AMDGPU::ALU_LITERAL_X) {
323 MachineOperand &Operand =
324 MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
325 if (Operand.isImm()) {
326 Result.push_back(std::make_pair(&MO, Operand.getImm()));
327 continue;
328 }
329 assert(Operand.isGlobal());
330 }
331 Result.push_back(std::make_pair(&MO, 0));
332 }
333 return Result;
334}
335
336std::vector<std::pair<int, unsigned>>
337R600InstrInfo::ExtractSrcs(MachineInstr &MI,
338 const DenseMap<unsigned, unsigned> &PV,
339 unsigned &ConstCount) const {
340 ConstCount = 0;
341 const std::pair<int, unsigned> DummyPair(-1, 0);
342 std::vector<std::pair<int, unsigned>> Result;
343 unsigned i = 0;
344 for (const auto &Src : getSrcs(MI)) {
345 ++i;
346 unsigned Reg = Src.first->getReg();
347 int Index = RI.getEncodingValue(Reg) & 0xff;
348 if (Reg == AMDGPU::OQAP) {
349 Result.push_back(std::make_pair(Index, 0U));
350 }
351 if (PV.find(Reg) != PV.end()) {
352 // 255 is used to tell it's a PS/PV reg
353 Result.push_back(std::make_pair(255, 0U));
354 continue;
355 }
356 if (Index > 127) {
357 ConstCount++;
358 Result.push_back(DummyPair);
359 continue;
360 }
361 unsigned Chan = RI.getHWRegChan(Reg);
362 Result.push_back(std::make_pair(Index, Chan));
363 }
364 for (; i < 3; ++i)
365 Result.push_back(DummyPair);
366 return Result;
367}
368
369static std::vector<std::pair<int, unsigned>>
370Swizzle(std::vector<std::pair<int, unsigned>> Src,
371 R600InstrInfo::BankSwizzle Swz) {
372 if (Src[0] == Src[1])
373 Src[1].first = -1;
374 switch (Swz) {
375 case R600InstrInfo::ALU_VEC_012_SCL_210:
376 break;
377 case R600InstrInfo::ALU_VEC_021_SCL_122:
378 std::swap(Src[1], Src[2]);
379 break;
380 case R600InstrInfo::ALU_VEC_102_SCL_221:
381 std::swap(Src[0], Src[1]);
382 break;
383 case R600InstrInfo::ALU_VEC_120_SCL_212:
384 std::swap(Src[0], Src[1]);
385 std::swap(Src[0], Src[2]);
386 break;
387 case R600InstrInfo::ALU_VEC_201:
388 std::swap(Src[0], Src[2]);
389 std::swap(Src[0], Src[1]);
390 break;
391 case R600InstrInfo::ALU_VEC_210:
392 std::swap(Src[0], Src[2]);
393 break;
394 }
395 return Src;
396}
397
398static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
399 switch (Swz) {
27
Control jumps to 'case ALU_VEC_012_SCL_210:' at line 400
400 case R600InstrInfo::ALU_VEC_012_SCL_210: {
401 unsigned Cycles[3] = { 2, 1, 0};
402 return Cycles[Op];
28
Undefined or garbage value returned to caller
403 }
404 case R600InstrInfo::ALU_VEC_021_SCL_122: {
405 unsigned Cycles[3] = { 1, 2, 2};
406 return Cycles[Op];
407 }
408 case R600InstrInfo::ALU_VEC_120_SCL_212: {
409 unsigned Cycles[3] = { 2, 1, 2};
410 return Cycles[Op];
411 }
412 case R600InstrInfo::ALU_VEC_102_SCL_221: {
413 unsigned Cycles[3] = { 2, 2, 1};
414 return Cycles[Op];
415 }
416 default:
417 llvm_unreachable("Wrong Swizzle for Trans Slot");
418 }
419}
420
421/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
422/// in the same Instruction Group while meeting read port limitations given a
423/// Swz swizzle sequence.
424unsigned R600InstrInfo::isLegalUpTo(
425 const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
426 const std::vector<R600InstrInfo::BankSwizzle> &Swz,
427 const std::vector<std::pair<int, unsigned>> &TransSrcs,
428 R600InstrInfo::BankSwizzle TransSwz) const {
429 int Vector[4][3];
430 memset(Vector, -1, sizeof(Vector));
431 for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
7
Assuming 'i' is >= 'e'
8
Loop condition is false. Execution continues on line 455
432 const std::vector<std::pair<int, unsigned>> &Srcs =
433 Swizzle(IGSrcs[i], Swz[i]);
434 for (unsigned j = 0; j < 3; j++) {
435 const std::pair<int, unsigned> &Src = Srcs[j];
436 if (Src.first < 0 || Src.first == 255)
437 continue;
438 if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
439 if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
440 Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
441 // The value from output queue A (denoted by register OQAP) can
442 // only be fetched during the first cycle.
443 return false;
444 }
445 // OQAP does not count towards the normal read port restrictions
446 continue;
447 }
448 if (Vector[Src.second][j] < 0)
449 Vector[Src.second][j] = Src.first;
450 if (Vector[Src.second][j] != Src.first)
451 return i;
452 }
453 }
454 // Now check Trans Alu
455 for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
9
Assuming 'i' is < 'e'
10
Loop condition is true. Entering loop body
14
Assuming 'i' is < 'e'
15
Loop condition is true. Entering loop body
19
Assuming 'i' is < 'e'
20
Loop condition is true. Entering loop body
24
Assuming 'i' is < 'e'
25
Loop condition is true. Entering loop body
456 const std::pair<int, unsigned> &Src = TransSrcs[i];
457 unsigned Cycle = getTransSwizzle(TransSwz, i);
26
Calling 'getTransSwizzle'
458 if (Src.first < 0)
11
Assuming the condition is true
12
Taking true branch
16
Assuming the condition is true
17
Taking true branch
21
Assuming the condition is true
22
Taking true branch
459 continue;
13
Execution continues on line 455
18
Execution continues on line 455
23
Execution continues on line 455
460 if (Src.first == 255)
461 continue;
462 if (Vector[Src.second][Cycle] < 0)
463 Vector[Src.second][Cycle] = Src.first;
464 if (Vector[Src.second][Cycle] != Src.first)
465 return IGSrcs.size() - 1;
466 }
467 return IGSrcs.size();
468}
469
470/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
471/// (in lexicographic term) swizzle sequence assuming that all swizzles after
472/// Idx can be skipped
473static bool
474NextPossibleSolution(
475 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
476 unsigned Idx) {
477 assert(Idx < SwzCandidate.size());
478 int ResetIdx = Idx;
479 while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
480 ResetIdx --;
481 for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
482 SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
483 }
484 if (ResetIdx == -1)
485 return false;
486 int NextSwizzle = SwzCandidate[ResetIdx] + 1;
487 SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
488 return true;
489}
490
491/// Enumerate all possible Swizzle sequence to find one that can meet all
492/// read port requirements.
493bool R600InstrInfo::FindSwizzleForVectorSlot(
494 const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
495 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
496 const std::vector<std::pair<int, unsigned>> &TransSrcs,
497 R600InstrInfo::BankSwizzle TransSwz) const {
498 unsigned ValidUpTo = 0;
499 do {
500 ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
6
Calling 'R600InstrInfo::isLegalUpTo'
501 if (ValidUpTo == IGSrcs.size())
502 return true;
503 } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
504 return false;
505}
506
507/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
508/// a const, and can't read a gpr at cycle 1 if they read 2 const.
509static bool
510isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
511 const std::vector<std::pair<int, unsigned>> &TransOps,
512 unsigned ConstCount) {
513 // TransALU can't read 3 constants
514 if (ConstCount > 2)
515 return false;
516 for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
517 const std::pair<int, unsigned> &Src = TransOps[i];
518 unsigned Cycle = getTransSwizzle(TransSwz, i);
519 if (Src.first < 0)
520 continue;
521 if (ConstCount > 0 && Cycle == 0)
522 return false;
523 if (ConstCount > 1 && Cycle == 1)
524 return false;
525 }
526 return true;
527}
528
529bool
530R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
531 const DenseMap<unsigned, unsigned> &PV,
532 std::vector<BankSwizzle> &ValidSwizzle,
533 bool isLastAluTrans)
534 const {
535 //Todo : support shared src0 - src1 operand
536
537 std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
538 ValidSwizzle.clear();
539 unsigned ConstCount;
540 BankSwizzle TransBS = ALU_VEC_012_SCL_210;
541 for (unsigned i = 0, e = IG.size(); i < e; ++i) {
1
Assuming 'i' is >= 'e'
2
Loop condition is false. Execution continues on line 548
542 IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
543 unsigned Op = getOperandIdx(IG[i]->getOpcode(),
544 AMDGPU::OpName::bank_swizzle);
545 ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
546 IG[i]->getOperand(Op).getImm());
547 }
548 std::vector<std::pair<int, unsigned>> TransOps;
549 if (!isLastAluTrans)
3
Assuming 'isLastAluTrans' is 0
4
Taking true branch
550 return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
5
Calling 'R600InstrInfo::FindSwizzleForVectorSlot'
551
552 TransOps = std::move(IGSrcs.back());
553 IGSrcs.pop_back();
554 ValidSwizzle.pop_back();
555
556 static const R600InstrInfo::BankSwizzle TransSwz[] = {
557 ALU_VEC_012_SCL_210,
558 ALU_VEC_021_SCL_122,
559 ALU_VEC_120_SCL_212,
560 ALU_VEC_102_SCL_221
561 };
562 for (unsigned i = 0; i < 4; i++) {
563 TransBS = TransSwz[i];
564 if (!isConstCompatible(TransBS, TransOps, ConstCount))
565 continue;
566 bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
567 TransBS);
568 if (Result) {
569 ValidSwizzle.push_back(TransBS);
570 return true;
571 }
572 }
573
574 return false;
575}
576
577bool
578R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
579 const {
580 assert (Consts.size() <= 12 && "Too many operands in instructions group");
581 unsigned Pair1 = 0, Pair2 = 0;
582 for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
583 unsigned ReadConstHalf = Consts[i] & 2;
584 unsigned ReadConstIndex = Consts[i] & (~3);
585 unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
586 if (!Pair1) {
587 Pair1 = ReadHalfConst;
588 continue;
589 }
590 if (Pair1 == ReadHalfConst)
591 continue;
592 if (!Pair2) {
593 Pair2 = ReadHalfConst;
594 continue;
595 }
596 if (Pair2 != ReadHalfConst)
597 return false;
598 }
599 return true;
600}
601
602bool
603R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
604 const {
605 std::vector<unsigned> Consts;
606 SmallSet<int64_t, 4> Literals;
607 for (unsigned i = 0, n = MIs.size(); i < n; i++) {
608 MachineInstr &MI = *MIs[i];
609 if (!isALUInstr(MI.getOpcode()))
610 continue;
611
612 for (const auto &Src : getSrcs(MI)) {
613 if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
614 Literals.insert(Src.second);
615 if (Literals.size() > 4)
616 return false;
617 if (Src.first->getReg() == AMDGPU::ALU_CONST)
618 Consts.push_back(Src.second);
619 if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
620 AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
621 unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
622 unsigned Chan = RI.getHWRegChan(Src.first->getReg());
623 Consts.push_back((Index << 2) | Chan);
624 }
625 }
626 }
627 return fitsConstReadLimitations(Consts);
628}
629
630DFAPacketizer *
631R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
632 const InstrItineraryData *II = STI.getInstrItineraryData();
633 return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
634}
635
636static bool
637isPredicateSetter(unsigned Opcode) {
638 switch (Opcode) {
639 case AMDGPU::PRED_X:
640 return true;
641 default:
642 return false;
643 }
644}
645
646static MachineInstr *
647findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
648 MachineBasicBlock::iterator I) {
649 while (I != MBB.begin()) {
650 --I;
651 MachineInstr &MI = *I;
652 if (isPredicateSetter(MI.getOpcode()))
653 return &MI;
654 }
655
656 return nullptr;
657}
658
659static
660bool isJump(unsigned Opcode) {
661 return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
662}
663
664static bool isBranch(unsigned Opcode) {
665 return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
666 Opcode == AMDGPU::BRANCH_COND_f32;
667}
668
669bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
670 MachineBasicBlock *&TBB,
671 MachineBasicBlock *&FBB,
672 SmallVectorImpl<MachineOperand> &Cond,
673 bool AllowModify) const {
674 // Most of the following comes from the ARM implementation of AnalyzeBranch
675
676 // If the block has no terminators, it just falls into the block after it.
677 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
678 if (I == MBB.end())
679 return false;
680
681 // AMDGPU::BRANCH* instructions are only available after isel and are not
682 // handled
683 if (isBranch(I->getOpcode()))
684 return true;
685 if (!isJump(I->getOpcode())) {
686 return false;
687 }
688
689 // Remove successive JUMP
690 while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
691 MachineBasicBlock::iterator PriorI = std::prev(I);
692 if (AllowModify)
693 I->removeFromParent();
694 I = PriorI;
695 }
696 MachineInstr &LastInst = *I;
697
698 // If there is only one terminator instruction, process it.
699 unsigned LastOpc = LastInst.getOpcode();
700 if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
701 if (LastOpc == AMDGPU::JUMP) {
702 TBB = LastInst.getOperand(0).getMBB();
703 return false;
704 } else if (LastOpc == AMDGPU::JUMP_COND) {
705 auto predSet = I;
706 while (!isPredicateSetter(predSet->getOpcode())) {
707 predSet = --I;
708 }
709 TBB = LastInst.getOperand(0).getMBB();
710 Cond.push_back(predSet->getOperand(1));
711 Cond.push_back(predSet->getOperand(2));
712 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
713 return false;
714 }
715 return true; // Can't handle indirect branch.
716 }
717
718 // Get the instruction before it if it is a terminator.
719 MachineInstr &SecondLastInst = *I;
720 unsigned SecondLastOpc = SecondLastInst.getOpcode();
721
722 // If the block ends with a B and a Bcc, handle it.
723 if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
724 auto predSet = --I;
725 while (!isPredicateSetter(predSet->getOpcode())) {
726 predSet = --I;
727 }
728 TBB = SecondLastInst.getOperand(0).getMBB();
729 FBB = LastInst.getOperand(0).getMBB();
730 Cond.push_back(predSet->getOperand(1));
731 Cond.push_back(predSet->getOperand(2));
732 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
733 return false;
734 }
735
736 // Otherwise, can't handle this.
737 return true;
738}
739
740static
741MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
742 for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
743 It != E; ++It) {
744 if (It->getOpcode() == AMDGPU::CF_ALU ||
745 It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
746 return It.getReverse();
747 }
748 return MBB.end();
749}
750
751unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
752 MachineBasicBlock *TBB,
753 MachineBasicBlock *FBB,
754 ArrayRef<MachineOperand> Cond,
755 const DebugLoc &DL,
756 int *BytesAdded) const {
757 assert(TBB && "insertBranch must not be told to insert a fallthrough");
758 assert(!BytesAdded && "code size not handled");
759
760 if (!FBB) {
761 if (Cond.empty()) {
762 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
763 return 1;
764 } else {
765 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
766 assert(PredSet && "No previous predicate !");
767 addFlag(*PredSet, 0, MO_FLAG_PUSH);
768 PredSet->getOperand(2).setImm(Cond[1].getImm());
769
770 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
771 .addMBB(TBB)
772 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
773 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
774 if (CfAlu == MBB.end())
775 return 1;
776 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
777 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
778 return 1;
779 }
780 } else {
781 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
782 assert(PredSet && "No previous predicate !");
783 addFlag(*PredSet, 0, MO_FLAG_PUSH);
784 PredSet->getOperand(2).setImm(Cond[1].getImm());
785 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
786 .addMBB(TBB)
787 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
788 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
789 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
790 if (CfAlu == MBB.end())
791 return 2;
792 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
793 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
794 return 2;
795 }
796}
797
798unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
799 int *BytesRemoved) const {
800 assert(!BytesRemoved && "code size not handled");
801
802 // Note : we leave PRED* instructions there.
803 // They may be needed when predicating instructions.
804
805 MachineBasicBlock::iterator I = MBB.end();
806
807 if (I == MBB.begin()) {
808 return 0;
809 }
810 --I;
811 switch (I->getOpcode()) {
812 default:
813 return 0;
814 case AMDGPU::JUMP_COND: {
815 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
816 clearFlag(*predSet, 0, MO_FLAG_PUSH);
817 I->eraseFromParent();
818 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
819 if (CfAlu == MBB.end())
820 break;
821 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
822 CfAlu->setDesc(get(AMDGPU::CF_ALU));
823 break;
824 }
825 case AMDGPU::JUMP:
826 I->eraseFromParent();
827 break;
828 }
829 I = MBB.end();
830
831 if (I == MBB.begin()) {
832 return 1;
833 }
834 --I;
835 switch (I->getOpcode()) {
836 // FIXME: only one case??
837 default:
838 return 1;
839 case AMDGPU::JUMP_COND: {
840 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
841 clearFlag(*predSet, 0, MO_FLAG_PUSH);
842 I->eraseFromParent();
843 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
844 if (CfAlu == MBB.end())
845 break;
846 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
847 CfAlu->setDesc(get(AMDGPU::CF_ALU));
848 break;
849 }
850 case AMDGPU::JUMP:
851 I->eraseFromParent();
852 break;
853 }
854 return 2;
855}
856
857bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
858 int idx = MI.findFirstPredOperandIdx();
859 if (idx < 0)
860 return false;
861
862 unsigned Reg = MI.getOperand(idx).getReg();
863 switch (Reg) {
864 default: return false;
865 case AMDGPU::PRED_SEL_ONE:
866 case AMDGPU::PRED_SEL_ZERO:
867 case AMDGPU::PREDICATE_BIT:
868 return true;
869 }
870}
871
872bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
873 // XXX: KILL* instructions can be predicated, but they must be the last
874 // instruction in a clause, so this means any instructions after them cannot
875 // be predicated. Until we have proper support for instruction clauses in the
876 // backend, we will mark KILL* instructions as unpredicable.
877
878 if (MI.getOpcode() == AMDGPU::KILLGT) {
879 return false;
880 } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
881 // If the clause start in the middle of MBB then the MBB has more
882 // than a single clause, unable to predicate several clauses.
883 if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
884 return false;
885 // TODO: We don't support KC merging atm
886 return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
887 } else if (isVector(MI)) {
888 return false;
889 } else {
890 return AMDGPUInstrInfo::isPredicable(MI);
891 }
892}
893
894bool
895R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
896 unsigned NumCycles,
897 unsigned ExtraPredCycles,
898 BranchProbability Probability) const{
899 return true;
900}
901
902bool
903R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
904 unsigned NumTCycles,
905 unsigned ExtraTCycles,
906 MachineBasicBlock &FMBB,
907 unsigned NumFCycles,
908 unsigned ExtraFCycles,
909 BranchProbability Probability) const {
910 return true;
911}
912
913bool
914R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
915 unsigned NumCycles,
916 BranchProbability Probability)
917 const {
918 return true;
919}
920
921bool
922R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
923 MachineBasicBlock &FMBB) const {
924 return false;
925}
926
927bool
928R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
929 MachineOperand &MO = Cond[1];
930 switch (MO.getImm()) {
931 case AMDGPU::PRED_SETE_INT:
932 MO.setImm(AMDGPU::PRED_SETNE_INT);
933 break;
934 case AMDGPU::PRED_SETNE_INT:
935 MO.setImm(AMDGPU::PRED_SETE_INT);
936 break;
937 case AMDGPU::PRED_SETE:
938 MO.setImm(AMDGPU::PRED_SETNE);
939 break;
940 case AMDGPU::PRED_SETNE:
941 MO.setImm(AMDGPU::PRED_SETE);
942 break;
943 default:
944 return true;
945 }
946
947 MachineOperand &MO2 = Cond[2];
948 switch (MO2.getReg()) {
949 case AMDGPU::PRED_SEL_ZERO:
950 MO2.setReg(AMDGPU::PRED_SEL_ONE);
951 break;
952 case AMDGPU::PRED_SEL_ONE:
953 MO2.setReg(AMDGPU::PRED_SEL_ZERO);
954 break;
955 default:
956 return true;
957 }
958 return false;
959}
960
961bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
962 std::vector<MachineOperand> &Pred) const {
963 return isPredicateSetter(MI.getOpcode());
964}
965
966bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
967 ArrayRef<MachineOperand> Pred) const {
968 int PIdx = MI.findFirstPredOperandIdx();
969
970 if (MI.getOpcode() == AMDGPU::CF_ALU) {
971 MI.getOperand(8).setImm(0);
972 return true;
973 }
974
975 if (MI.getOpcode() == AMDGPU::DOT_4) {
976 MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
977 .setReg(Pred[2].getReg());
978 MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
979 .setReg(Pred[2].getReg());
980 MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
981 .setReg(Pred[2].getReg());
982 MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
983 .setReg(Pred[2].getReg());
984 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
985 MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
986 return true;
987 }
988
989 if (PIdx != -1) {
990 MachineOperand &PMO = MI.getOperand(PIdx);
991 PMO.setReg(Pred[2].getReg());
992 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
993 MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
994 return true;
995 }
996
997 return false;
998}
999
1000unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
1001 return 2;
1002}
1003
1004unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
1005 const MachineInstr &,
1006 unsigned *PredCost) const {
1007 if (PredCost)
1008 *PredCost = 2;
1009 return 2;
1010}
1011
1012unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
1013 unsigned Channel) const {
1014 assert(Channel == 0);
1015 return RegIndex;
1016}
1017
1018bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1019 switch (MI.getOpcode()) {
1020 default: {
1021 MachineBasicBlock *MBB = MI.getParent();
1022 int OffsetOpIdx =
1023 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
1024 // addr is a custom operand with multiple MI operands, and only the
1025 // first MI operand is given a name.
1026 int RegOpIdx = OffsetOpIdx + 1;
1027 int ChanOpIdx =
1028 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
1029 if (isRegisterLoad(MI)) {
1030 int DstOpIdx =
1031 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
1032 unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
1033 unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
1034 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
1035 unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
1036 if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
1037 buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
1038 getIndirectAddrRegClass()->getRegister(Address));
1039 } else {
1040 buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
1041 OffsetReg);
1042 }
1043 } else if (isRegisterStore(MI)) {
1044 int ValOpIdx =
1045 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
1046 unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
1047 unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
1048 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
1049 unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
1050 if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
1051 buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
1052 MI.getOperand(ValOpIdx).getReg());
1053 } else {
1054 buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
1055 calculateIndirectAddress(RegIndex, Channel),
1056 OffsetReg);
1057 }
1058 } else {
1059 return false;
1060 }
1061
1062 MBB->erase(MI);
1063 return true;
1064 }
1065 case AMDGPU::R600_EXTRACT_ELT_V2:
1066 case AMDGPU::R600_EXTRACT_ELT_V4:
1067 buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
1068 RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
1069 MI.getOperand(2).getReg(),
1070 RI.getHWRegChan(MI.getOperand(1).getReg()));
1071 break;
1072 case AMDGPU::R600_INSERT_ELT_V2:
1073 case AMDGPU::R600_INSERT_ELT_V4:
1074 buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
1075 RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
1076 MI.getOperand(3).getReg(), // Offset
1077 RI.getHWRegChan(MI.getOperand(1).getReg())); // Channel
1078 break;
1079 }
1080 MI.eraseFromParent();
1081 return true;
1082}
1083
1084void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1085 const MachineFunction &MF) const {
1086 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
1087 const R600FrameLowering *TFL = ST.getFrameLowering();
1088
1089 unsigned StackWidth = TFL->getStackWidth(MF);
1090 int End = getIndirectIndexEnd(MF);
1091
1092 if (End == -1)
1093 return;
1094
1095 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
1096 unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
1097 Reserved.set(SuperReg);
1098 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
1099 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
1100 Reserved.set(Reg);
1101 }
1102 }
1103}
1104
1105const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
1106 return &AMDGPU::R600_TReg32_XRegClass;
1107}
1108
1109MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1110 MachineBasicBlock::iterator I,
1111 unsigned ValueReg, unsigned Address,
1112 unsigned OffsetReg) const {
1113 return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
1114}
1115
1116MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1117 MachineBasicBlock::iterator I,
1118 unsigned ValueReg, unsigned Address,
1119 unsigned OffsetReg,
1120 unsigned AddrChan) const {
1121 unsigned AddrReg;
1122 switch (AddrChan) {
1123 default: llvm_unreachable("Invalid Channel");
1124 case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
1125 case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
1126 case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
1127 case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
1128 }
1129 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1130 AMDGPU::AR_X, OffsetReg);
1131 setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
1132
1133 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1134 AddrReg, ValueReg)
1135 .addReg(AMDGPU::AR_X,
1136 RegState::Implicit | RegState::Kill);
1137 setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
1138 return Mov;
1139}
1140
1141MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1142 MachineBasicBlock::iterator I,
1143 unsigned ValueReg, unsigned Address,
1144 unsigned OffsetReg) const {
1145 return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
1146}
1147
1148MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1149 MachineBasicBlock::iterator I,
1150 unsigned ValueReg, unsigned Address,
1151 unsigned OffsetReg,
1152 unsigned AddrChan) const {
1153 unsigned AddrReg;
1154 switch (AddrChan) {
1155 default: llvm_unreachable("Invalid Channel");
1156 case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
1157 case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
1158 case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
1159 case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
1160 }
1161 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1162 AMDGPU::AR_X,
1163 OffsetReg);
1164 setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
1165 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1166 ValueReg,
1167 AddrReg)
1168 .addReg(AMDGPU::AR_X,
1169 RegState::Implicit | RegState::Kill);
1170 setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);
1171
1172 return Mov;
1173}
1174
1175int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
1176 const MachineRegisterInfo &MRI = MF.getRegInfo();
1177 const MachineFrameInfo &MFI = MF.getFrameInfo();
1178 int Offset = -1;
1179
1180 if (MFI.getNumObjects() == 0) {
1181 return -1;
1182 }
1183
1184 if (MRI.livein_empty()) {
1185 return 0;
1186 }
1187
1188 const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
1189 for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
1190 LE = MRI.livein_end();
1191 LI != LE; ++LI) {
1192 unsigned Reg = LI->first;
1193 if (TargetRegisterInfo::isVirtualRegister(Reg) ||
1194 !IndirectRC->contains(Reg))
1195 continue;
1196
1197 unsigned RegIndex;
1198 unsigned RegEnd;
1199 for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
1200 ++RegIndex) {
1201 if (IndirectRC->getRegister(RegIndex) == Reg)
1202 break;
1203 }
1204 Offset = std::max(Offset, (int)RegIndex);
1205 }
1206
1207 return Offset + 1;
1208}
1209
1210int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
1211 int Offset = 0;
1212 const MachineFrameInfo &MFI = MF.getFrameInfo();
1213
1214 // Variable sized objects are not supported
1215 if (MFI.hasVarSizedObjects()) {
1216 return -1;
1217 }
1218
1219 if (MFI.getNumObjects() == 0) {
1220 return -1;
1221 }
1222
1223 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
1224 const R600FrameLowering *TFL = ST.getFrameLowering();
1225
1226 unsigned IgnoredFrameReg;
1227 Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
1228
1229 return getIndirectIndexBegin(MF) + Offset;
1230}
1231
1232unsigned R600InstrInfo::getMaxAlusPerClause() const {
1233 return 115;
1234}
1235
1236MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
1237 MachineBasicBlock::iterator I,
1238 unsigned Opcode,
1239 unsigned DstReg,
1240 unsigned Src0Reg,
1241 unsigned Src1Reg) const {
1242 MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
1243 DstReg); // $dst
1244
1245 if (Src1Reg) {
1246 MIB.addImm(0) // $update_exec_mask
1247 .addImm(0); // $update_predicate
1248 }
1249 MIB.addImm(1) // $write
1250 .addImm(0) // $omod
1251 .addImm(0) // $dst_rel
1252 .addImm(0) // $dst_clamp
1253 .addReg(Src0Reg) // $src0
1254 .addImm(0) // $src0_neg
1255 .addImm(0) // $src0_rel
1256 .addImm(0) // $src0_abs
1257 .addImm(-1); // $src0_sel
1258
1259 if (Src1Reg) {
1260 MIB.addReg(Src1Reg) // $src1
1261 .addImm(0) // $src1_neg
1262 .addImm(0) // $src1_rel
1263 .addImm(0) // $src1_abs
1264 .addImm(-1); // $src1_sel
1265 }
1266
1267 //XXX: The r600g finalizer expects this to be 1, once we've moved the
1268 //scheduling to the backend, we can change the default to 0.
1269 MIB.addImm(1) // $last
1270 .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
1271 .addImm(0) // $literal
1272 .addImm(0); // $bank_swizzle
1273
1274 return MIB;
1275}
1276
1277#define OPERAND_CASE(Label) \
1278 case Label: { \
1279 static const unsigned Ops[] = \
1280 { \
1281 Label##_X, \
1282 Label##_Y, \
1283 Label##_Z, \
1284 Label##_W \
1285 }; \
1286 return Ops[Slot]; \
1287 }
1288
1289static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
1290 switch (Op) {
1291 OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
1292 OPERAND_CASE(AMDGPU::OpName::update_pred)
1293 OPERAND_CASE(AMDGPU::OpName::write)
1294 OPERAND_CASE(AMDGPU::OpName::omod)
1295 OPERAND_CASE(AMDGPU::OpName::dst_rel)
1296 OPERAND_CASE(AMDGPU::OpName::clamp)
1297 OPERAND_CASE(AMDGPU::OpName::src0)
1298 OPERAND_CASE(AMDGPU::OpName::src0_neg)
1299 OPERAND_CASE(AMDGPU::OpName::src0_rel)
1300 OPERAND_CASE(AMDGPU::OpName::src0_abs)
1301 OPERAND_CASE(AMDGPU::OpName::src0_sel)
1302 OPERAND_CASE(AMDGPU::OpName::src1)
1303 OPERAND_CASE(AMDGPU::OpName::src1_neg)
1304 OPERAND_CASE(AMDGPU::OpName::src1_rel)
1305 OPERAND_CASE(AMDGPU::OpName::src1_abs)
1306 OPERAND_CASE(AMDGPU::OpName::src1_sel)
1307 OPERAND_CASE(AMDGPU::OpName::pred_sel)
1308 default:
1309 llvm_unreachable("Wrong Operand");
1310 }
1311}
1312
1313#undef OPERAND_CASE
1314
1315MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
1316 MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
1317 const {
1318 assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
1319 unsigned Opcode;
1320 if (ST.getGeneration() <= R600Subtarget::R700)
1321 Opcode = AMDGPU::DOT4_r600;
1322 else
1323 Opcode = AMDGPU::DOT4_eg;
1324 MachineBasicBlock::iterator I = MI;
1325 MachineOperand &Src0 = MI->getOperand(
1326 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
1327 MachineOperand &Src1 = MI->getOperand(
1328 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
1329 MachineInstr *MIB = buildDefaultInstruction(
1330 MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
1331 static const unsigned Operands[14] = {
1332 AMDGPU::OpName::update_exec_mask,
1333 AMDGPU::OpName::update_pred,
1334 AMDGPU::OpName::write,
1335 AMDGPU::OpName::omod,
1336 AMDGPU::OpName::dst_rel,
1337 AMDGPU::OpName::clamp,
1338 AMDGPU::OpName::src0_neg,
1339 AMDGPU::OpName::src0_rel,
1340 AMDGPU::OpName::src0_abs,
1341 AMDGPU::OpName::src0_sel,
1342 AMDGPU::OpName::src1_neg,
1343 AMDGPU::OpName::src1_rel,
1344 AMDGPU::OpName::src1_abs,
1345 AMDGPU::OpName::src1_sel,
1346 };
1347
1348 MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
1349 getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
1350 MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
1351 .setReg(MO.getReg());
1352
1353 for (unsigned i = 0; i < 14; i++) {
1354 MachineOperand &MO = MI->getOperand(
1355 getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
1356 assert (MO.isImm());
1357 setImmOperand(*MIB, Operands[i], MO.getImm());
1358 }
1359 MIB->getOperand(20).setImm(0);
1360 return MIB;
1361}
1362
1363MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
1364 MachineBasicBlock::iterator I,
1365 unsigned DstReg,
1366 uint64_t Imm) const {
1367 MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
1368 AMDGPU::ALU_LITERAL_X);
1369 setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
1370 return MovImm;
1371}
1372
1373MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
1374 MachineBasicBlock::iterator I,
1375 unsigned DstReg, unsigned SrcReg) const {
1376 return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
1377}
1378
1379int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
1380 return getOperandIdx(MI.getOpcode(), Op);
1381}
1382
1383int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
1384 return AMDGPU::getNamedOperandIdx(Opcode, Op);
1385}
1386
1387void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
1388 int64_t Imm) const {
1389 int Idx = getOperandIdx(MI, Op);
1390 assert(Idx != -1 && "Operand not supported for this instruction.");
1391 assert(MI.getOperand(Idx).isImm());
1392 MI.getOperand(Idx).setImm(Imm);
1393}
1394
1395//===----------------------------------------------------------------------===//
1396// Instruction flag getters/setters
1397//===----------------------------------------------------------------------===//
1398
1399MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
1400 unsigned Flag) const {
1401 unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
1402 int FlagIndex = 0;
1403 if (Flag != 0) {
1404 // If we pass something other than the default value of Flag to this
1405 // function, it means we want to set a flag on an instruction
1406 // that uses native encoding.
1407 assert(HAS_NATIVE_OPERANDS(TargetFlags));
1408 bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
1409 switch (Flag) {
1410 case MO_FLAG_CLAMP:
1411 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
1412 break;
1413 case MO_FLAG_MASK:
1414 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
1415 break;
1416 case MO_FLAG_NOT_LAST:
1417 case MO_FLAG_LAST:
1418 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
1419 break;
1420 case MO_FLAG_NEG:
1421 switch (SrcIdx) {
1422 case 0:
1423 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
1424 break;
1425 case 1:
1426 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
1427 break;
1428 case 2:
1429 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
1430 break;
1431 }
1432 break;
1433
1434 case MO_FLAG_ABS:
1435 assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
1436 "instructions.");
1437 (void)IsOP3;
1438 switch (SrcIdx) {
1439 case 0:
1440 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
1441 break;
1442 case 1:
1443 FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
1444 break;
1445 }
1446 break;
1447
1448 default:
1449 FlagIndex = -1;
1450 break;
1451 }
1452 assert(FlagIndex != -1 && "Flag not supported for this instruction");
1453 } else {
1454 FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
1455 assert(FlagIndex != 0 &&
1456 "Instruction flags not supported for this instruction");
1457 }
1458
1459 MachineOperand &FlagOp = MI.getOperand(FlagIndex);
1460 assert(FlagOp.isImm());
1461 return FlagOp;
1462}
1463
1464void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
1465 unsigned Flag) const {
1466 unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
1467 if (Flag == 0) {
1468 return;
1469 }
1470 if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1471 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1472 if (Flag == MO_FLAG_NOT_LAST) {
1473 clearFlag(MI, Operand, MO_FLAG_LAST);
1474 } else if (Flag == MO_FLAG_MASK) {
1475 clearFlag(MI, Operand, Flag);
1476 } else {
1477 FlagOp.setImm(1);
1478 }
1479 } else {
1480 MachineOperand &FlagOp = getFlagOp(MI, Operand);
1481 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
1482 }
1483}
1484
1485void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
1486 unsigned Flag) const {
1487 unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
1488 if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1489 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1490 FlagOp.setImm(0);
1491 } else {
1492 MachineOperand &FlagOp = getFlagOp(MI);
1493 unsigned InstFlags = FlagOp.getImm();
1494 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
1495 FlagOp.setImm(InstFlags);
1496 }
1497}
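
The trans-slot constant rule documented above isConstCompatible() (lines 507-508) can be made concrete with the default swizzle: ALU_VEC_012_SCL_210 reads the trans operands in cycles {2, 1, 0}, so a register in the slot fetched at cycle 0 collides with any constant read, and one fetched at cycle 1 collides with a second constant. The sketch below restates that check in isolation, assuming the default swizzle only; the names transCycle012 and transConstOk are hypothetical and the code is illustrative rather than part of the file.

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Cycle in which trans operand Op is read under ALU_VEC_012_SCL_210.
    static unsigned transCycle012(unsigned Op) {
      const unsigned Cycles[3] = {2, 1, 0};
      assert(Op < 3 && "trans slot reads at most 3 sources");
      return Cycles[Op];
    }

    // Mirrors the rule in isConstCompatible() for the default swizzle: no
    // register read in cycle 0 when a constant is read, none in cycle 1 when
    // two constants are read, and never more than two constants in total.
    static bool transConstOk(const std::vector<std::pair<int, unsigned>> &Ops,
                             unsigned ConstCount) {
      if (ConstCount > 2)
        return false;
      const std::size_t N = std::min<std::size_t>(Ops.size(), 3);
      for (std::size_t i = 0; i < N; ++i) {
        if (Ops[i].first < 0)
          continue; // unused slot
        unsigned Cycle = transCycle012(static_cast<unsigned>(i));
        if (ConstCount > 0 && Cycle == 0)
          return false;
        if (ConstCount > 1 && Cycle == 1)
          return false;
      }
      return true;
    }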