clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIInstrInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "SIInstrInfo.h" |
15 | #include "AMDGPU.h" |
16 | #include "AMDGPUInstrInfo.h" |
17 | #include "GCNHazardRecognizer.h" |
18 | #include "GCNSubtarget.h" |
19 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
20 | #include "SIMachineFunctionInfo.h" |
21 | #include "llvm/Analysis/ValueTracking.h" |
22 | #include "llvm/CodeGen/LiveVariables.h" |
23 | #include "llvm/CodeGen/MachineDominators.h" |
24 | #include "llvm/CodeGen/RegisterScavenging.h" |
25 | #include "llvm/CodeGen/ScheduleDAG.h" |
26 | #include "llvm/IR/DiagnosticInfo.h" |
27 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/Support/CommandLine.h" |
30 | #include "llvm/Target/TargetMachine.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "si-instr-info" |
35 | |
36 | #define GET_INSTRINFO_CTOR_DTOR |
37 | #include "AMDGPUGenInstrInfo.inc" |
38 | |
39 | namespace llvm { |
40 | |
41 | class AAResults; |
42 | |
43 | namespace AMDGPU { |
44 | #define GET_D16ImageDimIntrinsics_IMPL |
45 | #define GET_ImageDimIntrinsicTable_IMPL |
46 | #define GET_RsrcIntrinsics_IMPL |
47 | #include "AMDGPUGenSearchableTables.inc" |
48 | } |
49 | } |
50 | |
51 | |
52 | |
53 | // Debug-only option: restricts the branch-offset range assumed for scalar |
54 | // branches so that long-branch handling can be exercised by small tests. |
55 | static cl::opt<unsigned> |
56 | BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), |
57 | cl::desc("Restrict range of branch instructions (DEBUG)")); |
58 | |
59 | static cl::opt<bool> Fix16BitCopies( |
60 | "amdgpu-fix-16-bit-physreg-copies", |
61 | cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), |
62 | cl::init(true), |
63 | cl::ReallyHidden); |
64 | |
65 | SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) |
66 | : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), |
67 | RI(ST), ST(ST) { |
68 | SchedModel.init(&ST); |
69 | } |
70 | |
71 | |
72 | |
73 | |
74 | |
75 | static unsigned getNumOperandsNoGlue(SDNode *Node) { |
76 | unsigned N = Node->getNumOperands(); |
77 | while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) |
78 | --N; |
79 | return N; |
80 | } |
81 | |
82 | |
83 | |
84 | static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { |
85 | unsigned Opc0 = N0->getMachineOpcode(); |
86 | unsigned Opc1 = N1->getMachineOpcode(); |
87 | |
88 | int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); |
89 | int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); |
90 | |
91 | if (Op0Idx == -1 && Op1Idx == -1) |
92 | return true; |
93 | |
94 | |
95 | if ((Op0Idx == -1 && Op1Idx != -1) || |
96 | (Op1Idx == -1 && Op0Idx != -1)) |
97 | return false; |
98 | |
99 | // getNamedOperandIdx gives an index into the MachineInstr operand list, |
100 | // which counts the result (def) first; MachineSDNode operands do not |
101 | // include the result, so drop one to index into the SDNode's operands. |
102 | |
103 | --Op0Idx; |
104 | --Op1Idx; |
105 | |
106 | return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); |
107 | } |
108 | |
109 | bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, |
110 | AAResults *AA) const { |
111 | if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) { |
112 | |
113 | |
114 | |
115 | |
116 | |
117 | |
118 | |
119 | return !MI.hasImplicitDef() && |
120 | MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() && |
121 | !MI.mayRaiseFPException(); |
122 | } |
123 | |
124 | return false; |
125 | } |
126 | |
127 | bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { |
128 | |
129 | return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && |
130 | isVALU(*MO.getParent()); |
131 | } |
132 | |
133 | bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, |
134 | int64_t &Offset0, |
135 | int64_t &Offset1) const { |
136 | if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) |
137 | return false; |
138 | |
139 | unsigned Opc0 = Load0->getMachineOpcode(); |
140 | unsigned Opc1 = Load1->getMachineOpcode(); |
141 | |
142 | |
143 | if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) |
144 | return false; |
145 | |
146 | if (isDS(Opc0) && isDS(Opc1)) { |
147 | |
148 | |
149 | if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) |
150 | return false; |
151 | |
152 | |
153 | if (Load0->getOperand(0) != Load1->getOperand(0)) |
154 | return false; |
155 | |
156 | |
157 | |
158 | |
159 | int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); |
160 | int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); |
161 | if (Offset0Idx == -1 || Offset1Idx == -1) |
162 | return false; |
163 | |
164 | |
165 | |
166 | |
167 | |
168 | Offset0Idx -= get(Opc0).NumDefs; |
169 | Offset1Idx -= get(Opc1).NumDefs; |
170 | Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue(); |
171 | Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue(); |
172 | return true; |
173 | } |
174 | |
175 | if (isSMRD(Opc0) && isSMRD(Opc1)) { |
176 | |
177 | if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || |
178 | AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) |
179 | return false; |
180 | |
181 | assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); |
182 | |
183 | |
184 | if (Load0->getOperand(0) != Load1->getOperand(0)) |
185 | return false; |
186 | |
187 | const ConstantSDNode *Load0Offset = |
188 | dyn_cast<ConstantSDNode>(Load0->getOperand(1)); |
189 | const ConstantSDNode *Load1Offset = |
190 | dyn_cast<ConstantSDNode>(Load1->getOperand(1)); |
191 | |
192 | if (!Load0Offset || !Load1Offset) |
193 | return false; |
194 | |
195 | Offset0 = Load0Offset->getZExtValue(); |
196 | Offset1 = Load1Offset->getZExtValue(); |
197 | return true; |
198 | } |
199 | |
200 | |
201 | if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { |
202 | |
203 | |
204 | if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || |
205 | !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || |
206 | !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) |
207 | return false; |
208 | |
209 | int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); |
210 | int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); |
211 | |
212 | if (OffIdx0 == -1 || OffIdx1 == -1) |
213 | return false; |
214 | |
215 | |
216 | |
217 | |
218 | OffIdx0 -= get(Opc0).NumDefs; |
219 | OffIdx1 -= get(Opc1).NumDefs; |
220 | |
221 | SDValue Off0 = Load0->getOperand(OffIdx0); |
222 | SDValue Off1 = Load1->getOperand(OffIdx1); |
223 | |
224 | |
225 | if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) |
226 | return false; |
227 | |
228 | Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); |
229 | Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); |
230 | return true; |
231 | } |
232 | |
233 | return false; |
234 | } |
235 | |
236 | static bool isStride64(unsigned Opc) { |
237 | switch (Opc) { |
238 | case AMDGPU::DS_READ2ST64_B32: |
239 | case AMDGPU::DS_READ2ST64_B64: |
240 | case AMDGPU::DS_WRITE2ST64_B32: |
241 | case AMDGPU::DS_WRITE2ST64_B64: |
242 | return true; |
243 | default: |
244 | return false; |
245 | } |
246 | } |
247 | |
248 | bool SIInstrInfo::getMemOperandsWithOffsetWidth( |
249 | const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, |
250 | int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, |
251 | const TargetRegisterInfo *TRI) const { |
252 | if (!LdSt.mayLoadOrStore()) |
253 | return false; |
254 | |
255 | unsigned Opc = LdSt.getOpcode(); |
256 | OffsetIsScalable = false; |
257 | const MachineOperand *BaseOp, *OffsetOp; |
258 | int DataOpIdx; |
259 | |
260 | if (isDS(LdSt)) { |
261 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); |
262 | OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); |
263 | if (OffsetOp) { |
264 | |
265 | if (!BaseOp) { |
266 | |
267 | |
268 | return false; |
269 | } |
270 | BaseOps.push_back(BaseOp); |
271 | Offset = OffsetOp->getImm(); |
272 | |
273 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
274 | if (DataOpIdx == -1) |
275 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
276 | Width = getOpSize(LdSt, DataOpIdx); |
277 | } else { |
278 | |
279 | |
280 | |
281 | const MachineOperand *Offset0Op = |
282 | getNamedOperand(LdSt, AMDGPU::OpName::offset0); |
283 | const MachineOperand *Offset1Op = |
284 | getNamedOperand(LdSt, AMDGPU::OpName::offset1); |
285 | |
286 | unsigned Offset0 = Offset0Op->getImm(); |
287 | unsigned Offset1 = Offset1Op->getImm(); |
288 | if (Offset0 + 1 != Offset1) |
289 | return false; |
290 | |
291 | |
292 | |
293 | |
294 | unsigned EltSize; |
295 | if (LdSt.mayLoad()) |
296 | EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; |
297 | else { |
298 | assert(LdSt.mayStore()); |
299 | int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
300 | EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; |
301 | } |
302 | |
303 | if (isStride64(Opc)) |
304 | EltSize *= 64; |
305 | |
306 | BaseOps.push_back(BaseOp); |
307 | Offset = EltSize * Offset0; |
308 | |
309 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
310 | if (DataOpIdx == -1) { |
311 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
312 | Width = getOpSize(LdSt, DataOpIdx); |
313 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); |
314 | Width += getOpSize(LdSt, DataOpIdx); |
315 | } else { |
316 | Width = getOpSize(LdSt, DataOpIdx); |
317 | } |
318 | } |
319 | return true; |
320 | } |
321 | |
322 | if (isMUBUF(LdSt) || isMTBUF(LdSt)) { |
323 | const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); |
324 | if (!RSrc) |
325 | return false; |
326 | BaseOps.push_back(RSrc); |
327 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); |
328 | if (BaseOp && !BaseOp->isFI()) |
329 | BaseOps.push_back(BaseOp); |
330 | const MachineOperand *OffsetImm = |
331 | getNamedOperand(LdSt, AMDGPU::OpName::offset); |
332 | Offset = OffsetImm->getImm(); |
333 | const MachineOperand *SOffset = |
334 | getNamedOperand(LdSt, AMDGPU::OpName::soffset); |
335 | if (SOffset) { |
336 | if (SOffset->isReg()) |
337 | BaseOps.push_back(SOffset); |
338 | else |
339 | Offset += SOffset->getImm(); |
340 | } |
341 | |
342 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
343 | if (DataOpIdx == -1) |
344 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
345 | Width = getOpSize(LdSt, DataOpIdx); |
346 | return true; |
347 | } |
348 | |
349 | if (isMIMG(LdSt)) { |
350 | int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); |
351 | BaseOps.push_back(&LdSt.getOperand(SRsrcIdx)); |
352 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); |
353 | if (VAddr0Idx >= 0) { |
354 | |
355 | for (int I = VAddr0Idx; I < SRsrcIdx; ++I) |
356 | BaseOps.push_back(&LdSt.getOperand(I)); |
357 | } else { |
358 | BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr)); |
359 | } |
360 | Offset = 0; |
361 | |
362 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
363 | Width = getOpSize(LdSt, DataOpIdx); |
364 | return true; |
365 | } |
366 | |
367 | if (isSMRD(LdSt)) { |
368 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase); |
369 | if (!BaseOp) |
370 | return false; |
371 | BaseOps.push_back(BaseOp); |
372 | OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); |
373 | Offset = OffsetOp ? OffsetOp->getImm() : 0; |
374 | |
375 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); |
376 | Width = getOpSize(LdSt, DataOpIdx); |
377 | return true; |
378 | } |
379 | |
380 | if (isFLAT(LdSt)) { |
381 | |
382 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); |
383 | if (BaseOp) |
384 | BaseOps.push_back(BaseOp); |
385 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); |
386 | if (BaseOp) |
387 | BaseOps.push_back(BaseOp); |
388 | Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); |
389 | |
390 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
391 | if (DataOpIdx == -1) |
392 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
393 | Width = getOpSize(LdSt, DataOpIdx); |
394 | return true; |
395 | } |
396 | |
397 | return false; |
398 | } |
399 | |
400 | static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, |
401 | ArrayRef<const MachineOperand *> BaseOps1, |
402 | const MachineInstr &MI2, |
403 | ArrayRef<const MachineOperand *> BaseOps2) { |
404 | |
405 | |
406 | |
407 | if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) |
408 | return true; |
409 | |
410 | if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) |
411 | return false; |
412 | |
413 | auto MO1 = *MI1.memoperands_begin(); |
414 | auto MO2 = *MI2.memoperands_begin(); |
415 | if (MO1->getAddrSpace() != MO2->getAddrSpace()) |
416 | return false; |
417 | |
418 | auto Base1 = MO1->getValue(); |
419 | auto Base2 = MO2->getValue(); |
420 | if (!Base1 || !Base2) |
421 | return false; |
422 | Base1 = getUnderlyingObject(Base1); |
423 | Base2 = getUnderlyingObject(Base2); |
424 | |
425 | if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) |
426 | return false; |
427 | |
428 | return Base1 == Base2; |
429 | } |
430 | |
431 | bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, |
432 | ArrayRef<const MachineOperand *> BaseOps2, |
433 | unsigned NumLoads, |
434 | unsigned NumBytes) const { |
435 | |
436 | |
437 | if (!BaseOps1.empty() && !BaseOps2.empty()) { |
438 | const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); |
439 | const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); |
440 | if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) |
441 | return false; |
442 | } else if (!BaseOps1.empty() || !BaseOps2.empty()) { |
443 | |
444 | return false; |
445 | } |
446 | |
447 | |
448 | |
449 | |
450 | |
451 | |
452 | |
453 | |
454 | |
455 | |
456 | |
457 | |
458 | |
459 | const unsigned LoadSize = NumBytes / NumLoads; |
460 | const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; |
461 | return NumDWORDs <= 8; |
462 | } |
463 | |
464 | |
465 | |
466 | |
467 | |
468 | |
469 | |
470 | |
471 | |
472 | |
473 | |
474 | bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, |
475 | int64_t Offset0, int64_t Offset1, |
476 | unsigned NumLoads) const { |
477 | assert(Offset1 > Offset0 && |
478 | "Second offset should be larger than first offset!"); |
479 | |
480 | |
481 | |
482 | |
483 | return (NumLoads <= 16 && (Offset1 - Offset0) < 64); |
484 | } |
485 | |
486 | static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, |
487 | MachineBasicBlock::iterator MI, |
488 | const DebugLoc &DL, MCRegister DestReg, |
489 | MCRegister SrcReg, bool KillSrc, |
490 | const char *Msg = "illegal SGPR to VGPR copy") { |
491 | MachineFunction *MF = MBB.getParent(); |
492 | DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error); |
493 | LLVMContext &C = MF->getFunction().getContext(); |
494 | C.diagnose(IllegalCopy); |
495 | |
496 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) |
497 | .addReg(SrcReg, getKillRegState(KillSrc)); |
498 | } |
499 | |
500 | |
501 | |
502 | static void indirectCopyToAGPR(const SIInstrInfo &TII, |
503 | MachineBasicBlock &MBB, |
504 | MachineBasicBlock::iterator MI, |
505 | const DebugLoc &DL, MCRegister DestReg, |
506 | MCRegister SrcReg, bool KillSrc, |
507 | RegScavenger &RS, |
508 | Register ImpDefSuperReg = Register(), |
509 | Register ImpUseSuperReg = Register()) { |
510 | const SIRegisterInfo &RI = TII.getRegisterInfo(); |
511 | |
512 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg) || |
513 | AMDGPU::AGPR_32RegClass.contains(SrcReg)); |
514 | |
515 | |
516 | for (auto Def = MI, E = MBB.begin(); Def != E; ) { |
517 | --Def; |
518 | if (!Def->definesRegister(SrcReg, &RI)) |
519 | continue; |
520 | if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) |
521 | break; |
522 | |
523 | MachineOperand &DefOp = Def->getOperand(1); |
524 | assert(DefOp.isReg() || DefOp.isImm()); |
525 | |
526 | if (DefOp.isReg()) { |
527 | |
528 | |
529 | bool SafeToPropagate = true; |
530 | for (auto I = Def; I != MI && SafeToPropagate; ++I) |
531 | if (I->modifiesRegister(DefOp.getReg(), &RI)) |
532 | SafeToPropagate = false; |
533 | |
534 | if (!SafeToPropagate) |
535 | break; |
536 | |
537 | DefOp.setIsKill(false); |
538 | } |
539 | |
540 | MachineInstrBuilder Builder = |
541 | BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
542 | .add(DefOp); |
543 | if (ImpDefSuperReg) |
544 | Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); |
545 | |
546 | if (ImpUseSuperReg) { |
547 | Builder.addReg(ImpUseSuperReg, |
548 | getKillRegState(KillSrc) | RegState::Implicit); |
549 | } |
550 | |
551 | return; |
552 | } |
553 | |
554 | RS.enterBasicBlock(MBB); |
555 | RS.forward(MI); |
556 | |
557 | |
558 | |
559 | unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass, |
560 | *MBB.getParent()); |
561 | |
562 | // Pick one of up to three temporary VGPRs, keyed on the destination |
563 | // register number, so consecutive AGPR copies rotate through temps. |
564 | unsigned RegNo = DestReg % 3; |
565 | Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); |
566 | if (!Tmp) |
567 | report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); |
568 | RS.setRegUsed(Tmp); |
569 | |
570 | if (!TII.getSubtarget().hasGFX90AInsts()) { |
571 | |
572 | |
573 | |
574 | while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { |
575 | Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); |
576 | if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) |
577 | break; |
578 | Tmp = Tmp2; |
579 | RS.setRegUsed(Tmp); |
580 | } |
581 | } |
582 | |
583 | |
584 | unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32; |
585 | if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) { |
586 | TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64; |
587 | } else { |
588 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); |
589 | } |
590 | |
591 | MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp) |
592 | .addReg(SrcReg, getKillRegState(KillSrc)); |
593 | if (ImpUseSuperReg) { |
594 | UseBuilder.addReg(ImpUseSuperReg, |
595 | getKillRegState(KillSrc) | RegState::Implicit); |
596 | } |
597 | |
598 | MachineInstrBuilder DefBuilder |
599 | = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
600 | .addReg(Tmp, RegState::Kill); |
601 | |
602 | if (ImpDefSuperReg) |
603 | DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); |
604 | } |
605 | |
606 | static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, |
607 | MachineBasicBlock::iterator MI, const DebugLoc &DL, |
608 | MCRegister DestReg, MCRegister SrcReg, bool KillSrc, |
609 | const TargetRegisterClass *RC, bool Forward) { |
610 | const SIRegisterInfo &RI = TII.getRegisterInfo(); |
611 | ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4); |
612 | MachineBasicBlock::iterator I = MI; |
613 | MachineInstr *FirstMI = nullptr, *LastMI = nullptr; |
| 17 | 'FirstMI' initialized to a null pointer value |
|
614 | |
615 | for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) { |
| 18 | Assuming the condition is false |
|
| 19 | Loop condition is false. Execution continues on line 643 |
|
616 | int16_t SubIdx = BaseIndices[Idx]; |
617 | Register Reg = RI.getSubReg(DestReg, SubIdx); |
618 | unsigned Opcode = AMDGPU::S_MOV_B32; |
619 | |
620 | |
621 | Register Src = RI.getSubReg(SrcReg, SubIdx); |
622 | bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0; |
623 | bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0; |
624 | if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) { |
625 | |
626 | unsigned Channel = RI.getChannelFromSubReg(SubIdx); |
627 | SubIdx = RI.getSubRegFromChannel(Channel, 2); |
628 | Opcode = AMDGPU::S_MOV_B64; |
629 | Idx++; |
630 | } |
631 | |
632 | LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx)) |
633 | .addReg(RI.getSubReg(SrcReg, SubIdx)) |
634 | .addReg(SrcReg, RegState::Implicit); |
635 | |
636 | if (!FirstMI) |
637 | FirstMI = LastMI; |
638 | |
639 | if (!Forward) |
640 | I--; |
641 | } |
642 | |
643 | assert(FirstMI && LastMI); |
644 | if (!Forward) |
| 20 | Assuming 'Forward' is true |
|
645 | std::swap(FirstMI, LastMI); |
646 | |
647 | FirstMI->addOperand( |
| 22 | Called C++ object pointer is null |
|
648 | MachineOperand::CreateReg(DestReg, /*isDef=*/true, /*isImp=*/true)); |
649 | |
650 | if (KillSrc) |
651 | LastMI->addRegisterKilled(SrcReg, &RI); |
652 | } |
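
Note on the flagged path: in steps 17-22 the analyzer assumes that
RI.getRegSplitParts(RC, 4) returns an empty list, so the copy loop above never
runs, FirstMI keeps its nullptr initializer, and the FirstMI->addOperand() call
on line 647 dereferences a null pointer. The assert on line 643 does not cut
this path because the translation unit is compiled with -D NDEBUG (see the
command line at the top of the report). The sketch below shows one way such a
path could be guarded explicitly; it is illustrative only, not part of the
analyzed source, and it does not claim that an empty split list is actually
reachable for an SGPR register class.

    // Illustrative guard, not present in SIInstrInfo.cpp: bail out instead of
    // dereferencing FirstMI/LastMI when the loop above emitted no moves.
    if (!FirstMI || !LastMI)
      return;
    FirstMI->addOperand(
        MachineOperand::CreateReg(DestReg, /*isDef=*/true, /*isImp=*/true));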
653 | |
654 | void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
655 | MachineBasicBlock::iterator MI, |
656 | const DebugLoc &DL, MCRegister DestReg, |
657 | MCRegister SrcReg, bool KillSrc) const { |
658 | const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); |
659 | |
660 | |
661 | |
662 | if (Fix16BitCopies && |
| 1 | Assuming the condition is false |
|
663 | ((RI.getRegSizeInBits(*RC) == 16) ^ |
664 | (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) { |
665 | MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg; |
666 | MCRegister Super = RI.get32BitRegister(RegToFix); |
667 | assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix); |
668 | RegToFix = Super; |
669 | |
670 | if (DestReg == SrcReg) { |
671 | |
672 | BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE)); |
673 | return; |
674 | } |
675 | |
676 | RC = RI.getPhysRegClass(DestReg); |
677 | } |
678 | |
679 | if (RC == &AMDGPU::VGPR_32RegClass) { |
| 2 | Assuming the condition is false |
|
680 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || |
681 | AMDGPU::SReg_32RegClass.contains(SrcReg) || |
682 | AMDGPU::AGPR_32RegClass.contains(SrcReg)); |
683 | unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ? |
684 | AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32; |
685 | BuildMI(MBB, MI, DL, get(Opc), DestReg) |
686 | .addReg(SrcReg, getKillRegState(KillSrc)); |
687 | return; |
688 | } |
689 | |
690 | if (RC == &AMDGPU::SReg_32_XM0RegClass || |
| 3 | Assuming the condition is false |
|
691 | RC == &AMDGPU::SReg_32RegClass) { |
| 4 | Assuming the condition is false |
|
692 | if (SrcReg == AMDGPU::SCC) { |
693 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) |
694 | .addImm(1) |
695 | .addImm(0); |
696 | return; |
697 | } |
698 | |
699 | if (DestReg == AMDGPU::VCC_LO) { |
700 | if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { |
701 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) |
702 | .addReg(SrcReg, getKillRegState(KillSrc)); |
703 | } else { |
704 | |
705 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); |
706 | BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) |
707 | .addImm(0) |
708 | .addReg(SrcReg, getKillRegState(KillSrc)); |
709 | } |
710 | |
711 | return; |
712 | } |
713 | |
714 | if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { |
715 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
716 | return; |
717 | } |
718 | |
719 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) |
720 | .addReg(SrcReg, getKillRegState(KillSrc)); |
721 | return; |
722 | } |
723 | |
724 | if (RC == &AMDGPU::SReg_64RegClass) { |
| 6 | Assuming the condition is false |
|
725 | if (SrcReg == AMDGPU::SCC) { |
726 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B64), DestReg) |
727 | .addImm(1) |
728 | .addImm(0); |
729 | return; |
730 | } |
731 | |
732 | if (DestReg == AMDGPU::VCC) { |
733 | if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
734 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) |
735 | .addReg(SrcReg, getKillRegState(KillSrc)); |
736 | } else { |
737 | |
738 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); |
739 | BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) |
740 | .addImm(0) |
741 | .addReg(SrcReg, getKillRegState(KillSrc)); |
742 | } |
743 | |
744 | return; |
745 | } |
746 | |
747 | if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
748 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
749 | return; |
750 | } |
751 | |
752 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) |
753 | .addReg(SrcReg, getKillRegState(KillSrc)); |
754 | return; |
755 | } |
756 | |
757 | if (DestReg == AMDGPU::SCC) { |
758 | |
759 | |
760 | if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
761 | |
762 | |
763 | |
764 | assert(ST.hasScalarCompareEq64()); |
765 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U64)) |
766 | .addReg(SrcReg, getKillRegState(KillSrc)) |
767 | .addImm(0); |
768 | } else { |
769 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); |
770 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) |
771 | .addReg(SrcReg, getKillRegState(KillSrc)) |
772 | .addImm(0); |
773 | } |
774 | |
775 | return; |
776 | } |
777 | |
778 | if (RC == &AMDGPU::AGPR_32RegClass) { |
| 9 | Assuming the condition is false |
|
779 | if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) { |
780 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
781 | .addReg(SrcReg, getKillRegState(KillSrc)); |
782 | return; |
783 | } |
784 | |
785 | if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) { |
786 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_MOV_B32), DestReg) |
787 | .addReg(SrcReg, getKillRegState(KillSrc)); |
788 | return; |
789 | } |
790 | |
791 | |
792 | |
793 | RegScavenger RS; |
794 | indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS); |
795 | return; |
796 | } |
797 | |
798 | const unsigned Size = RI.getRegSizeInBits(*RC); |
799 | if (Size == 16) { |
| 11 | Assuming 'Size' is not equal to 16 |
|
800 | assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || |
801 | AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || |
802 | AMDGPU::SReg_LO16RegClass.contains(SrcReg) || |
803 | AMDGPU::AGPR_LO16RegClass.contains(SrcReg)); |
804 | |
805 | bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg); |
806 | bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg); |
807 | bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg); |
808 | bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg); |
809 | bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) || |
810 | AMDGPU::SReg_LO16RegClass.contains(DestReg) || |
811 | AMDGPU::AGPR_LO16RegClass.contains(DestReg); |
812 | bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || |
813 | AMDGPU::SReg_LO16RegClass.contains(SrcReg) || |
814 | AMDGPU::AGPR_LO16RegClass.contains(SrcReg); |
815 | MCRegister NewDestReg = RI.get32BitRegister(DestReg); |
816 | MCRegister NewSrcReg = RI.get32BitRegister(SrcReg); |
817 | |
818 | if (IsSGPRDst) { |
819 | if (!IsSGPRSrc) { |
820 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
821 | return; |
822 | } |
823 | |
824 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg) |
825 | .addReg(NewSrcReg, getKillRegState(KillSrc)); |
826 | return; |
827 | } |
828 | |
829 | if (IsAGPRDst || IsAGPRSrc) { |
830 | if (!DstLow || !SrcLow) { |
831 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, |
832 | "Cannot use hi16 subreg with an AGPR!"); |
833 | } |
834 | |
835 | copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); |
836 | return; |
837 | } |
838 | |
839 | if (IsSGPRSrc && !ST.hasSDWAScalar()) { |
840 | if (!DstLow || !SrcLow) { |
841 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, |
842 | "Cannot use hi16 subreg on VI!"); |
843 | } |
844 | |
845 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) |
846 | .addReg(NewSrcReg, getKillRegState(KillSrc)); |
847 | return; |
848 | } |
849 | |
850 | auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) |
851 | .addImm(0) |
852 | .addReg(NewSrcReg) |
853 | .addImm(0) |
854 | .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 |
855 | : AMDGPU::SDWA::SdwaSel::WORD_1) |
856 | .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) |
857 | .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 |
858 | : AMDGPU::SDWA::SdwaSel::WORD_1) |
859 | .addReg(NewDestReg, RegState::Implicit | RegState::Undef); |
860 | |
861 | MIB->tieOperands(0, MIB->getNumOperands() - 1); |
862 | return; |
863 | } |
864 | |
865 | const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg); |
866 | if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) { |
| 13 | Assuming the condition is false |
|
867 | if (ST.hasPackedFP32Ops()) { |
868 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) |
869 | .addImm(SISrcMods::OP_SEL_1) |
870 | .addReg(SrcReg) |
871 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
872 | .addReg(SrcReg) |
873 | .addImm(0) |
874 | .addImm(0) |
875 | .addImm(0) |
876 | .addImm(0) |
877 | .addImm(0) |
878 | .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); |
879 | return; |
880 | } |
881 | } |
882 | |
883 | const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); |
884 | if (RI.isSGPRClass(RC)) { |
885 | if (!RI.isSGPRClass(SrcRC)) { |
886 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
887 | return; |
888 | } |
889 | expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward); |
| 16 | Calling 'expandSGPRCopy' |
|
890 | return; |
891 | } |
892 | |
893 | unsigned EltSize = 4; |
894 | unsigned Opcode = AMDGPU::V_MOV_B32_e32; |
895 | if (RI.hasAGPRs(RC)) { |
896 | Opcode = (RI.hasVGPRs(SrcRC)) ? |
897 | AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; |
898 | } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) { |
899 | Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64; |
900 | } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) && |
901 | (RI.isProperlyAlignedRC(*RC) && |
902 | (SrcRC == RC || RI.isSGPRClass(SrcRC)))) { |
903 | |
904 | if (ST.hasPackedFP32Ops()) { |
905 | Opcode = AMDGPU::V_PK_MOV_B32; |
906 | EltSize = 8; |
907 | } |
908 | } |
909 | |
910 | |
911 | |
912 | |
913 | |
914 | |
915 | std::unique_ptr<RegScavenger> RS; |
916 | if (Opcode == AMDGPU::INSTRUCTION_LIST_END) |
917 | RS.reset(new RegScavenger()); |
918 | |
919 | ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); |
920 | |
921 | |
922 | |
923 | const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); |
924 | |
925 | for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { |
926 | unsigned SubIdx; |
927 | if (Forward) |
928 | SubIdx = SubIndices[Idx]; |
929 | else |
930 | SubIdx = SubIndices[SubIndices.size() - Idx - 1]; |
931 | |
932 | bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; |
933 | |
934 | if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { |
935 | Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register(); |
936 | Register ImpUseSuper = SrcReg; |
937 | indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), |
938 | RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, |
939 | ImpDefSuper, ImpUseSuper); |
940 | } else if (Opcode == AMDGPU::V_PK_MOV_B32) { |
941 | Register DstSubReg = RI.getSubReg(DestReg, SubIdx); |
942 | Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); |
943 | MachineInstrBuilder MIB = |
944 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg) |
945 | .addImm(SISrcMods::OP_SEL_1) |
946 | .addReg(SrcSubReg) |
947 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
948 | .addReg(SrcSubReg) |
949 | .addImm(0) |
950 | .addImm(0) |
951 | .addImm(0) |
952 | .addImm(0) |
953 | .addImm(0) |
954 | .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); |
955 | if (Idx == 0) |
956 | MIB.addReg(DestReg, RegState::Define | RegState::Implicit); |
957 | } else { |
958 | MachineInstrBuilder Builder = |
959 | BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)) |
960 | .addReg(RI.getSubReg(SrcReg, SubIdx)); |
961 | if (Idx == 0) |
962 | Builder.addReg(DestReg, RegState::Define | RegState::Implicit); |
963 | |
964 | Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); |
965 | } |
966 | } |
967 | } |
968 | |
969 | int SIInstrInfo::commuteOpcode(unsigned Opcode) const { |
970 | int NewOpc; |
971 | |
972 | |
973 | NewOpc = AMDGPU::getCommuteRev(Opcode); |
974 | if (NewOpc != -1) |
975 | |
976 | return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; |
977 | |
978 | |
979 | NewOpc = AMDGPU::getCommuteOrig(Opcode); |
980 | if (NewOpc != -1) |
981 | |
982 | return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; |
983 | |
984 | return Opcode; |
985 | } |
986 | |
987 | void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, |
988 | MachineBasicBlock::iterator MI, |
989 | const DebugLoc &DL, unsigned DestReg, |
990 | int64_t Value) const { |
991 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
992 | const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); |
993 | if (RegClass == &AMDGPU::SReg_32RegClass || |
994 | RegClass == &AMDGPU::SGPR_32RegClass || |
995 | RegClass == &AMDGPU::SReg_32_XM0RegClass || |
996 | RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { |
997 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) |
998 | .addImm(Value); |
999 | return; |
1000 | } |
1001 | |
1002 | if (RegClass == &AMDGPU::SReg_64RegClass || |
1003 | RegClass == &AMDGPU::SGPR_64RegClass || |
1004 | RegClass == &AMDGPU::SReg_64_XEXECRegClass) { |
1005 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) |
1006 | .addImm(Value); |
1007 | return; |
1008 | } |
1009 | |
1010 | if (RegClass == &AMDGPU::VGPR_32RegClass) { |
1011 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) |
1012 | .addImm(Value); |
1013 | return; |
1014 | } |
1015 | if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) { |
1016 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) |
1017 | .addImm(Value); |
1018 | return; |
1019 | } |
1020 | |
1021 | unsigned EltSize = 4; |
1022 | unsigned Opcode = AMDGPU::V_MOV_B32_e32; |
1023 | if (RI.isSGPRClass(RegClass)) { |
1024 | if (RI.getRegSizeInBits(*RegClass) > 32) { |
1025 | Opcode = AMDGPU::S_MOV_B64; |
1026 | EltSize = 8; |
1027 | } else { |
1028 | Opcode = AMDGPU::S_MOV_B32; |
1029 | EltSize = 4; |
1030 | } |
1031 | } |
1032 | |
1033 | ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); |
1034 | for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { |
1035 | int64_t IdxValue = Idx == 0 ? Value : 0; |
1036 | |
1037 | MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, |
1038 | get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx])); |
1039 | Builder.addImm(IdxValue); |
1040 | } |
1041 | } |
1042 | |
1043 | const TargetRegisterClass * |
1044 | SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { |
1045 | return &AMDGPU::VGPR_32RegClass; |
1046 | } |
1047 | |
1048 | void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, |
1049 | MachineBasicBlock::iterator I, |
1050 | const DebugLoc &DL, Register DstReg, |
1051 | ArrayRef<MachineOperand> Cond, |
1052 | Register TrueReg, |
1053 | Register FalseReg) const { |
1054 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
1055 | const TargetRegisterClass *BoolXExecRC = |
1056 | RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
1057 | assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && |
1058 | "Not a VGPR32 reg"); |
1059 | |
1060 | if (Cond.size() == 1) { |
1061 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1062 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1063 | .add(Cond[0]); |
1064 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1065 | .addImm(0) |
1066 | .addReg(FalseReg) |
1067 | .addImm(0) |
1068 | .addReg(TrueReg) |
1069 | .addReg(SReg); |
1070 | } else if (Cond.size() == 2) { |
1071 | assert(Cond[0].isImm() && "Cond[0] is not an immediate"); |
1072 | switch (Cond[0].getImm()) { |
1073 | case SIInstrInfo::SCC_TRUE: { |
1074 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1075 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1076 | : AMDGPU::S_CSELECT_B64), SReg) |
1077 | .addImm(1) |
1078 | .addImm(0); |
1079 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1080 | .addImm(0) |
1081 | .addReg(FalseReg) |
1082 | .addImm(0) |
1083 | .addReg(TrueReg) |
1084 | .addReg(SReg); |
1085 | break; |
1086 | } |
1087 | case SIInstrInfo::SCC_FALSE: { |
1088 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1089 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1090 | : AMDGPU::S_CSELECT_B64), SReg) |
1091 | .addImm(0) |
1092 | .addImm(1); |
1093 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1094 | .addImm(0) |
1095 | .addReg(FalseReg) |
1096 | .addImm(0) |
1097 | .addReg(TrueReg) |
1098 | .addReg(SReg); |
1099 | break; |
1100 | } |
1101 | case SIInstrInfo::VCCNZ: { |
1102 | MachineOperand RegOp = Cond[1]; |
1103 | RegOp.setImplicit(false); |
1104 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1105 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1106 | .add(RegOp); |
1107 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1108 | .addImm(0) |
1109 | .addReg(FalseReg) |
1110 | .addImm(0) |
1111 | .addReg(TrueReg) |
1112 | .addReg(SReg); |
1113 | break; |
1114 | } |
1115 | case SIInstrInfo::VCCZ: { |
1116 | MachineOperand RegOp = Cond[1]; |
1117 | RegOp.setImplicit(false); |
1118 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1119 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1120 | .add(RegOp); |
1121 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1122 | .addImm(0) |
1123 | .addReg(TrueReg) |
1124 | .addImm(0) |
1125 | .addReg(FalseReg) |
1126 | .addReg(SReg); |
1127 | break; |
1128 | } |
1129 | case SIInstrInfo::EXECNZ: { |
1130 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1131 | Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); |
1132 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1133 | : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) |
1134 | .addImm(0); |
1135 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1136 | : AMDGPU::S_CSELECT_B64), SReg) |
1137 | .addImm(1) |
1138 | .addImm(0); |
1139 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1140 | .addImm(0) |
1141 | .addReg(FalseReg) |
1142 | .addImm(0) |
1143 | .addReg(TrueReg) |
1144 | .addReg(SReg); |
1145 | break; |
1146 | } |
1147 | case SIInstrInfo::EXECZ: { |
1148 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1149 | Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); |
1150 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1151 | : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) |
1152 | .addImm(0); |
1153 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1154 | : AMDGPU::S_CSELECT_B64), SReg) |
1155 | .addImm(0) |
1156 | .addImm(1); |
1157 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1158 | .addImm(0) |
1159 | .addReg(FalseReg) |
1160 | .addImm(0) |
1161 | .addReg(TrueReg) |
1162 | .addReg(SReg); |
1163 | llvm_unreachable("Unhandled branch predicate EXECZ"); |
1164 | break; |
1165 | } |
1166 | default: |
1167 | llvm_unreachable("invalid branch predicate"); |
1168 | } |
1169 | } else { |
1170 | llvm_unreachable("Can only handle Cond size 1 or 2"); |
1171 | } |
1172 | } |
1173 | |
1174 | Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB, |
1175 | MachineBasicBlock::iterator I, |
1176 | const DebugLoc &DL, |
1177 | Register SrcReg, int Value) const { |
1178 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
1179 | Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); |
1180 | BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) |
1181 | .addImm(Value) |
1182 | .addReg(SrcReg); |
1183 | |
1184 | return Reg; |
1185 | } |
1186 | |
1187 | Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, |
1188 | MachineBasicBlock::iterator I, |
1189 | const DebugLoc &DL, |
1190 | Register SrcReg, int Value) const { |
1191 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
1192 | Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); |
1193 | BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) |
1194 | .addImm(Value) |
1195 | .addReg(SrcReg); |
1196 | |
1197 | return Reg; |
1198 | } |
1199 | |
1200 | unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { |
1201 | |
1202 | if (RI.hasAGPRs(DstRC)) |
1203 | return AMDGPU::COPY; |
1204 | if (RI.getRegSizeInBits(*DstRC) == 32) { |
1205 | return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; |
1206 | } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { |
1207 | return AMDGPU::S_MOV_B64; |
1208 | } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { |
1209 | return AMDGPU::V_MOV_B64_PSEUDO; |
1210 | } |
1211 | return AMDGPU::COPY; |
1212 | } |
1213 | |
1214 | const MCInstrDesc & |
1215 | SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize, |
1216 | bool IsIndirectSrc) const { |
1217 | if (IsIndirectSrc) { |
1218 | if (VecSize <= 32) |
1219 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1); |
1220 | if (VecSize <= 64) |
1221 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2); |
1222 | if (VecSize <= 96) |
1223 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3); |
1224 | if (VecSize <= 128) |
1225 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4); |
1226 | if (VecSize <= 160) |
1227 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5); |
1228 | if (VecSize <= 256) |
1229 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8); |
1230 | if (VecSize <= 512) |
1231 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16); |
1232 | if (VecSize <= 1024) |
1233 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32); |
1234 | |
1235 | llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos"); |
1236 | } |
1237 | |
1238 | if (VecSize <= 32) |
1239 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1); |
1240 | if (VecSize <= 64) |
1241 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2); |
1242 | if (VecSize <= 96) |
1243 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3); |
1244 | if (VecSize <= 128) |
1245 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4); |
1246 | if (VecSize <= 160) |
1247 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5); |
1248 | if (VecSize <= 256) |
1249 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8); |
1250 | if (VecSize <= 512) |
1251 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16); |
1252 | if (VecSize <= 1024) |
1253 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32); |
1254 | |
1255 | llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos"); |
1256 | } |
1257 | |
1258 | static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) { |
1259 | if (VecSize <= 32) |
1260 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1; |
1261 | if (VecSize <= 64) |
1262 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2; |
1263 | if (VecSize <= 96) |
1264 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3; |
1265 | if (VecSize <= 128) |
1266 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4; |
1267 | if (VecSize <= 160) |
1268 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5; |
1269 | if (VecSize <= 256) |
1270 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8; |
1271 | if (VecSize <= 512) |
1272 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16; |
1273 | if (VecSize <= 1024) |
1274 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32; |
1275 | |
1276 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1277 | } |
1278 | |
1279 | static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) { |
1280 | if (VecSize <= 32) |
1281 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1; |
1282 | if (VecSize <= 64) |
1283 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2; |
1284 | if (VecSize <= 96) |
1285 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3; |
1286 | if (VecSize <= 128) |
1287 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4; |
1288 | if (VecSize <= 160) |
1289 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; |
1290 | if (VecSize <= 256) |
1291 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; |
1292 | if (VecSize <= 512) |
1293 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; |
1294 | if (VecSize <= 1024) |
1295 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32; |
1296 | |
1297 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1298 | } |
1299 | |
1300 | static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) { |
1301 | if (VecSize <= 64) |
1302 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1; |
1303 | if (VecSize <= 128) |
1304 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2; |
1305 | if (VecSize <= 256) |
1306 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4; |
1307 | if (VecSize <= 512) |
1308 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8; |
1309 | if (VecSize <= 1024) |
1310 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16; |
1311 | |
1312 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1313 | } |
1314 | |
1315 | const MCInstrDesc & |
1316 | SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, |
1317 | bool IsSGPR) const { |
1318 | if (IsSGPR) { |
1319 | switch (EltSize) { |
1320 | case 32: |
1321 | return get(getIndirectSGPRWriteMovRelPseudo32(VecSize)); |
1322 | case 64: |
1323 | return get(getIndirectSGPRWriteMovRelPseudo64(VecSize)); |
1324 | default: |
1325 | llvm_unreachable("invalid reg indexing elt size"); |
1326 | } |
1327 | } |
1328 | |
1329 | assert(EltSize == 32 && "invalid reg indexing elt size"); |
1330 | return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); |
1331 | } |
1332 | |
1333 | static unsigned getSGPRSpillSaveOpcode(unsigned Size) { |
1334 | switch (Size) { |
1335 | case 4: |
1336 | return AMDGPU::SI_SPILL_S32_SAVE; |
1337 | case 8: |
1338 | return AMDGPU::SI_SPILL_S64_SAVE; |
1339 | case 12: |
1340 | return AMDGPU::SI_SPILL_S96_SAVE; |
1341 | case 16: |
1342 | return AMDGPU::SI_SPILL_S128_SAVE; |
1343 | case 20: |
1344 | return AMDGPU::SI_SPILL_S160_SAVE; |
1345 | case 24: |
1346 | return AMDGPU::SI_SPILL_S192_SAVE; |
1347 | case 28: |
1348 | return AMDGPU::SI_SPILL_S224_SAVE; |
1349 | case 32: |
1350 | return AMDGPU::SI_SPILL_S256_SAVE; |
1351 | case 64: |
1352 | return AMDGPU::SI_SPILL_S512_SAVE; |
1353 | case 128: |
1354 | return AMDGPU::SI_SPILL_S1024_SAVE; |
1355 | default: |
1356 | llvm_unreachable("unknown register size"); |
1357 | } |
1358 | } |
1359 | |
1360 | static unsigned getVGPRSpillSaveOpcode(unsigned Size) { |
1361 | switch (Size) { |
1362 | case 4: |
1363 | return AMDGPU::SI_SPILL_V32_SAVE; |
1364 | case 8: |
1365 | return AMDGPU::SI_SPILL_V64_SAVE; |
1366 | case 12: |
1367 | return AMDGPU::SI_SPILL_V96_SAVE; |
1368 | case 16: |
1369 | return AMDGPU::SI_SPILL_V128_SAVE; |
1370 | case 20: |
1371 | return AMDGPU::SI_SPILL_V160_SAVE; |
1372 | case 24: |
1373 | return AMDGPU::SI_SPILL_V192_SAVE; |
1374 | case 28: |
1375 | return AMDGPU::SI_SPILL_V224_SAVE; |
1376 | case 32: |
1377 | return AMDGPU::SI_SPILL_V256_SAVE; |
1378 | case 64: |
1379 | return AMDGPU::SI_SPILL_V512_SAVE; |
1380 | case 128: |
1381 | return AMDGPU::SI_SPILL_V1024_SAVE; |
1382 | default: |
1383 | llvm_unreachable("unknown register size"); |
1384 | } |
1385 | } |
1386 | |
1387 | static unsigned getAGPRSpillSaveOpcode(unsigned Size) { |
1388 | switch (Size) { |
1389 | case 4: |
1390 | return AMDGPU::SI_SPILL_A32_SAVE; |
1391 | case 8: |
1392 | return AMDGPU::SI_SPILL_A64_SAVE; |
1393 | case 12: |
1394 | return AMDGPU::SI_SPILL_A96_SAVE; |
1395 | case 16: |
1396 | return AMDGPU::SI_SPILL_A128_SAVE; |
1397 | case 20: |
1398 | return AMDGPU::SI_SPILL_A160_SAVE; |
1399 | case 24: |
1400 | return AMDGPU::SI_SPILL_A192_SAVE; |
1401 | case 28: |
1402 | return AMDGPU::SI_SPILL_A224_SAVE; |
1403 | case 32: |
1404 | return AMDGPU::SI_SPILL_A256_SAVE; |
1405 | case 64: |
1406 | return AMDGPU::SI_SPILL_A512_SAVE; |
1407 | case 128: |
1408 | return AMDGPU::SI_SPILL_A1024_SAVE; |
1409 | default: |
1410 | llvm_unreachable("unknown register size"); |
1411 | } |
1412 | } |
1413 | |
1414 | void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
1415 | MachineBasicBlock::iterator MI, |
1416 | Register SrcReg, bool isKill, |
1417 | int FrameIndex, |
1418 | const TargetRegisterClass *RC, |
1419 | const TargetRegisterInfo *TRI) const { |
1420 | MachineFunction *MF = MBB.getParent(); |
1421 | SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
1422 | MachineFrameInfo &FrameInfo = MF->getFrameInfo(); |
1423 | const DebugLoc &DL = MBB.findDebugLoc(MI); |
1424 | |
1425 | MachinePointerInfo PtrInfo |
1426 | = MachinePointerInfo::getFixedStack(*MF, FrameIndex); |
1427 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
1428 | PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex), |
1429 | FrameInfo.getObjectAlign(FrameIndex)); |
1430 | unsigned SpillSize = TRI->getSpillSize(*RC); |
1431 | |
1432 | if (RI.isSGPRClass(RC)) { |
1433 | MFI->setHasSpilledSGPRs(); |
1434 | assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); |
1435 | assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI && |
1436 | SrcReg != AMDGPU::EXEC && "exec should not be spilled"); |
1437 | |
1438 | |
1439 | |
1440 | const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); |
1441 | |
1442 | |
1443 | |
1444 | if (SrcReg.isVirtual() && SpillSize == 4) { |
1445 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1446 | MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); |
1447 | } |
1448 | |
1449 | BuildMI(MBB, MI, DL, OpDesc) |
1450 | .addReg(SrcReg, getKillRegState(isKill)) |
1451 | .addFrameIndex(FrameIndex) |
1452 | .addMemOperand(MMO) |
1453 | .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); |
1454 | |
1455 | if (RI.spillSGPRToVGPR()) |
1456 | FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); |
1457 | return; |
1458 | } |
1459 | |
1460 | unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize) |
1461 | : getVGPRSpillSaveOpcode(SpillSize); |
1462 | MFI->setHasSpilledVGPRs(); |
1463 | |
1464 | BuildMI(MBB, MI, DL, get(Opcode)) |
1465 | .addReg(SrcReg, getKillRegState(isKill)) |
1466 | .addFrameIndex(FrameIndex) |
1467 | .addReg(MFI->getStackPtrOffsetReg()) |
1468 | .addImm(0) |
1469 | .addMemOperand(MMO); |
1470 | } |
1471 | |
1472 | static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { |
1473 | switch (Size) { |
1474 | case 4: |
1475 | return AMDGPU::SI_SPILL_S32_RESTORE; |
1476 | case 8: |
1477 | return AMDGPU::SI_SPILL_S64_RESTORE; |
1478 | case 12: |
1479 | return AMDGPU::SI_SPILL_S96_RESTORE; |
1480 | case 16: |
1481 | return AMDGPU::SI_SPILL_S128_RESTORE; |
1482 | case 20: |
1483 | return AMDGPU::SI_SPILL_S160_RESTORE; |
1484 | case 24: |
1485 | return AMDGPU::SI_SPILL_S192_RESTORE; |
1486 | case 28: |
1487 | return AMDGPU::SI_SPILL_S224_RESTORE; |
1488 | case 32: |
1489 | return AMDGPU::SI_SPILL_S256_RESTORE; |
1490 | case 64: |
1491 | return AMDGPU::SI_SPILL_S512_RESTORE; |
1492 | case 128: |
1493 | return AMDGPU::SI_SPILL_S1024_RESTORE; |
1494 | default: |
1495 | llvm_unreachable("unknown register size"); |
1496 | } |
1497 | } |
1498 | |
1499 | static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { |
1500 | switch (Size) { |
1501 | case 4: |
1502 | return AMDGPU::SI_SPILL_V32_RESTORE; |
1503 | case 8: |
1504 | return AMDGPU::SI_SPILL_V64_RESTORE; |
1505 | case 12: |
1506 | return AMDGPU::SI_SPILL_V96_RESTORE; |
1507 | case 16: |
1508 | return AMDGPU::SI_SPILL_V128_RESTORE; |
1509 | case 20: |
1510 | return AMDGPU::SI_SPILL_V160_RESTORE; |
1511 | case 24: |
1512 | return AMDGPU::SI_SPILL_V192_RESTORE; |
1513 | case 28: |
1514 | return AMDGPU::SI_SPILL_V224_RESTORE; |
1515 | case 32: |
1516 | return AMDGPU::SI_SPILL_V256_RESTORE; |
1517 | case 64: |
1518 | return AMDGPU::SI_SPILL_V512_RESTORE; |
1519 | case 128: |
1520 | return AMDGPU::SI_SPILL_V1024_RESTORE; |
1521 | default: |
1522 | llvm_unreachable("unknown register size"); |
1523 | } |
1524 | } |
1525 | |
1526 | static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { |
1527 | switch (Size) { |
1528 | case 4: |
1529 | return AMDGPU::SI_SPILL_A32_RESTORE; |
1530 | case 8: |
1531 | return AMDGPU::SI_SPILL_A64_RESTORE; |
1532 | case 12: |
1533 | return AMDGPU::SI_SPILL_A96_RESTORE; |
1534 | case 16: |
1535 | return AMDGPU::SI_SPILL_A128_RESTORE; |
1536 | case 20: |
1537 | return AMDGPU::SI_SPILL_A160_RESTORE; |
1538 | case 24: |
1539 | return AMDGPU::SI_SPILL_A192_RESTORE; |
1540 | case 28: |
1541 | return AMDGPU::SI_SPILL_A224_RESTORE; |
1542 | case 32: |
1543 | return AMDGPU::SI_SPILL_A256_RESTORE; |
1544 | case 64: |
1545 | return AMDGPU::SI_SPILL_A512_RESTORE; |
1546 | case 128: |
1547 | return AMDGPU::SI_SPILL_A1024_RESTORE; |
1548 | default: |
1549 | llvm_unreachable("unknown register size"); |
1550 | } |
1551 | } |
1552 | |
1553 | void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
1554 | MachineBasicBlock::iterator MI, |
1555 | Register DestReg, int FrameIndex, |
1556 | const TargetRegisterClass *RC, |
1557 | const TargetRegisterInfo *TRI) const { |
1558 | MachineFunction *MF = MBB.getParent(); |
1559 | SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
1560 | MachineFrameInfo &FrameInfo = MF->getFrameInfo(); |
1561 | const DebugLoc &DL = MBB.findDebugLoc(MI); |
1562 | unsigned SpillSize = TRI->getSpillSize(*RC); |
1563 | |
1564 | MachinePointerInfo PtrInfo |
1565 | = MachinePointerInfo::getFixedStack(*MF, FrameIndex); |
1566 | |
1567 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
1568 | PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex), |
1569 | FrameInfo.getObjectAlign(FrameIndex)); |
1570 | |
1571 | if (RI.isSGPRClass(RC)) { |
1572 | MFI->setHasSpilledSGPRs(); |
1573 | assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into"); |
1574 | assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI && |
1575 | DestReg != AMDGPU::EXEC && "exec should not be spilled"); |
1576 | |
1577 | |
1578 | |
1579 | const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); |
1580 | if (DestReg.isVirtual() && SpillSize == 4) { |
1581 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1582 | MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); |
1583 | } |
1584 | |
1585 | if (RI.spillSGPRToVGPR()) |
1586 | FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); |
1587 | BuildMI(MBB, MI, DL, OpDesc, DestReg) |
1588 | .addFrameIndex(FrameIndex) |
1589 | .addMemOperand(MMO) |
1590 | .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); |
1591 | |
1592 | return; |
1593 | } |
1594 | |
1595 | unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize) |
1596 | : getVGPRSpillRestoreOpcode(SpillSize); |
1597 | BuildMI(MBB, MI, DL, get(Opcode), DestReg) |
1598 | .addFrameIndex(FrameIndex) |
1599 | .addReg(MFI->getStackPtrOffsetReg()) |
1600 | .addImm(0) |
1601 | .addMemOperand(MMO); |
1602 | } |
1603 | |
1604 | void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, |
1605 | MachineBasicBlock::iterator MI) const { |
1606 | insertNoops(MBB, MI, 1); |
1607 | } |
1608 | |
1609 | void SIInstrInfo::insertNoops(MachineBasicBlock &MBB, |
1610 | MachineBasicBlock::iterator MI, |
1611 | unsigned Quantity) const { |
1612 | DebugLoc DL = MBB.findDebugLoc(MI); |
1613 | while (Quantity > 0) { |
1614 | unsigned Arg = std::min(Quantity, 8u); |
1615 | Quantity -= Arg; |
1616 | BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1); |
1617 | } |
1618 | } |
1619 | |
1620 | void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { |
1621 | auto MF = MBB.getParent(); |
1622 | SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); |
1623 | |
1624 | assert(Info->isEntryFunction()); |
1625 | |
1626 | if (MBB.succ_empty()) { |
1627 | bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); |
1628 | if (HasNoTerminator) { |
1629 | if (Info->returnsVoid()) { |
1630 | BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0); |
1631 | } else { |
1632 | BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG)); |
1633 | } |
1634 | } |
1635 | } |
1636 | } |
1637 | |
1638 | unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) { |
1639 | switch (MI.getOpcode()) { |
1640 | default: |
1641 | if (MI.isMetaInstruction()) |
1642 | return 0; |
1643 | return 1; |
1644 | |
1645 | case AMDGPU::S_NOP: |
1646 | return MI.getOperand(0).getImm() + 1; |
1647 | |
1648 | |
1649 | // These pseudos are removed before emission or expand to nothing, so they |
1650 | // consume no wait states. |
1651 | case AMDGPU::SI_MASKED_UNREACHABLE: |
1652 | case AMDGPU::WAVE_BARRIER: |
1653 | return 0; |
1654 | } |
1655 | } |
1656 | |
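     | // Expand target pseudos that survive until after register allocation into |
     | // real machine instructions. Returns true if the pseudo was handled here; |
     | // anything else is deferred to the generic TargetInstrInfo implementation. |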
1657 | bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1658 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
1659 | MachineBasicBlock &MBB = *MI.getParent(); |
1660 | DebugLoc DL = MBB.findDebugLoc(MI); |
1661 | switch (MI.getOpcode()) { |
1662 | default: return TargetInstrInfo::expandPostRAPseudo(MI); |
1663 | case AMDGPU::S_MOV_B64_term: |
1664 | // The *_term variants below are terminators only so that spill code is |
1665 | // placed correctly during register allocation; lower them to the real opcodes. |
1666 | MI.setDesc(get(AMDGPU::S_MOV_B64)); |
1667 | break; |
1668 | |
1669 | case AMDGPU::S_MOV_B32_term: |
1670 | |
1671 | |
1672 | MI.setDesc(get(AMDGPU::S_MOV_B32)); |
1673 | break; |
1674 | |
1675 | case AMDGPU::S_XOR_B64_term: |
1676 | |
1677 | |
1678 | MI.setDesc(get(AMDGPU::S_XOR_B64)); |
1679 | break; |
1680 | |
1681 | case AMDGPU::S_XOR_B32_term: |
1682 | |
1683 | |
1684 | MI.setDesc(get(AMDGPU::S_XOR_B32)); |
1685 | break; |
1686 | case AMDGPU::S_OR_B64_term: |
1687 | |
1688 | |
1689 | MI.setDesc(get(AMDGPU::S_OR_B64)); |
1690 | break; |
1691 | case AMDGPU::S_OR_B32_term: |
1692 | |
1693 | |
1694 | MI.setDesc(get(AMDGPU::S_OR_B32)); |
1695 | break; |
1696 | |
1697 | case AMDGPU::S_ANDN2_B64_term: |
1698 | |
1699 | |
1700 | MI.setDesc(get(AMDGPU::S_ANDN2_B64)); |
1701 | break; |
1702 | |
1703 | case AMDGPU::S_ANDN2_B32_term: |
1704 | |
1705 | |
1706 | MI.setDesc(get(AMDGPU::S_ANDN2_B32)); |
1707 | break; |
1708 | |
1709 | case AMDGPU::S_AND_B64_term: |
1710 | |
1711 | |
1712 | MI.setDesc(get(AMDGPU::S_AND_B64)); |
1713 | break; |
1714 | |
1715 | case AMDGPU::S_AND_B32_term: |
1716 | |
1717 | |
1718 | MI.setDesc(get(AMDGPU::S_AND_B32)); |
1719 | break; |
1720 | |
1721 | case AMDGPU::V_MOV_B64_PSEUDO: { |
1722 | Register Dst = MI.getOperand(0).getReg(); |
1723 | Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); |
1724 | Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); |
1725 | |
1726 | const MachineOperand &SrcOp = MI.getOperand(1); |
1727 | |
1728 | assert(!SrcOp.isFPImm()); |
1729 | if (SrcOp.isImm()) { |
1730 | APInt Imm(64, SrcOp.getImm()); |
1731 | APInt Lo(32, Imm.getLoBits(32).getZExtValue()); |
1732 | APInt Hi(32, Imm.getHiBits(32).getZExtValue()); |
1733 | if (ST.hasPackedFP32Ops() && Lo == Hi && isInlineConstant(Lo)) { |
1734 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) |
1735 | .addImm(SISrcMods::OP_SEL_1) |
1736 | .addImm(Lo.getSExtValue()) |
1737 | .addImm(SISrcMods::OP_SEL_1) |
1738 | .addImm(Lo.getSExtValue()) |
1739 | .addImm(0) |
1740 | .addImm(0) |
1741 | .addImm(0) |
1742 | .addImm(0) |
1743 | .addImm(0); |
1744 | } else { |
1745 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) |
1746 | .addImm(Lo.getSExtValue()) |
1747 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1748 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) |
1749 | .addImm(Hi.getSExtValue()) |
1750 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1751 | } |
1752 | } else { |
1753 | assert(SrcOp.isReg()); |
1754 | if (ST.hasPackedFP32Ops() && |
1755 | !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { |
1756 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) |
1757 | .addImm(SISrcMods::OP_SEL_1) |
1758 | .addReg(SrcOp.getReg()) |
1759 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
1760 | .addReg(SrcOp.getReg()) |
1761 | .addImm(0) |
1762 | .addImm(0) |
1763 | .addImm(0) |
1764 | .addImm(0) |
1765 | .addImm(0); |
1766 | } else { |
1767 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) |
1768 | .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) |
1769 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1770 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) |
1771 | .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) |
1772 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1773 | } |
1774 | } |
1775 | MI.eraseFromParent(); |
1776 | break; |
1777 | } |
1778 | case AMDGPU::V_MOV_B64_DPP_PSEUDO: { |
1779 | expandMovDPP64(MI); |
1780 | break; |
1781 | } |
1782 | case AMDGPU::S_MOV_B64_IMM_PSEUDO: { |
1783 | const MachineOperand &SrcOp = MI.getOperand(1); |
1784 | assert(!SrcOp.isFPImm()); |
1785 | APInt Imm(64, SrcOp.getImm()); |
1786 | if (Imm.isIntN(32) || isInlineConstant(Imm)) { |
1787 | MI.setDesc(get(AMDGPU::S_MOV_B64)); |
1788 | break; |
1789 | } |
1790 | |
1791 | Register Dst = MI.getOperand(0).getReg(); |
1792 | Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); |
1793 | Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); |
1794 | |
1795 | APInt Lo(32, Imm.getLoBits(32).getZExtValue()); |
1796 | APInt Hi(32, Imm.getHiBits(32).getZExtValue()); |
1797 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo) |
1798 | .addImm(Lo.getSExtValue()) |
1799 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1800 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi) |
1801 | .addImm(Hi.getSExtValue()) |
1802 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1803 | MI.eraseFromParent(); |
1804 | break; |
1805 | } |
1806 | case AMDGPU::V_SET_INACTIVE_B32: { |
1807 | unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; |
1808 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1809 | auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); |
1810 | FirstNot->addRegisterDead(AMDGPU::SCC, TRI); |
1811 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) |
1812 | .add(MI.getOperand(2)); |
1813 | BuildMI(MBB, MI, DL, get(NotOpc), Exec) |
1814 | .addReg(Exec); |
1815 | MI.eraseFromParent(); |
1816 | break; |
1817 | } |
1818 | case AMDGPU::V_SET_INACTIVE_B64: { |
1819 | unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; |
1820 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1821 | auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); |
1822 | FirstNot->addRegisterDead(AMDGPU::SCC, TRI); |
1823 | MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), |
1824 | MI.getOperand(0).getReg()) |
1825 | .add(MI.getOperand(2)); |
1826 | expandPostRAPseudo(*Copy); |
1827 | BuildMI(MBB, MI, DL, get(NotOpc), Exec) |
1828 | .addReg(Exec); |
1829 | MI.eraseFromParent(); |
1830 | break; |
1831 | } |
1832 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1: |
1833 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2: |
1834 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3: |
1835 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4: |
1836 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5: |
1837 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8: |
1838 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16: |
1839 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32: |
1840 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1: |
1841 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2: |
1842 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3: |
1843 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: |
1844 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: |
1845 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: |
1846 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: |
1847 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: |
1848 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: |
1849 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2: |
1850 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4: |
1851 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8: |
1852 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: { |
1853 | const TargetRegisterClass *EltRC = getOpRegClass(MI, 2); |
1854 | |
1855 | unsigned Opc; |
1856 | if (RI.hasVGPRs(EltRC)) { |
1857 | Opc = AMDGPU::V_MOVRELD_B32_e32; |
1858 | } else { |
1859 | Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64 |
1860 | : AMDGPU::S_MOVRELD_B32; |
1861 | } |
1862 | |
1863 | const MCInstrDesc &OpDesc = get(Opc); |
1864 | Register VecReg = MI.getOperand(0).getReg(); |
1865 | bool IsUndef = MI.getOperand(1).isUndef(); |
1866 | unsigned SubReg = MI.getOperand(3).getImm(); |
1867 | assert(VecReg == MI.getOperand(1).getReg()); |
1868 | |
1869 | MachineInstrBuilder MIB = |
1870 | BuildMI(MBB, MI, DL, OpDesc) |
1871 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1872 | .add(MI.getOperand(2)) |
1873 | .addReg(VecReg, RegState::ImplicitDefine) |
1874 | .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); |
1875 | |
1876 | const int ImpDefIdx = |
1877 | OpDesc.getNumOperands() + OpDesc.getNumImplicitUses(); |
1878 | const int ImpUseIdx = ImpDefIdx + 1; |
1879 | MIB->tieOperands(ImpDefIdx, ImpUseIdx); |
1880 | MI.eraseFromParent(); |
1881 | break; |
1882 | } |
1883 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1: |
1884 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2: |
1885 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3: |
1886 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4: |
1887 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5: |
1888 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8: |
1889 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16: |
1890 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: { |
1891 | assert(ST.useVGPRIndexMode()); |
1892 | Register VecReg = MI.getOperand(0).getReg(); |
1893 | bool IsUndef = MI.getOperand(1).isUndef(); |
1894 | Register Idx = MI.getOperand(3).getReg(); |
1895 | unsigned SubReg = MI.getOperand(4).getImm(); |
1896 | |
1897 | MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) |
1898 | .addReg(Idx) |
1899 | .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE); |
1900 | SetOn->getOperand(3).setIsUndef(); |
1901 | |
1902 | const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect); |
1903 | MachineInstrBuilder MIB = |
1904 | BuildMI(MBB, MI, DL, OpDesc) |
1905 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1906 | .add(MI.getOperand(2)) |
1907 | .addReg(VecReg, RegState::ImplicitDefine) |
1908 | .addReg(VecReg, |
1909 | RegState::Implicit | (IsUndef ? RegState::Undef : 0)); |
1910 | |
1911 | const int ImpDefIdx = OpDesc.getNumOperands() + OpDesc.getNumImplicitUses(); |
1912 | const int ImpUseIdx = ImpDefIdx + 1; |
1913 | MIB->tieOperands(ImpDefIdx, ImpUseIdx); |
1914 | |
1915 | MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); |
1916 | |
1917 | finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); |
1918 | |
1919 | MI.eraseFromParent(); |
1920 | break; |
1921 | } |
1922 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1: |
1923 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2: |
1924 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3: |
1925 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4: |
1926 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5: |
1927 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8: |
1928 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16: |
1929 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: { |
1930 | assert(ST.useVGPRIndexMode()); |
1931 | Register Dst = MI.getOperand(0).getReg(); |
1932 | Register VecReg = MI.getOperand(1).getReg(); |
1933 | bool IsUndef = MI.getOperand(1).isUndef(); |
1934 | Register Idx = MI.getOperand(2).getReg(); |
1935 | unsigned SubReg = MI.getOperand(3).getImm(); |
1936 | |
1937 | MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) |
1938 | .addReg(Idx) |
1939 | .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); |
1940 | SetOn->getOperand(3).setIsUndef(); |
1941 | |
1942 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32)) |
1943 | .addDef(Dst) |
1944 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1945 | .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)) |
1946 | .addReg(AMDGPU::M0, RegState::Implicit); |
1947 | |
1948 | MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); |
1949 | |
1950 | finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); |
1951 | |
1952 | MI.eraseFromParent(); |
1953 | break; |
1954 | } |
1955 | case AMDGPU::SI_PC_ADD_REL_OFFSET: { |
1956 | MachineFunction &MF = *MBB.getParent(); |
1957 | Register Reg = MI.getOperand(0).getReg(); |
1958 | Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); |
1959 | Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); |
1960 | |
1961 | // Create a bundle so these instructions won't be re-ordered by post-RA |
1962 | // scheduling. |
1963 | MIBundleBuilder Bundler(MBB, MI); |
1964 | Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); |
1965 | |
1966 | |
1967 | // Add the 32-bit halves of the offset operands to the PC value just read. |
1968 | Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) |
1969 | .addReg(RegLo) |
1970 | .add(MI.getOperand(1))); |
1971 | |
1972 | MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) |
1973 | .addReg(RegHi); |
1974 | MIB.add(MI.getOperand(2)); |
1975 | |
1976 | Bundler.append(MIB); |
1977 | finalizeBundle(MBB, Bundler.begin()); |
1978 | |
1979 | MI.eraseFromParent(); |
1980 | break; |
1981 | } |
1982 | case AMDGPU::ENTER_STRICT_WWM: { |
1983 | // Entering whole wave mode: this pseudo only exists so the WWM region can |
1984 | // be identified; it lowers directly to s_or_saveexec of the exec mask. |
1985 | MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1986 | : AMDGPU::S_OR_SAVEEXEC_B64)); |
1987 | break; |
1988 | } |
1989 | case AMDGPU::ENTER_STRICT_WQM: { |
1990 | // Entering whole quad mode: save the current exec mask, then set exec to |
1991 | // its whole-quad-mode image with s_wqm. |
1992 | const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1993 | const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64; |
1994 | const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
1995 | BuildMI(MBB, MI, DL, get(MovOp), MI.getOperand(0).getReg()).addReg(Exec); |
1996 | BuildMI(MBB, MI, DL, get(WQMOp), Exec).addReg(Exec); |
1997 | |
1998 | MI.eraseFromParent(); |
1999 | break; |
2000 | } |
2001 | case AMDGPU::EXIT_STRICT_WWM: |
2002 | case AMDGPU::EXIT_STRICT_WQM: { |
2003 | // Leaving strict WWM/WQM: restore the exec mask that was saved on entry |
2004 | // with a plain s_mov. |
2005 | MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); |
2006 | break; |
2007 | } |
2008 | } |
2009 | return true; |
2010 | } |
2011 | |
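     | // Split a 64-bit DPP move pseudo into two V_MOV_B32_dpp instructions, one |
     | // per 32-bit half, recombining the halves with a REG_SEQUENCE when the |
     | // destination is still a virtual register. |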
2012 | std::pair<MachineInstr*, MachineInstr*> |
2013 | SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { |
2014 | assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); |
2015 | |
2016 | MachineBasicBlock &MBB = *MI.getParent(); |
2017 | DebugLoc DL = MBB.findDebugLoc(MI); |
2018 | MachineFunction *MF = MBB.getParent(); |
2019 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
2020 | Register Dst = MI.getOperand(0).getReg(); |
2021 | unsigned Part = 0; |
2022 | MachineInstr *Split[2]; |
2023 | |
2024 | for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { |
2025 | auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); |
2026 | if (Dst.isPhysical()) { |
2027 | MovDPP.addDef(RI.getSubReg(Dst, Sub)); |
2028 | } else { |
2029 | assert(MRI.isSSA()); |
2030 | auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
2031 | MovDPP.addDef(Tmp); |
2032 | } |
2033 | |
2034 | for (unsigned I = 1; I <= 2; ++I) { |
2035 | const MachineOperand &SrcOp = MI.getOperand(I); |
2036 | assert(!SrcOp.isFPImm()); |
2037 | if (SrcOp.isImm()) { |
2038 | APInt Imm(64, SrcOp.getImm()); |
2039 | Imm.ashrInPlace(Part * 32); |
2040 | MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); |
2041 | } else { |
2042 | assert(SrcOp.isReg()); |
2043 | Register Src = SrcOp.getReg(); |
2044 | if (Src.isPhysical()) |
2045 | MovDPP.addReg(RI.getSubReg(Src, Sub)); |
2046 | else |
2047 | MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub); |
2048 | } |
2049 | } |
2050 | |
2051 | for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I) |
2052 | MovDPP.addImm(MI.getOperand(I).getImm()); |
2053 | |
2054 | Split[Part] = MovDPP; |
2055 | ++Part; |
2056 | } |
2057 | |
2058 | if (Dst.isVirtual()) |
2059 | BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) |
2060 | .addReg(Split[0]->getOperand(0).getReg()) |
2061 | .addImm(AMDGPU::sub0) |
2062 | .addReg(Split[1]->getOperand(0).getReg()) |
2063 | .addImm(AMDGPU::sub1); |
2064 | |
2065 | MI.eraseFromParent(); |
2066 | return std::make_pair(Split[0], Split[1]); |
2067 | } |
2068 | |
2069 | bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, |
2070 | MachineOperand &Src0, |
2071 | unsigned Src0OpName, |
2072 | MachineOperand &Src1, |
2073 | unsigned Src1OpName) const { |
2074 | MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); |
2075 | if (!Src0Mods) |
2076 | return false; |
2077 | |
2078 | MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); |
2079 | assert(Src1Mods && |
2080 | "All commutable instructions have both src0 and src1 modifiers"); |
2081 | |
2082 | int Src0ModsVal = Src0Mods->getImm(); |
2083 | int Src1ModsVal = Src1Mods->getImm(); |
2084 | |
2085 | Src1Mods->setImm(Src0ModsVal); |
2086 | Src0Mods->setImm(Src1ModsVal); |
2087 | return true; |
2088 | } |
2089 | |
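     | // Swap a register operand with an immediate, frame-index or global-address |
     | // operand while commuting. Returns the rewritten instruction, or nullptr if |
     | // the non-register operand kind is not supported. |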
2090 | static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, |
2091 | MachineOperand &RegOp, |
2092 | MachineOperand &NonRegOp) { |
2093 | Register Reg = RegOp.getReg(); |
2094 | unsigned SubReg = RegOp.getSubReg(); |
2095 | bool IsKill = RegOp.isKill(); |
2096 | bool IsDead = RegOp.isDead(); |
2097 | bool IsUndef = RegOp.isUndef(); |
2098 | bool IsDebug = RegOp.isDebug(); |
2099 | |
2100 | if (NonRegOp.isImm()) |
2101 | RegOp.ChangeToImmediate(NonRegOp.getImm()); |
2102 | else if (NonRegOp.isFI()) |
2103 | RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); |
2104 | else if (NonRegOp.isGlobal()) { |
2105 | RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(), |
2106 | NonRegOp.getTargetFlags()); |
2107 | } else |
2108 | return nullptr; |
2109 | // SubReg and target flags share storage in MachineOperand, so copy the |
2110 | // flags explicitly instead of reinterpreting the old subreg index. |
2111 | RegOp.setTargetFlags(NonRegOp.getTargetFlags()); |
2112 | |
2113 | NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); |
2114 | NonRegOp.setSubReg(SubReg); |
2115 | |
2116 | return &MI; |
2117 | } |
2118 | |
2119 | MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
2120 | unsigned Src0Idx, |
2121 | unsigned Src1Idx) const { |
2122 | assert(!NewMI && "this should never be used"); |
2123 | |
2124 | unsigned Opc = MI.getOpcode(); |
2125 | int CommutedOpcode = commuteOpcode(Opc); |
2126 | if (CommutedOpcode == -1) |
2127 | return nullptr; |
2128 | |
2129 | assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == |
2130 | static_cast<int>(Src0Idx) && |
2131 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == |
2132 | static_cast<int>(Src1Idx) && |
2133 | "inconsistency with findCommutedOpIndices"); |
2134 | |
2135 | MachineOperand &Src0 = MI.getOperand(Src0Idx); |
2136 | MachineOperand &Src1 = MI.getOperand(Src1Idx); |
2137 | |
2138 | MachineInstr *CommutedMI = nullptr; |
2139 | if (Src0.isReg() && Src1.isReg()) { |
2140 | if (isOperandLegal(MI, Src1Idx, &Src0)) { |
2141 | // Both sources are registers; the generic implementation can swap them. |
2142 | CommutedMI |
2143 | = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); |
2144 | } |
2145 | |
2146 | } else if (Src0.isReg() && !Src1.isReg()) { |
2147 | // src0 can always hold whatever src1 holds, so no legality check is |
2148 | // needed before swapping a register src0 with a non-register src1. |
2149 | CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); |
2150 | } else if (!Src0.isReg() && Src1.isReg()) { |
2151 | if (isOperandLegal(MI, Src1Idx, &Src0)) |
2152 | CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); |
2153 | } else { |
2154 | // Neither operand is a register; there is nothing useful to commute. |
2155 | return nullptr; |
2156 | } |
2157 | |
2158 | if (CommutedMI) { |
2159 | swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, |
2160 | Src1, AMDGPU::OpName::src1_modifiers); |
2161 | |
2162 | CommutedMI->setDesc(get(CommutedOpcode)); |
2163 | } |
2164 | |
2165 | return CommutedMI; |
2166 | } |
2167 | |
2168 | |
2169 | |
2170 | |
2171 | bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, |
2172 | unsigned &SrcOpIdx0, |
2173 | unsigned &SrcOpIdx1) const { |
2174 | return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); |
2175 | } |
2176 | |
2177 | bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0, |
2178 | unsigned &SrcOpIdx1) const { |
2179 | if (!Desc.isCommutable()) |
2180 | return false; |
2181 | |
2182 | unsigned Opc = Desc.getOpcode(); |
2183 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); |
2184 | if (Src0Idx == -1) |
2185 | return false; |
2186 | |
2187 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); |
2188 | if (Src1Idx == -1) |
2189 | return false; |
2190 | |
2191 | return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); |
2192 | } |
2193 | |
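     | // Return true if a branch of the given opcode can reach a target BrOffset |
     | // bytes away. The offset is converted to dwords and rebased to the |
     | // instruction following the branch before the signed range check. |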
2194 | bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, |
2195 | int64_t BrOffset) const { |
2196 | // BranchRelaxation should never have to check s_setpc_b64 because its |
2197 | // destination block is unanalyzable. |
2198 | assert(BranchOp != AMDGPU::S_SETPC_B64); |
2199 | |
2200 | // Convert the byte offset to dwords. |
2201 | BrOffset /= 4; |
2202 | |
2203 | // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset |
2204 | // is measured from the instruction following the branch. |
2205 | BrOffset -= 1; |
2206 | |
2207 | return isIntN(BranchOffsetBits, BrOffset); |
2208 | } |
2209 | |
2210 | MachineBasicBlock *SIInstrInfo::getBranchDestBlock( |
2211 | const MachineInstr &MI) const { |
2212 | if (MI.getOpcode() == AMDGPU::S_SETPC_B64) { |
2213 | // The destination of an indirect branch cannot be determined statically, |
2214 | // so report no destination block. |
2215 | return nullptr; |
2216 | } |
2217 | |
2218 | return MI.getOperand(0).getMBB(); |
2219 | } |
2220 | |
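     | // Expand an out-of-range unconditional branch: read the PC with s_getpc_b64, |
     | // add a symbol-difference offset to form the destination address, and jump |
     | // through s_setpc_b64. Returns the byte size of the inserted sequence. |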
2221 | unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, |
2222 | MachineBasicBlock &DestBB, |
2223 | const DebugLoc &DL, |
2224 | int64_t BrOffset, |
2225 | RegScavenger *RS) const { |
2226 | assert(RS && "RegScavenger required for long branching"); |
2227 | assert(MBB.empty() && |
2228 | "new block should be inserted for expanding unconditional branch"); |
2229 | assert(MBB.pred_size() == 1); |
2230 | |
2231 | MachineFunction *MF = MBB.getParent(); |
2232 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
2233 | |
2234 | // Build the target address in a virtual SGPR pair; it is rewritten to a |
2235 | // scavenged physical pair once the block contents are in place. |
2236 | Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
2237 | |
2238 | auto I = MBB.end(); |
2239 | |
2240 | |
2241 | // Read the PC; offsets below are relative to the label placed after it. |
2242 | MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); |
2243 | |
2244 | auto &MCCtx = MF->getContext(); |
2245 | MCSymbol *PostGetPCLabel = |
2246 | MCCtx.createTempSymbol("post_getpc", true); |
2247 | GetPC->setPostInstrSymbol(*MF, PostGetPCLabel); |
2248 | |
2249 | MCSymbol *OffsetLo = |
2250 | MCCtx.createTempSymbol("offset_lo", true); |
2251 | MCSymbol *OffsetHi = |
2252 | MCCtx.createTempSymbol("offset_hi", true); |
2253 | BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) |
2254 | .addReg(PCReg, RegState::Define, AMDGPU::sub0) |
2255 | .addReg(PCReg, 0, AMDGPU::sub0) |
2256 | .addSym(OffsetLo, MO_FAR_BRANCH_OFFSET); |
2257 | BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) |
2258 | .addReg(PCReg, RegState::Define, AMDGPU::sub1) |
2259 | .addReg(PCReg, 0, AMDGPU::sub1) |
2260 | .addSym(OffsetHi, MO_FAR_BRANCH_OFFSET); |
2261 | |
2262 | // Jump through the computed 64-bit address. |
2263 | BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) |
2264 | .addReg(PCReg); |
2265 | |
2266 | auto ComputeBlockSize = [](const TargetInstrInfo *TII, |
2267 | const MachineBasicBlock &MBB) { |
2268 | unsigned Size = 0; |
2269 | for (const MachineInstr &MI : MBB) |
2270 | Size += TII->getInstSizeInBytes(MI); |
2271 | return Size; |
2272 | }; |
2273 | |
2274 | |
2275 | |
2276 | |
2277 | |
2278 | |
2279 | |
2280 | |
2281 | |
2282 | |
2283 | |
2284 | |
2285 | |
2286 | |
2287 | |
2288 | |
2289 | |
2290 | |
2291 | |
2292 | |
2293 | |
2294 | |
2295 | |
2296 | |
2297 | |
2298 | |
2299 | |
2300 | |
2301 | |
2302 | |
2303 | |
2304 | |
2305 | |
2306 | |
2307 | |
2308 | // Scavenge a free SGPR pair to hold the branch target and replace the |
2309 | // temporary virtual register with it. |
2310 | RS->enterBasicBlockEnd(MBB); |
2311 | Register Scav = RS->scavengeRegisterBackwards( |
2312 | AMDGPU::SReg_64RegClass, |
2313 | MachineBasicBlock::iterator(GetPC), false, 0); |
2314 | MRI.replaceRegWith(PCReg, Scav); |
2315 | MRI.clearVirtRegs(); |
2316 | RS->setRegUsed(Scav); |
2317 | // Define the offset symbols as the low and high halves of the distance |
2318 | // from the post-getpc label to the destination block. |
2319 | auto *Offset = MCBinaryExpr::createSub( |
2320 | MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx), |
2321 | MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); |
2322 | |
2323 | auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); |
2324 | OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); |
2325 | auto *ShAmt = MCConstantExpr::create(32, MCCtx); |
2326 | OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); |
2327 | return ComputeBlockSize(this, MBB); |
2328 | } |
2329 | |
2330 | unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { |
2331 | switch (Cond) { |
2332 | case SIInstrInfo::SCC_TRUE: |
2333 | return AMDGPU::S_CBRANCH_SCC1; |
2334 | case SIInstrInfo::SCC_FALSE: |
2335 | return AMDGPU::S_CBRANCH_SCC0; |
2336 | case SIInstrInfo::VCCNZ: |
2337 | return AMDGPU::S_CBRANCH_VCCNZ; |
2338 | case SIInstrInfo::VCCZ: |
2339 | return AMDGPU::S_CBRANCH_VCCZ; |
2340 | case SIInstrInfo::EXECNZ: |
2341 | return AMDGPU::S_CBRANCH_EXECNZ; |
2342 | case SIInstrInfo::EXECZ: |
2343 | return AMDGPU::S_CBRANCH_EXECZ; |
2344 | default: |
2345 | llvm_unreachable("invalid branch predicate"); |
2346 | } |
2347 | } |
2348 | |
2349 | SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { |
2350 | switch (Opcode) { |
2351 | case AMDGPU::S_CBRANCH_SCC0: |
2352 | return SCC_FALSE; |
2353 | case AMDGPU::S_CBRANCH_SCC1: |
2354 | return SCC_TRUE; |
2355 | case AMDGPU::S_CBRANCH_VCCNZ: |
2356 | return VCCNZ; |
2357 | case AMDGPU::S_CBRANCH_VCCZ: |
2358 | return VCCZ; |
2359 | case AMDGPU::S_CBRANCH_EXECNZ: |
2360 | return EXECNZ; |
2361 | case AMDGPU::S_CBRANCH_EXECZ: |
2362 | return EXECZ; |
2363 | default: |
2364 | return INVALID_BR; |
2365 | } |
2366 | } |
2367 | |
2368 | bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, |
2369 | MachineBasicBlock::iterator I, |
2370 | MachineBasicBlock *&TBB, |
2371 | MachineBasicBlock *&FBB, |
2372 | SmallVectorImpl<MachineOperand> &Cond, |
2373 | bool AllowModify) const { |
2374 | if (I->getOpcode() == AMDGPU::S_BRANCH) { |
2375 | // Unconditional branch. |
2376 | TBB = I->getOperand(0).getMBB(); |
2377 | return false; |
2378 | } |
2379 | |
2380 | MachineBasicBlock *CondBB = nullptr; |
2381 | |
2382 | if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { |
2383 | CondBB = I->getOperand(1).getMBB(); |
2384 | Cond.push_back(I->getOperand(0)); |
2385 | } else { |
2386 | BranchPredicate Pred = getBranchPredicate(I->getOpcode()); |
2387 | if (Pred == INVALID_BR) |
2388 | return true; |
2389 | |
2390 | CondBB = I->getOperand(0).getMBB(); |
2391 | Cond.push_back(MachineOperand::CreateImm(Pred)); |
2392 | Cond.push_back(I->getOperand(1)); |
2393 | } |
2394 | ++I; |
2395 | |
2396 | if (I == MBB.end()) { |
2397 | // Conditional branch followed by fall-through. |
2398 | TBB = CondBB; |
2399 | return false; |
2400 | } |
2401 | |
2402 | if (I->getOpcode() == AMDGPU::S_BRANCH) { |
2403 | TBB = CondBB; |
2404 | FBB = I->getOperand(0).getMBB(); |
2405 | return false; |
2406 | } |
2407 | |
2408 | return true; |
2409 | } |
2410 | |
2411 | bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
2412 | MachineBasicBlock *&FBB, |
2413 | SmallVectorImpl<MachineOperand> &Cond, |
2414 | bool AllowModify) const { |
2415 | MachineBasicBlock::iterator I = MBB.getFirstTerminator(); |
2416 | auto E = MBB.end(); |
2417 | if (I == E) |
2418 | return false; |
2419 | |
2420 | // Skip over the instructions that are artificially terminators for special |
2421 | // exec management. |
2422 | while (I != E && !I->isBranch() && !I->isReturn()) { |
2423 | switch (I->getOpcode()) { |
2424 | case AMDGPU::S_MOV_B64_term: |
2425 | case AMDGPU::S_XOR_B64_term: |
2426 | case AMDGPU::S_OR_B64_term: |
2427 | case AMDGPU::S_ANDN2_B64_term: |
2428 | case AMDGPU::S_AND_B64_term: |
2429 | case AMDGPU::S_MOV_B32_term: |
2430 | case AMDGPU::S_XOR_B32_term: |
2431 | case AMDGPU::S_OR_B32_term: |
2432 | case AMDGPU::S_ANDN2_B32_term: |
2433 | case AMDGPU::S_AND_B32_term: |
2434 | break; |
2435 | case AMDGPU::SI_IF: |
2436 | case AMDGPU::SI_ELSE: |
2437 | case AMDGPU::SI_KILL_I1_TERMINATOR: |
2438 | case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: |
2439 | // These control-flow pseudos cannot be analyzed as simple branches. |
2440 | return true; |
2441 | default: |
2442 | llvm_unreachable("unexpected non-branch terminator inst"); |
2443 | } |
2444 | |
2445 | ++I; |
2446 | } |
2447 | |
2448 | if (I == E) |
2449 | return false; |
2450 | |
2451 | return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); |
2452 | } |
2453 | |
2454 | unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, |
2455 | int *BytesRemoved) const { |
2456 | MachineBasicBlock::iterator I = MBB.getFirstTerminator(); |
2457 | |
2458 | unsigned Count = 0; |
2459 | unsigned RemovedSize = 0; |
2460 | while (I != MBB.end()) { |
2461 | MachineBasicBlock::iterator Next = std::next(I); |
2462 | RemovedSize += getInstSizeInBytes(*I); |
2463 | I->eraseFromParent(); |
2464 | ++Count; |
2465 | I = Next; |
2466 | } |
2467 | |
2468 | if (BytesRemoved) |
2469 | *BytesRemoved = RemovedSize; |
2470 | |
2471 | return Count; |
2472 | } |
2473 | |
2474 | // Copy kill/undef flags from the original condition operand to CondReg. |
2475 | static void preserveCondRegFlags(MachineOperand &CondReg, |
2476 | const MachineOperand &OrigCond) { |
2477 | CondReg.setIsUndef(OrigCond.isUndef()); |
2478 | CondReg.setIsKill(OrigCond.isKill()); |
2479 | } |
2480 | |
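     | // Insert the branch sequence described by TBB, FBB and Cond at the end of |
     | // MBB. Returns the number of branch instructions added; their encoded size |
     | // is reported through BytesAdded when requested. |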
2481 | unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, |
2482 | MachineBasicBlock *TBB, |
2483 | MachineBasicBlock *FBB, |
2484 | ArrayRef<MachineOperand> Cond, |
2485 | const DebugLoc &DL, |
2486 | int *BytesAdded) const { |
2487 | if (!FBB && Cond.empty()) { |
2488 | BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) |
2489 | .addMBB(TBB); |
2490 | if (BytesAdded) |
2491 | *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; |
2492 | return 1; |
2493 | } |
2494 | |
2495 | if (Cond.size() == 1 && Cond[0].isReg()) { |
2496 | BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) |
2497 | .add(Cond[0]) |
2498 | .addMBB(TBB); |
2499 | return 1; |
2500 | } |
2501 | |
2502 | assert(TBB && Cond[0].isImm()); |
2503 | |
2504 | unsigned Opcode |
2505 | = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); |
2506 | |
2507 | if (!FBB) { |
2508 | |
2509 | MachineInstr *CondBr = |
2510 | BuildMI(&MBB, DL, get(Opcode)) |
2511 | .addMBB(TBB); |
2512 | |
2513 | |
2514 | // Copy the flags onto the implicit condition register operand. |
2515 | fixImplicitOperands(*CondBr); |
2516 | |
2517 | if (BytesAdded) |
2518 | *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; |
2519 | return 1; |
2520 | } |
2521 | |
2522 | assert(TBB && FBB); |
2523 | |
2524 | MachineInstr *CondBr = |
2525 | BuildMI(&MBB, DL, get(Opcode)) |
2526 | .addMBB(TBB); |
2527 | fixImplicitOperands(*CondBr); |
2528 | BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) |
2529 | .addMBB(FBB); |
2530 | |
2531 | MachineOperand &CondReg = CondBr->getOperand(1); |
2532 | CondReg.setIsUndef(Cond[1].isUndef()); |
2533 | CondReg.setIsKill(Cond[1].isKill()); |
2534 | |
2535 | if (BytesAdded) |
2536 | *BytesAdded = ST.hasOffset3fBug() ? 16 : 8; |
2537 | |
2538 | return 2; |
2539 | } |
2540 | |
2541 | bool SIInstrInfo::reverseBranchCondition( |
2542 | SmallVectorImpl<MachineOperand> &Cond) const { |
2543 | if (Cond.size() != 2) { |
2544 | return true; |
2545 | } |
2546 | |
2547 | if (Cond[0].isImm()) { |
2548 | Cond[0].setImm(-Cond[0].getImm()); |
2549 | return false; |
2550 | } |
2551 | |
2552 | return true; |
2553 | } |
2554 | |
2555 | bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, |
2556 | ArrayRef<MachineOperand> Cond, |
2557 | Register DstReg, Register TrueReg, |
2558 | Register FalseReg, int &CondCycles, |
2559 | int &TrueCycles, int &FalseCycles) const { |
2560 | switch (Cond[0].getImm()) { |
2561 | case VCCNZ: |
2562 | case VCCZ: { |
2563 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2564 | const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); |
2565 | if (MRI.getRegClass(FalseReg) != RC) |
2566 | return false; |
2567 | |
2568 | int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; |
2569 | CondCycles = TrueCycles = FalseCycles = NumInsts; |
2570 | |
2571 | |
2572 | // Limit to equal cost for branch vs. N v_cndmask_b32 instructions. |
2573 | } |
2574 | case SCC_TRUE: |
2575 | case SCC_FALSE: { |
2576 | |
2577 | // A scalar condition requires the select itself to stay on the SALU. |
2578 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2579 | const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); |
2580 | if (MRI.getRegClass(FalseReg) != RC) |
2581 | return false; |
2582 | |
2583 | int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; |
2584 | |
2585 | // Even element counts can use s_cselect_b64 on register pairs. |
2586 | if (NumInsts % 2 == 0) |
2587 | NumInsts /= 2; |
2588 | |
2589 | CondCycles = TrueCycles = FalseCycles = NumInsts; |
2590 | return RI.isSGPRClass(RC); |
2591 | } |
2592 | default: |
2593 | return false; |
2594 | } |
2595 | } |
2596 | |
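     | // Materialize a select between TrueReg and FalseReg under the predicate in |
     | // Cond[0]: scalar conditions use s_cselect, vector conditions use v_cndmask, |
     | // and wide registers are split into 32- or 64-bit pieces via REG_SEQUENCE. |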
2597 | void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, |
2598 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
2599 | Register DstReg, ArrayRef<MachineOperand> Cond, |
2600 | Register TrueReg, Register FalseReg) const { |
2601 | BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); |
2602 | if (Pred == VCCZ || Pred == SCC_FALSE) { |
2603 | Pred = static_cast<BranchPredicate>(-Pred); |
2604 | std::swap(TrueReg, FalseReg); |
2605 | } |
2606 | |
2607 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2608 | const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); |
2609 | unsigned DstSize = RI.getRegSizeInBits(*DstRC); |
2610 | |
2611 | if (DstSize == 32) { |
2612 | MachineInstr *Select; |
2613 | if (Pred == SCC_TRUE) { |
2614 | Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg) |
2615 | .addReg(TrueReg) |
2616 | .addReg(FalseReg); |
2617 | } else { |
2618 | // V_CNDMASK_B32 selects src1 when the condition is set, so the false value |
2619 ... 
2618 | // goes first. |
2619 | Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg) |
2620 | .addReg(FalseReg) |
2621 | .addReg(TrueReg); |
2622 | } |
2623 | |
2624 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2625 | return; |
2626 | } |
2627 | |
2628 | if (DstSize == 64 && Pred == SCC_TRUE) { |
2629 | MachineInstr *Select = |
2630 | BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) |
2631 | .addReg(TrueReg) |
2632 | .addReg(FalseReg); |
2633 | |
2634 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2635 | return; |
2636 | } |
2637 | |
2638 | static const int16_t Sub0_15[] = { |
2639 | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
2640 | AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, |
2641 | AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, |
2642 | AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, |
2643 | }; |
2644 | |
2645 | static const int16_t Sub0_15_64[] = { |
2646 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, |
2647 | AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, |
2648 | AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, |
2649 | AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, |
2650 | }; |
2651 | |
2652 | unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; |
2653 | const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; |
2654 | const int16_t *SubIndices = Sub0_15; |
2655 | int NElts = DstSize / 32; |
2656 | |
2657 | |
2658 | // Scalar selects can work on 64-bit pieces; vector selects go 32 bits at a time. |
2659 | if (Pred == SCC_TRUE) { |
2660 | if (NElts % 2) { |
2661 | SelOp = AMDGPU::S_CSELECT_B32; |
2662 | EltRC = &AMDGPU::SGPR_32RegClass; |
2663 | } else { |
2664 | SelOp = AMDGPU::S_CSELECT_B64; |
2665 | EltRC = &AMDGPU::SGPR_64RegClass; |
2666 | SubIndices = Sub0_15_64; |
2667 | NElts /= 2; |
2668 | } |
2669 | } |
2670 | |
2671 | MachineInstrBuilder MIB = BuildMI( |
2672 | MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); |
2673 | |
2674 | I = MIB->getIterator(); |
2675 | |
2676 | SmallVector<Register, 8> Regs; |
2677 | for (int Idx = 0; Idx != NElts; ++Idx) { |
2678 | Register DstElt = MRI.createVirtualRegister(EltRC); |
2679 | Regs.push_back(DstElt); |
2680 | |
2681 | unsigned SubIdx = SubIndices[Idx]; |
2682 | |
2683 | MachineInstr *Select; |
2684 | if (SelOp == AMDGPU::V_CNDMASK_B32_e32) { |
2685 | Select = |
2686 | BuildMI(MBB, I, DL, get(SelOp), DstElt) |
2687 | .addReg(FalseReg, 0, SubIdx) |
2688 | .addReg(TrueReg, 0, SubIdx); |
2689 | } else { |
2690 | Select = |
2691 | BuildMI(MBB, I, DL, get(SelOp), DstElt) |
2692 | .addReg(TrueReg, 0, SubIdx) |
2693 | .addReg(FalseReg, 0, SubIdx); |
2694 | } |
2695 | |
2696 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2697 | fixImplicitOperands(*Select); |
2698 | |
2699 | MIB.addReg(DstElt) |
2700 | .addImm(SubIdx); |
2701 | } |
2702 | } |
2703 | |
2704 | bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { |
2705 | switch (MI.getOpcode()) { |
2706 | case AMDGPU::V_MOV_B32_e32: |
2707 | case AMDGPU::V_MOV_B32_e64: |
2708 | case AMDGPU::V_MOV_B64_PSEUDO: { |
2709 | // Extra implicit operands mean this move may be used for register indexing, |
2710 | // in which case the source is not simply copied to the destination. |
2711 | unsigned NumOps = MI.getDesc().getNumOperands() + |
2712 | MI.getDesc().getNumImplicitUses(); |
2713 | |
2714 | return MI.getNumOperands() == NumOps; |
2715 | } |
2716 | case AMDGPU::S_MOV_B32: |
2717 | case AMDGPU::S_MOV_B64: |
2718 | case AMDGPU::COPY: |
2719 | case AMDGPU::V_ACCVGPR_WRITE_B32_e64: |
2720 | case AMDGPU::V_ACCVGPR_READ_B32_e64: |
2721 | case AMDGPU::V_ACCVGPR_MOV_B32: |
2722 | return true; |
2723 | default: |
2724 | return false; |
2725 | } |
2726 | } |
2727 | |
2728 | unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( |
2729 | unsigned Kind) const { |
2730 | switch(Kind) { |
2731 | case PseudoSourceValue::Stack: |
2732 | case PseudoSourceValue::FixedStack: |
2733 | return AMDGPUAS::PRIVATE_ADDRESS; |
2734 | case PseudoSourceValue::ConstantPool: |
2735 | case PseudoSourceValue::GOT: |
2736 | case PseudoSourceValue::JumpTable: |
2737 | case PseudoSourceValue::GlobalValueCallEntry: |
2738 | case PseudoSourceValue::ExternalSymbolCallEntry: |
2739 | case PseudoSourceValue::TargetCustom: |
2740 | return AMDGPUAS::CONSTANT_ADDRESS; |
2741 | } |
2742 | return AMDGPUAS::FLAT_ADDRESS; |
2743 | } |
2744 | |
2745 | static void removeModOperands(MachineInstr &MI) { |
2746 | unsigned Opc = MI.getOpcode(); |
2747 | int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2748 | AMDGPU::OpName::src0_modifiers); |
2749 | int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2750 | AMDGPU::OpName::src1_modifiers); |
2751 | int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2752 | AMDGPU::OpName::src2_modifiers); |
2753 | |
2754 | MI.RemoveOperand(Src2ModIdx); |
2755 | MI.RemoveOperand(Src1ModIdx); |
2756 | MI.RemoveOperand(Src0ModIdx); |
2757 | } |
2758 | |
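     | // Attempt to fold the immediate defined by DefMI into its single non-debug |
     | // use UseMI: a COPY becomes a move-immediate, and a MAD/FMA becomes its |
     | // MADMK/MADAK (FMAMK/FMAAK) literal form when the operands allow it. |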
2759 | bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
2760 | Register Reg, MachineRegisterInfo *MRI) const { |
2761 | if (!MRI->hasOneNonDBGUse(Reg)) |
2762 | return false; |
2763 | |
2764 | switch (DefMI.getOpcode()) { |
2765 | default: |
2766 | return false; |
2767 | case AMDGPU::S_MOV_B64: |
2768 | // TODO: 64-bit immediates could be folded, but handling the sub-register |
2769 | // uses correctly makes this complicated. |
2770 | return false; |
2771 | |
2772 | case AMDGPU::V_MOV_B32_e32: |
2773 | case AMDGPU::S_MOV_B32: |
2774 | case AMDGPU::V_ACCVGPR_WRITE_B32_e64: |
2775 | break; |
2776 | } |
2777 | |
2778 | const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); |
2779 | assert(ImmOp); |
2780 | |
2781 | if (!ImmOp->isImm()) |
2782 | return false; |
2783 | |
2784 | unsigned Opc = UseMI.getOpcode(); |
2785 | if (Opc == AMDGPU::COPY) { |
2786 | Register DstReg = UseMI.getOperand(0).getReg(); |
2787 | bool Is16Bit = getOpSize(UseMI, 0) == 2; |
2788 | bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); |
2789 | unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; |
2790 | APInt Imm(32, ImmOp->getImm()); |
2791 | |
2792 | if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16) |
2793 | Imm = Imm.ashr(16); |
2794 | |
2795 | if (RI.isAGPR(*MRI, DstReg)) { |
2796 | if (!isInlineConstant(Imm)) |
2797 | return false; |
2798 | NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64; |
2799 | } |
2800 | |
2801 | if (Is16Bit) { |
2802 | if (isVGPRCopy) |
2803 | return false; |
2804 | |
2805 | if (DstReg.isVirtual() && |
2806 | UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) |
2807 | return false; |
2808 | |
2809 | UseMI.getOperand(0).setSubReg(0); |
2810 | if (DstReg.isPhysical()) { |
2811 | DstReg = RI.get32BitRegister(DstReg); |
2812 | UseMI.getOperand(0).setReg(DstReg); |
2813 | } |
2814 | assert(UseMI.getOperand(1).getReg().isVirtual()); |
2815 | } |
2816 | |
2817 | UseMI.setDesc(get(NewOpc)); |
2818 | UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); |
2819 | UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); |
2820 | return true; |
2821 | } |
2822 | |
2823 | if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || |
2824 | Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || |
2825 | Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || |
2826 | Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) { |
2827 | // Don't fold if source or output modifiers are in use; the VOP2 literal |
2828 | // forms created below do not have them. |
2829 | if (hasAnyModifiersSet(UseMI)) |
2830 | return false; |
2831 | |
2832 | // If the immediate is already a free inline constant there is nothing to |
2833 | // gain from folding it here; SIFoldOperands can do that later. |
2834 | |
2835 | MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); |
2836 | |
2837 | // Any source operand works for the inline-constant legality check. |
2838 | if (isInlineConstant(UseMI, *Src0, *ImmOp)) |
2839 | return false; |
2840 | |
2841 | bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || |
2842 | Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; |
2843 | bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || |
2844 | Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64; |
2845 | MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); |
2846 | MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); |
2847 | |
2848 | // The folded immediate feeds a multiplicand (src0): use the madmk/fmamk |
2849 | // forms, which take the constant as the second source. |
2850 | if (Src0->isReg() && Src0->getReg() == Reg) { |
2851 | if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) |
2852 | return false; |
2853 | |
2854 | if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) |
2855 | return false; |
2856 | |
2857 | unsigned NewOpc = |
2858 | IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16) |
2859 | : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); |
2860 | if (pseudoToMCOpcode(NewOpc) == -1) |
2861 | return false; |
2862 | |
2863 | // The madmk/fmamk literal lives in the second source slot, so move the |
2864 | // remaining register operand into src0 and place the constant in src1. |
2865 | const int64_t Imm = ImmOp->getImm(); |
2866 | |
2867 | |
2868 | |
2869 | // Remove omod and clamp first since they sit at the end of the operand |
2870 | // list. |
2871 | UseMI.RemoveOperand( |
2872 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); |
2873 | UseMI.RemoveOperand( |
2874 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); |
2875 | |
2876 | Register Src1Reg = Src1->getReg(); |
2877 | unsigned Src1SubReg = Src1->getSubReg(); |
2878 | Src0->setReg(Src1Reg); |
2879 | Src0->setSubReg(Src1SubReg); |
2880 | Src0->setIsKill(Src1->isKill()); |
2881 | |
2882 | if (Opc == AMDGPU::V_MAC_F32_e64 || |
2883 | Opc == AMDGPU::V_MAC_F16_e64 || |
2884 | Opc == AMDGPU::V_FMAC_F32_e64 || |
2885 | Opc == AMDGPU::V_FMAC_F16_e64) |
2886 | UseMI.untieRegOperand( |
2887 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); |
2888 | |
2889 | Src1->ChangeToImmediate(Imm); |
2890 | |
2891 | removeModOperands(UseMI); |
2892 | UseMI.setDesc(get(NewOpc)); |
2893 | |
2894 | bool DeleteDef = MRI->hasOneNonDBGUse(Reg); |
2895 | if (DeleteDef) |
2896 | DefMI.eraseFromParent(); |
2897 | |
2898 | return true; |
2899 | } |
2900 | // The folded immediate is the addend (src2): use the madak/fmaak forms, |
2901 | // which take the constant as the final operand. |
2902 | if (Src2->isReg() && Src2->getReg() == Reg) { |
2903 | // Only one constant-bus operand is allowed, so the remaining sources must |
2904 | // not introduce another SGPR or literal unless it can be inlined. |
2905 | bool Src0Inlined = false; |
2906 | if (Src0->isReg()) { |
2907 | |
2908 | |
2909 | |
2910 | MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg()); |
2911 | if (Def && Def->isMoveImmediate() && |
2912 | isInlineConstant(Def->getOperand(1)) && |
2913 | MRI->hasOneUse(Src0->getReg())) { |
2914 | Src0->ChangeToImmediate(Def->getOperand(1).getImm()); |
2915 | Src0Inlined = true; |
2916 | } else if ((Src0->getReg().isPhysical() && |
2917 | (ST.getConstantBusLimit(Opc) <= 1 && |
2918 | RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || |
2919 | (Src0->getReg().isVirtual() && |
2920 | (ST.getConstantBusLimit(Opc) <= 1 && |
2921 | RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) |
2922 | return false; |
2923 | |
2924 | } |
2925 | |
2926 | if (Src1->isReg() && !Src0Inlined) { |
2927 | |
2928 | MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg()); |
2929 | if (Def && Def->isMoveImmediate() && |
2930 | isInlineConstant(Def->getOperand(1)) && |
2931 | MRI->hasOneUse(Src1->getReg()) && |
2932 | commuteInstruction(UseMI)) { |
2933 | Src0->ChangeToImmediate(Def->getOperand(1).getImm()); |
2934 | } else if ((Src1->getReg().isPhysical() && |
2935 | RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || |
2936 | (Src1->getReg().isVirtual() && |
2937 | RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) |
2938 | return false; |
2939 | |
2940 | } |
2941 | |
2942 | unsigned NewOpc = |
2943 | IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16) |
2944 | : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); |
2945 | if (pseudoToMCOpcode(NewOpc) == -1) |
2946 | return false; |
2947 | |
2948 | const int64_t Imm = ImmOp->getImm(); |
2949 | |
2950 | |
2951 | |
2952 | // Remove omod and clamp first since they sit at the end of the operand |
2953 | // list. |
2954 | UseMI.RemoveOperand( |
2955 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); |
2956 | UseMI.RemoveOperand( |
2957 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); |
2958 | |
2959 | if (Opc == AMDGPU::V_MAC_F32_e64 || |
2960 | Opc == AMDGPU::V_MAC_F16_e64 || |
2961 | Opc == AMDGPU::V_FMAC_F32_e64 || |
2962 | Opc == AMDGPU::V_FMAC_F16_e64) |
2963 | UseMI.untieRegOperand( |
2964 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); |
2965 | |
2966 | |
2967 | Src2->ChangeToImmediate(Imm); |
2968 | |
2969 | |
2970 | removeModOperands(UseMI); |
2971 | UseMI.setDesc(get(NewOpc)); |
2972 | |
2973 | |
2974 | // Commuting above may have left an SGPR in src1; legalize if needed. |
2975 | legalizeOperands(UseMI); |
2976 | |
2977 | bool DeleteDef = MRI->hasOneNonDBGUse(Reg); |
2978 | if (DeleteDef) |
2979 | DefMI.eraseFromParent(); |
2980 | |
2981 | return true; |
2982 | } |
2983 | } |
2984 | |
2985 | return false; |
2986 | } |
2987 | |
2988 | static bool |
2989 | memOpsHaveSameBaseOperands(ArrayRef<const MachineOperand *> BaseOps1, |
2990 | ArrayRef<const MachineOperand *> BaseOps2) { |
2991 | if (BaseOps1.size() != BaseOps2.size()) |
2992 | return false; |
2993 | for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) { |
2994 | if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I])) |
2995 | return false; |
2996 | } |
2997 | return true; |
2998 | } |
2999 | |
3000 | static bool offsetsDoNotOverlap(int WidthA, int OffsetA, |
3001 | int WidthB, int OffsetB) { |
3002 | int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; |
3003 | int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; |
3004 | int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; |
3005 | return LowOffset + LowWidth <= HighOffset; |
3006 | } |
3007 | |
3008 | bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, |
3009 | const MachineInstr &MIb) const { |
3010 | SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1; |
3011 | int64_t Offset0, Offset1; |
3012 | unsigned Dummy0, Dummy1; |
3013 | bool Offset0IsScalable, Offset1IsScalable; |
3014 | if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable, |
3015 | Dummy0, &RI) || |
3016 | !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable, |
3017 | Dummy1, &RI)) |
3018 | return false; |
3019 | |
3020 | if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1)) |
3021 | return false; |
3022 | |
3023 | if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { |
3024 | // Conservatively assume a possible overlap with multiple memory operands. |
3025 | return false; |
3026 | } |
3027 | unsigned Width0 = MIa.memoperands().front()->getSize(); |
3028 | unsigned Width1 = MIb.memoperands().front()->getSize(); |
3029 | return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1); |
3030 | } |
3031 | |
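     | // Conservatively decide whether MIa and MIb can never access the same memory. |
     | // Only accesses of the same kind (DS, buffer, scalar, flat) are compared by |
     | // base, offset and width; anything else is treated as possibly aliasing. |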
3032 | bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, |
3033 | const MachineInstr &MIb) const { |
3034 | assert(MIa.mayLoadOrStore() && |
3035 | "MIa must load from or modify a memory location"); |
3036 | assert(MIb.mayLoadOrStore() && |
3037 | "MIb must load from or modify a memory location"); |
3038 | |
3039 | if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) |
3040 | return false; |
3041 | |
3042 | |
3043 | if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) |
3044 | return false; |
3045 | |
3046 | // TODO: Should we check the address space from the MachineMemOperand? That |
3047 | // would allow us to distinguish objects we know don't alias based on the |
3048 | // underlying address space, even if it was lowered to a different one, |
3049 | // e.g. private accesses lowered to use MUBUF instructions on a scratch |
3050 | // buffer. |
3051 | if (isDS(MIa)) { |
3052 | if (isDS(MIb)) |
3053 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3054 | |
3055 | return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); |
3056 | } |
3057 | |
3058 | if (isMUBUF(MIa) || isMTBUF(MIa)) { |
3059 | if (isMUBUF(MIb) || isMTBUF(MIb)) |
3060 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3061 | |
3062 | return !isFLAT(MIb) && !isSMRD(MIb); |
3063 | } |
3064 | |
3065 | if (isSMRD(MIa)) { |
3066 | if (isSMRD(MIb)) |
3067 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3068 | |
3069 | return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb); |
3070 | } |
3071 | |
3072 | if (isFLAT(MIa)) { |
3073 | if (isFLAT(MIb)) |
3074 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3075 | |
3076 | return false; |
3077 | } |
3078 | |
3079 | return false; |
3080 | } |
3081 | |
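     | // If Reg is a virtual register whose unique definition is a foldable |
     | // move-immediate, return that immediate through Imm. |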
3082 | static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, |
3083 | int64_t &Imm) { |
3084 | if (Reg.isPhysical()) |
3085 | return false; |
3086 | auto *Def = MRI.getUniqueVRegDef(Reg); |
3087 | if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) { |
3088 | Imm = Def->getOperand(1).getImm(); |
3089 | return true; |
3090 | } |
3091 | return false; |
3092 | } |
3093 | |
3094 | static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) { |
3095 | if (!MO->isReg()) |
3096 | return false; |
3097 | const MachineFunction *MF = MO->getParent()->getParent()->getParent(); |
3098 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
3099 | return getFoldableImm(MO->getReg(), MRI, Imm); |
3100 | } |
3101 | |
3102 | static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, |
3103 | MachineInstr &NewMI) { |
3104 | if (LV) { |
3105 | unsigned NumOps = MI.getNumOperands(); |
3106 | for (unsigned I = 1; I < NumOps; ++I) { |
3107 | MachineOperand &Op = MI.getOperand(I); |
3108 | if (Op.isReg() && Op.isKill()) |
3109 | LV->replaceKillInstruction(Op.getReg(), MI, NewMI); |
3110 | } |
3111 | } |
3112 | } |
3113 | |
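     | // Convert a two-address V_MAC/V_FMAC into three-address form: a V_MAD/V_FMA, |
     | // or a MADAK/MADMK-style literal form when one source is a foldable |
     | // immediate. Returns the new instruction, or nullptr if conversion fails. |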
3114 | MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, |
3115 | MachineInstr &MI, |
3116 | LiveVariables *LV) const { |
3117 | unsigned Opc = MI.getOpcode(); |
3118 | bool IsF16 = false; |
3119 | bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || |
3120 | Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || |
3121 | Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; |
3122 | bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; |
3123 | |
3124 | switch (Opc) { |
3125 | default: |
3126 | return nullptr; |
3127 | case AMDGPU::V_MAC_F16_e64: |
3128 | case AMDGPU::V_FMAC_F16_e64: |
3129 | IsF16 = true; |
3130 | LLVM_FALLTHROUGH; |
3131 | case AMDGPU::V_MAC_F32_e64: |
3132 | case AMDGPU::V_FMAC_F32_e64: |
3133 | case AMDGPU::V_FMAC_F64_e64: |
3134 | break; |
3135 | case AMDGPU::V_MAC_F16_e32: |
3136 | case AMDGPU::V_FMAC_F16_e32: |
3137 | IsF16 = true; |
3138 | LLVM_FALLTHROUGH; |
3139 | case AMDGPU::V_MAC_F32_e32: |
3140 | case AMDGPU::V_FMAC_F32_e32: |
3141 | case AMDGPU::V_FMAC_F64_e32: { |
3142 | int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
3143 | AMDGPU::OpName::src0); |
3144 | const MachineOperand *Src0 = &MI.getOperand(Src0Idx); |
3145 | if (!Src0->isReg() && !Src0->isImm()) |
3146 | return nullptr; |
3147 | |
3148 | if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) |
3149 | return nullptr; |
3150 | |
3151 | break; |
3152 | } |
3153 | } |
3154 | |
3155 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
3156 | const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); |
3157 | const MachineOperand *Src0Mods = |
3158 | getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); |
3159 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3160 | const MachineOperand *Src1Mods = |
3161 | getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); |
3162 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3163 | const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); |
3164 | const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3165 | MachineInstrBuilder MIB; |
3166 | |
3167 | if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 && |
3168 | // If we have an SGPR input we would violate the constant bus restriction. |
3169 | (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || |
3170 | !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) { |
3171 | int64_t Imm; |
3172 | if (getFoldableImm(Src2, Imm)) { |
3173 | unsigned NewOpc = |
3174 | IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) |
3175 | : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); |
3176 | if (pseudoToMCOpcode(NewOpc) != -1) { |
3177 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3178 | .add(*Dst) |
3179 | .add(*Src0) |
3180 | .add(*Src1) |
3181 | .addImm(Imm); |
3182 | updateLiveVariables(LV, MI, *MIB); |
3183 | return MIB; |
3184 | } |
3185 | } |
3186 | unsigned NewOpc = IsFMA |
3187 | ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) |
3188 | : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); |
3189 | if (getFoldableImm(Src1, Imm)) { |
3190 | if (pseudoToMCOpcode(NewOpc) != -1) { |
3191 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3192 | .add(*Dst) |
3193 | .add(*Src0) |
3194 | .addImm(Imm) |
3195 | .add(*Src2); |
3196 | updateLiveVariables(LV, MI, *MIB); |
3197 | return MIB; |
3198 | } |
3199 | } |
3200 | if (getFoldableImm(Src0, Imm)) { |
3201 | if (pseudoToMCOpcode(NewOpc) != -1 && |
3202 | isOperandLegal( |
3203 | MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), |
3204 | Src1)) { |
3205 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3206 | .add(*Dst) |
3207 | .add(*Src1) |
3208 | .addImm(Imm) |
3209 | .add(*Src2); |
3210 | updateLiveVariables(LV, MI, *MIB); |
3211 | return MIB; |
3212 | } |
3213 | } |
3214 | } |
3215 | |
3216 | unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_e64 |
3217 | : IsF64 ? AMDGPU::V_FMA_F64_e64 |
3218 | : AMDGPU::V_FMA_F32_e64) |
3219 | : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64); |
3220 | if (pseudoToMCOpcode(NewOpc) == -1) |
3221 | return nullptr; |
3222 | |
3223 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3224 | .add(*Dst) |
3225 | .addImm(Src0Mods ? Src0Mods->getImm() : 0) |
3226 | .add(*Src0) |
3227 | .addImm(Src1Mods ? Src1Mods->getImm() : 0) |
3228 | .add(*Src1) |
3229 | .addImm(0) |
3230 | .add(*Src2) |
3231 | .addImm(Clamp ? Clamp->getImm() : 0) |
3232 | .addImm(Omod ? Omod->getImm() : 0); |
3233 | updateLiveVariables(LV, MI, *MIB); |
3234 | return MIB; |
3235 | } |
3236 | |
3237 | |
3238 | |
3239 | // Returns true if MI turns VGPR indexing mode on or off, or changes its mode. |
3240 | static bool changesVGPRIndexingMode(const MachineInstr &MI) { |
3241 | switch (MI.getOpcode()) { |
3242 | case AMDGPU::S_SET_GPR_IDX_ON: |
3243 | case AMDGPU::S_SET_GPR_IDX_MODE: |
3244 | case AMDGPU::S_SET_GPR_IDX_OFF: |
3245 | return true; |
3246 | default: |
3247 | return false; |
3248 | } |
3249 | } |
3250 | |
3251 | bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
3252 | const MachineBasicBlock *MBB, |
3253 | const MachineFunction &MF) const { |
3254 | |
3255 | |
3256 | |
3257 | |
3258 | |
3259 | |
3260 | |
3261 | |
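// Terminators and labels can't be scheduled around.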
3262 | if (MI.isTerminator() || MI.isPosition()) |
3263 | return true; |
3264 | |
3265 | |
3266 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
3267 | return true; |
3268 | |
3269 | |
3270 | |
3271 | |
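// Writes to EXEC, the S_SETREG instructions (which may change hardware mode
// bits) and VGPR-indexing mode changes all act as scheduling barriers.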
3272 | return MI.modifiesRegister(AMDGPU::EXEC, &RI) || |
3273 | MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || |
3274 | MI.getOpcode() == AMDGPU::S_SETREG_B32 || |
3275 | changesVGPRIndexingMode(MI); |
3276 | } |
3277 | |
3278 | bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { |
3279 | return Opcode == AMDGPU::DS_ORDERED_COUNT || |
3280 | Opcode == AMDGPU::DS_GWS_INIT || |
3281 | Opcode == AMDGPU::DS_GWS_SEMA_V || |
3282 | Opcode == AMDGPU::DS_GWS_SEMA_BR || |
3283 | Opcode == AMDGPU::DS_GWS_SEMA_P || |
3284 | Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL || |
3285 | Opcode == AMDGPU::DS_GWS_BARRIER; |
3286 | } |
3287 | |
3288 | bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { |
3289 | |
3290 | |
3291 | |
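// An instruction modifies the MODE register iff its MCInstrDesc lists MODE
// among the implicit defs.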
3292 | if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) { |
3293 | for (; ImpDef && *ImpDef; ++ImpDef) { |
3294 | if (*ImpDef == AMDGPU::MODE) |
3295 | return true; |
3296 | } |
3297 | } |
3298 | |
3299 | return false; |
3300 | } |
3301 | |
3302 | bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { |
3303 | unsigned Opcode = MI.getOpcode(); |
3304 | |
3305 | if (MI.mayStore() && isSMRD(MI)) |
3306 | return true; |
3307 | |
3308 | |
3309 | if (MI.isReturn()) |
3310 | return true; |
3311 | |
3312 | |
3313 | |
3314 | |
3315 | |
3316 | |
3317 | |
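// These instructions (messages, exports, traps, ordered counts, GWS) still
// have observable side effects even when all lanes are disabled.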
3318 | if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || |
3319 | isEXP(Opcode) || |
3320 | Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP || |
3321 | Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER) |
3322 | return true; |
3323 | |
3324 | if (MI.isCall() || MI.isInlineAsm()) |
3325 | return true; |
3326 | |
3327 | |
3328 | if (modifiesModeRegister(MI)) |
3329 | return true; |
3330 | |
3331 | |
3332 | |
3333 | |
3334 | |
3335 | |
3336 | if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || |
3337 | Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32) |
3338 | return true; |
3339 | |
3340 | return false; |
3341 | } |
3342 | |
3343 | bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, |
3344 | const MachineInstr &MI) const { |
3345 | if (MI.isMetaInstruction()) |
3346 | return false; |
3347 | |
3348 | |
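// A copy to a VGPR is implicitly lane-masked by EXEC; a copy to an SGPR only
// reads EXEC if it names it explicitly.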
3349 | if (MI.isCopyLike()) { |
3350 | if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) |
3351 | return true; |
3352 | |
3353 | |
3354 | return MI.readsRegister(AMDGPU::EXEC, &RI); |
3355 | } |
3356 | |
3357 | |
3358 | if (MI.isCall()) |
3359 | return true; |
3360 | |
3361 | |
3362 | if (!isTargetSpecificOpcode(MI.getOpcode())) |
3363 | return true; |
3364 | |
3365 | return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); |
3366 | } |
3367 | |
3368 | bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { |
3369 | switch (Imm.getBitWidth()) { |
3370 | case 1: |
3371 | return true; |
3372 | |
3373 | case 32: |
3374 | return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), |
3375 | ST.hasInv2PiInlineImm()); |
3376 | case 64: |
3377 | return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), |
3378 | ST.hasInv2PiInlineImm()); |
3379 | case 16: |
3380 | return ST.has16BitInsts() && |
3381 | AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), |
3382 | ST.hasInv2PiInlineImm()); |
3383 | default: |
3384 | llvm_unreachable("invalid bitwidth"); |
3385 | } |
3386 | } |
3387 | |
3388 | bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, |
3389 | uint8_t OperandType) const { |
3390 | if (!MO.isImm() || |
3391 | OperandType < AMDGPU::OPERAND_SRC_FIRST || |
3392 | OperandType > AMDGPU::OPERAND_SRC_LAST) |
3393 | return false; |
3394 | |
3395 | |
3396 | |
3397 | |
3398 | |
3399 | |
3400 | int64_t Imm = MO.getImm(); |
3401 | switch (OperandType) { |
3402 | case AMDGPU::OPERAND_REG_IMM_INT32: |
3403 | case AMDGPU::OPERAND_REG_IMM_FP32: |
3404 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
3405 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
3406 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
3407 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
3408 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
3409 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
3410 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
3411 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: { |
3412 | int32_t Trunc = static_cast<int32_t>(Imm); |
3413 | return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); |
3414 | } |
3415 | case AMDGPU::OPERAND_REG_IMM_INT64: |
3416 | case AMDGPU::OPERAND_REG_IMM_FP64: |
3417 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
3418 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
3419 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
3420 | return AMDGPU::isInlinableLiteral64(MO.getImm(), |
3421 | ST.hasInv2PiInlineImm()); |
3422 | case AMDGPU::OPERAND_REG_IMM_INT16: |
3423 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
3424 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
3425 | |
3426 | |
3427 | |
3428 | |
3429 | |
3430 | |
3431 | |
3432 | |
3433 | |
3434 | |
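// 16-bit integer operands only accept the inline integer immediates (-16..64).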
3435 | return AMDGPU::isInlinableIntLiteral(Imm); |
3436 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
3437 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
3438 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
3439 | |
3440 | return AMDGPU::isInlinableIntLiteralV216(Imm); |
3441 | case AMDGPU::OPERAND_REG_IMM_FP16: |
3442 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
3443 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { |
3444 | if (isInt<16>(Imm) || isUInt<16>(Imm)) { |
3445 | |
3446 | |
3447 | |
3448 | |
3449 | int16_t Trunc = static_cast<int16_t>(Imm); |
3450 | return ST.has16BitInsts() && |
3451 | AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); |
3452 | } |
3453 | |
3454 | return false; |
3455 | } |
3456 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
3457 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
3458 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { |
3459 | uint32_t Trunc = static_cast<uint32_t>(Imm); |
3460 | return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); |
3461 | } |
3462 | default: |
3463 | llvm_unreachable("invalid bitwidth"); |
3464 | } |
3465 | } |
3466 | |
3467 | bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO, |
3468 | const MCOperandInfo &OpInfo) const { |
3469 | switch (MO.getType()) { |
3470 | case MachineOperand::MO_Register: |
3471 | return false; |
3472 | case MachineOperand::MO_Immediate: |
3473 | return !isInlineConstant(MO, OpInfo); |
3474 | case MachineOperand::MO_FrameIndex: |
3475 | case MachineOperand::MO_MachineBasicBlock: |
3476 | case MachineOperand::MO_ExternalSymbol: |
3477 | case MachineOperand::MO_GlobalAddress: |
3478 | case MachineOperand::MO_MCSymbol: |
3479 | return true; |
3480 | default: |
3481 | llvm_unreachable("unexpected operand type"); |
3482 | } |
3483 | } |
3484 | |
3485 | static bool compareMachineOp(const MachineOperand &Op0, |
3486 | const MachineOperand &Op1) { |
3487 | if (Op0.getType() != Op1.getType()) |
3488 | return false; |
3489 | |
3490 | switch (Op0.getType()) { |
3491 | case MachineOperand::MO_Register: |
3492 | return Op0.getReg() == Op1.getReg(); |
3493 | case MachineOperand::MO_Immediate: |
3494 | return Op0.getImm() == Op1.getImm(); |
3495 | default: |
3496 | llvm_unreachable("Didn't expect to be comparing these operand types"); |
3497 | } |
3498 | } |
3499 | |
3500 | bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, |
3501 | const MachineOperand &MO) const { |
3502 | const MCInstrDesc &InstDesc = MI.getDesc(); |
3503 | const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo]; |
3504 | |
3505 | assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); |
3506 | |
3507 | if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) |
3508 | return true; |
3509 | |
3510 | if (OpInfo.RegClass < 0) |
3511 | return false; |
3512 | |
3513 | if (MO.isImm() && isInlineConstant(MO, OpInfo)) { |
3514 | if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && |
3515 | OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
3516 | AMDGPU::OpName::src2)) |
3517 | return false; |
3518 | return RI.opCanUseInlineConstant(OpInfo.OperandType); |
3519 | } |
3520 | |
3521 | if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) |
3522 | return false; |
3523 | |
3524 | if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) |
3525 | return true; |
3526 | |
3527 | return ST.hasVOP3Literal(); |
3528 | } |
3529 | |
3530 | bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { |
3531 | |
3532 | if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts()) |
3533 | return false; |
3534 | |
3535 | int Op32 = AMDGPU::getVOPe32(Opcode); |
3536 | if (Op32 == -1) |
3537 | return false; |
3538 | |
3539 | return pseudoToMCOpcode(Op32) != -1; |
3540 | } |
3541 | |
3542 | bool SIInstrInfo::hasModifiers(unsigned Opcode) const { |
3543 | |
3544 | |
3545 | |
3546 | return AMDGPU::getNamedOperandIdx(Opcode, |
3547 | AMDGPU::OpName::src0_modifiers) != -1; |
3548 | } |
3549 | |
3550 | bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, |
3551 | unsigned OpName) const { |
3552 | const MachineOperand *Mods = getNamedOperand(MI, OpName); |
3553 | return Mods && Mods->getImm(); |
3554 | } |
3555 | |
3556 | bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { |
3557 | return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || |
3558 | hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || |
3559 | hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) || |
3560 | hasModifiersSet(MI, AMDGPU::OpName::clamp) || |
3561 | hasModifiersSet(MI, AMDGPU::OpName::omod); |
3562 | } |
3563 | |
3564 | bool SIInstrInfo::canShrink(const MachineInstr &MI, |
3565 | const MachineRegisterInfo &MRI) const { |
3566 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3567 | |
3568 | |
3569 | |
3570 | |
3571 | |
3572 | |
3573 | |
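// A third source operand normally has no 32-bit encoding; the exceptions
// handled below are the carry-in operands (which become an implicit VCC use
// in the e32 form), the MAC/FMAC accumulators and v_cndmask_b32.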
3574 | if (Src2) { |
3575 | switch (MI.getOpcode()) { |
3576 | default: return false; |
3577 | |
3578 | case AMDGPU::V_ADDC_U32_e64: |
3579 | case AMDGPU::V_SUBB_U32_e64: |
3580 | case AMDGPU::V_SUBBREV_U32_e64: { |
3581 | const MachineOperand *Src1 |
3582 | = getNamedOperand(MI, AMDGPU::OpName::src1); |
3583 | if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg())) |
3584 | return false; |
3585 | |
3586 | return true; |
3587 | } |
3588 | case AMDGPU::V_MAC_F32_e64: |
3589 | case AMDGPU::V_MAC_F16_e64: |
3590 | case AMDGPU::V_FMAC_F32_e64: |
3591 | case AMDGPU::V_FMAC_F16_e64: |
3592 | case AMDGPU::V_FMAC_F64_e64: |
3593 | if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || |
3594 | hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) |
3595 | return false; |
3596 | break; |
3597 | |
3598 | case AMDGPU::V_CNDMASK_B32_e64: |
3599 | break; |
3600 | } |
3601 | } |
3602 | |
3603 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3604 | if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) || |
3605 | hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) |
3606 | return false; |
3607 | |
3608 | |
3609 | |
3610 | if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers)) |
3611 | return false; |
3612 | |
3613 | |
3614 | if (!hasVALU32BitEncoding(MI.getOpcode())) |
3615 | return false; |
3616 | |
3617 | |
3618 | return !hasModifiersSet(MI, AMDGPU::OpName::omod) && |
3619 | !hasModifiersSet(MI, AMDGPU::OpName::clamp); |
3620 | } |
3621 | |
3622 | |
3623 | |
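// Transfer the undef and kill flags from the original explicit carry-in
// operand onto the implicit VCC use of the shrunk (e32) instruction.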
3624 | static void copyFlagsToImplicitVCC(MachineInstr &MI, |
3625 | const MachineOperand &Orig) { |
3626 | |
3627 | for (MachineOperand &Use : MI.implicit_operands()) { |
3628 | if (Use.isUse() && |
3629 | (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) { |
3630 | Use.setIsUndef(Orig.isUndef()); |
3631 | Use.setIsKill(Orig.isKill()); |
3632 | return; |
3633 | } |
3634 | } |
3635 | } |
3636 | |
3637 | MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, |
3638 | unsigned Op32) const { |
3639 | MachineBasicBlock *MBB = MI.getParent(); |
3640 | MachineInstrBuilder Inst32 = |
3641 | BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)) |
3642 | .setMIFlags(MI.getFlags()); |
3643 | |
3644 | |
3645 | |
3646 | int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); |
3647 | if (Op32DstIdx != -1) { |
3648 | |
3649 | Inst32.add(MI.getOperand(0)); |
3650 | } else { |
3651 | assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || |
3652 | (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) && |
3653 | "Unexpected case"); |
3654 | } |
3655 | |
3656 | Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0)); |
3657 | |
3658 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3659 | if (Src1) |
3660 | Inst32.add(*Src1); |
3661 | |
3662 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3663 | |
3664 | if (Src2) { |
3665 | int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); |
3666 | if (Op32Src2Idx != -1) { |
3667 | Inst32.add(*Src2); |
3668 | } else { |
3669 | |
3670 | |
3671 | |
3672 | |
3673 | |
3674 | fixImplicitOperands(*Inst32); |
3675 | copyFlagsToImplicitVCC(*Inst32, *Src2); |
3676 | } |
3677 | } |
3678 | |
3679 | return Inst32; |
3680 | } |
3681 | |
3682 | bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, |
3683 | const MachineOperand &MO, |
3684 | const MCOperandInfo &OpInfo) const { |
3685 | |
3686 | |
3687 | |
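// Literal constants occupy a constant bus slot; inline constants do not.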
3688 | if (MO.isImm()) |
3689 | return !isInlineConstant(MO, OpInfo); |
3690 | |
3691 | if (!MO.isReg()) |
3692 | return true; |
3693 | |
3694 | if (!MO.isUse()) |
3695 | return false; |
3696 | |
3697 | if (MO.getReg().isVirtual()) |
3698 | return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); |
3699 | |
3700 | |
3701 | if (MO.getReg() == AMDGPU::SGPR_NULL) |
3702 | return false; |
3703 | |
3704 | |
3705 | if (MO.isImplicit()) { |
3706 | return MO.getReg() == AMDGPU::M0 || |
3707 | MO.getReg() == AMDGPU::VCC || |
3708 | MO.getReg() == AMDGPU::VCC_LO; |
3709 | } else { |
3710 | return AMDGPU::SReg_32RegClass.contains(MO.getReg()) || |
3711 | AMDGPU::SReg_64RegClass.contains(MO.getReg()); |
3712 | } |
3713 | } |
3714 | |
3715 | static Register findImplicitSGPRRead(const MachineInstr &MI) { |
3716 | for (const MachineOperand &MO : MI.implicit_operands()) { |
3717 | |
3718 | if (MO.isDef()) |
3719 | continue; |
3720 | |
3721 | switch (MO.getReg()) { |
3722 | case AMDGPU::VCC: |
3723 | case AMDGPU::VCC_LO: |
3724 | case AMDGPU::VCC_HI: |
3725 | case AMDGPU::M0: |
3726 | case AMDGPU::FLAT_SCR: |
3727 | return MO.getReg(); |
3728 | |
3729 | default: |
3730 | break; |
3731 | } |
3732 | } |
3733 | |
3734 | return AMDGPU::NoRegister; |
3735 | } |
3736 | |
3737 | static bool shouldReadExec(const MachineInstr &MI) { |
3738 | if (SIInstrInfo::isVALU(MI)) { |
3739 | switch (MI.getOpcode()) { |
3740 | case AMDGPU::V_READLANE_B32: |
3741 | case AMDGPU::V_WRITELANE_B32: |
3742 | return false; |
3743 | } |
3744 | |
3745 | return true; |
3746 | } |
3747 | |
3748 | if (MI.isPreISelOpcode() || |
3749 | SIInstrInfo::isGenericOpcode(MI.getOpcode()) || |
3750 | SIInstrInfo::isSALU(MI) || |
3751 | SIInstrInfo::isSMRD(MI)) |
3752 | return false; |
3753 | |
3754 | return true; |
3755 | } |
3756 | |
3757 | static bool isSubRegOf(const SIRegisterInfo &TRI, |
3758 | const MachineOperand &SuperVec, |
3759 | const MachineOperand &SubReg) { |
3760 | if (SubReg.getReg().isPhysical()) |
3761 | return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); |
3762 | |
3763 | return SubReg.getSubReg() != AMDGPU::NoSubRegister && |
3764 | SubReg.getReg() == SuperVec.getReg(); |
3765 | } |
3766 | |
3767 | bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, |
3768 | StringRef &ErrInfo) const { |
3769 | uint16_t Opcode = MI.getOpcode(); |
3770 | if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) |
3771 | return true; |
3772 | |
3773 | const MachineFunction *MF = MI.getParent()->getParent(); |
3774 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
3775 | |
3776 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
3777 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); |
3778 | int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); |
3779 | |
3780 | |
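// Make sure the number of operands matches the MCInstrDesc, unless the
// instruction is variadic.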
3781 | const MCInstrDesc &Desc = get(Opcode); |
3782 | if (!Desc.isVariadic() && |
3783 | Desc.getNumOperands() != MI.getNumExplicitOperands()) { |
3784 | ErrInfo = "Instruction has wrong number of operands."; |
3785 | return false; |
3786 | } |
3787 | |
3788 | if (MI.isInlineAsm()) { |
3789 | |
3790 | for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); |
3791 | I != E; ++I) { |
3792 | const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); |
3793 | if (!RC) |
3794 | continue; |
3795 | |
3796 | const MachineOperand &Op = MI.getOperand(I); |
3797 | if (!Op.isReg()) |
3798 | continue; |
3799 | |
3800 | Register Reg = Op.getReg(); |
3801 | if (!Reg.isVirtual() && !RC->contains(Reg)) { |
3802 | ErrInfo = "inlineasm operand has incorrect register class."; |
3803 | return false; |
3804 | } |
3805 | } |
3806 | |
3807 | return true; |
3808 | } |
3809 | |
3810 | if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) { |
3811 | ErrInfo = "missing memory operand from MIMG instruction."; |
3812 | return false; |
3813 | } |
3814 | |
3815 | |
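// Check each explicit operand: no FP immediates, immediates only where the
// operand type allows them, and registers of the expected (and, if required,
// properly aligned) register class.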
3816 | for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { |
3817 | const MachineOperand &MO = MI.getOperand(i); |
3818 | if (MO.isFPImm()) { |
3819 | ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " |
3820 | "all fp values to integers."; |
3821 | return false; |
3822 | } |
3823 | |
3824 | int RegClass = Desc.OpInfo[i].RegClass; |
3825 | |
3826 | switch (Desc.OpInfo[i].OperandType) { |
3827 | case MCOI::OPERAND_REGISTER: |
3828 | if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) { |
3829 | ErrInfo = "Illegal immediate value for operand."; |
3830 | return false; |
3831 | } |
3832 | break; |
3833 | case AMDGPU::OPERAND_REG_IMM_INT32: |
3834 | case AMDGPU::OPERAND_REG_IMM_FP32: |
3835 | break; |
3836 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
3837 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
3838 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
3839 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
3840 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
3841 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
3842 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
3843 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
3844 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
3845 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
3846 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: { |
3847 | if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { |
3848 | ErrInfo = "Illegal immediate value for operand."; |
3849 | return false; |
3850 | } |
3851 | break; |
3852 | } |
3853 | case MCOI::OPERAND_IMMEDIATE: |
3854 | case AMDGPU::OPERAND_KIMM32: |
3855 | |
3856 | |
3857 | |
3858 | if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { |
3859 | ErrInfo = "Expected immediate, but got non-immediate"; |
3860 | return false; |
3861 | } |
3862 | LLVM_FALLTHROUGH; |
3863 | default: |
3864 | continue; |
3865 | } |
3866 | |
3867 | if (!MO.isReg()) |
3868 | continue; |
3869 | Register Reg = MO.getReg(); |
3870 | if (!Reg) |
3871 | continue; |
3872 | |
3873 | |
3874 | |
3875 | |
3876 | |
3877 | if (ST.needsAlignedVGPRs()) { |
3878 | const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); |
3879 | const bool IsVGPR = RI.hasVGPRs(RC); |
3880 | const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC); |
3881 | if ((IsVGPR || IsAGPR) && MO.getSubReg()) { |
3882 | const TargetRegisterClass *SubRC = |
3883 | RI.getSubRegClass(RC, MO.getSubReg()); |
3884 | RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); |
3885 | if (RC) |
3886 | RC = SubRC; |
3887 | } |
3888 | |
3889 | |
3890 | if (!RC || !RI.isProperlyAlignedRC(*RC)) { |
3891 | ErrInfo = "Subtarget requires even aligned vector registers"; |
3892 | return false; |
3893 | } |
3894 | } |
3895 | |
3896 | if (RegClass != -1) { |
3897 | if (Reg.isVirtual()) |
3898 | continue; |
3899 | |
3900 | const TargetRegisterClass *RC = RI.getRegClass(RegClass); |
3901 | if (!RC->contains(Reg)) { |
3902 | ErrInfo = "Operand has incorrect register class."; |
3903 | return false; |
3904 | } |
3905 | } |
3906 | } |
3907 | |
3908 | |
3909 | if (isSDWA(MI)) { |
3910 | if (!ST.hasSDWA()) { |
3911 | ErrInfo = "SDWA is not supported on this target"; |
3912 | return false; |
3913 | } |
3914 | |
3915 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); |
3916 | |
3917 | const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; |
3918 | |
3919 | for (int OpIdx : OpIndices) { |
3920 | if (OpIdx == -1) |
3921 | continue; |
3922 | const MachineOperand &MO = MI.getOperand(OpIdx); |
3923 | |
3924 | if (!ST.hasSDWAScalar()) { |
3925 | |
3926 | if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { |
3927 | ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; |
3928 | return false; |
3929 | } |
3930 | } else { |
3931 | |
3932 | if (!MO.isReg()) { |
3933 | ErrInfo = |
3934 | "Only reg allowed as operands in SDWA instructions on GFX9+"; |
3935 | return false; |
3936 | } |
3937 | } |
3938 | } |
3939 | |
3940 | if (!ST.hasSDWAOmod()) { |
3941 | |
3942 | const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3943 | if (OMod != nullptr && |
3944 | (!OMod->isImm() || OMod->getImm() != 0)) { |
3945 | ErrInfo = "OMod not allowed in SDWA instructions on VI"; |
3946 | return false; |
3947 | } |
3948 | } |
3949 | |
3950 | uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); |
3951 | if (isVOPC(BasicOpcode)) { |
3952 | if (!ST.hasSDWASdst() && DstIdx != -1) { |
3953 | |
3954 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
3955 | if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { |
3956 | ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; |
3957 | return false; |
3958 | } |
3959 | } else if (!ST.hasSDWAOutModsVOPC()) { |
3960 | |
3961 | const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); |
3962 | if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { |
3963 | ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; |
3964 | return false; |
3965 | } |
3966 | |
3967 | |
3968 | const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3969 | if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { |
3970 | ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; |
3971 | return false; |
3972 | } |
3973 | } |
3974 | } |
3975 | |
3976 | const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
3977 | if (DstUnused && DstUnused->isImm() && |
3978 | DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { |
3979 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
3980 | if (!Dst.isReg() || !Dst.isTied()) { |
3981 | ErrInfo = "Dst register should have tied register"; |
3982 | return false; |
3983 | } |
3984 | |
3985 | const MachineOperand &TiedMO = |
3986 | MI.getOperand(MI.findTiedOperandIdx(DstIdx)); |
3987 | if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { |
3988 | ErrInfo = |
3989 | "Dst register should be tied to implicit use of preserved register"; |
3990 | return false; |
3991 | } else if (TiedMO.getReg().isPhysical() && |
3992 | Dst.getReg() != TiedMO.getReg()) { |
3993 | ErrInfo = "Dst register should use same physical register as preserved"; |
3994 | return false; |
3995 | } |
3996 | } |
3997 | } |
3998 | |
3999 | |
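// For MIMG loads, the register count implied by dmask (adjusted for gather4,
// packed d16 and tfe/lwe) must fit in the vdata register class.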
4000 | if (isMIMG(MI.getOpcode()) && !MI.mayStore()) { |
4001 | |
4002 | |
4003 | const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask); |
4004 | if (DMask) { |
4005 | uint64_t DMaskImm = DMask->getImm(); |
4006 | uint32_t RegCount = |
4007 | isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm); |
4008 | const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe); |
4009 | const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe); |
4010 | const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16); |
4011 | |
4012 | |
4013 | if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem()) |
4014 | RegCount >>= 1; |
4015 | |
4016 | |
4017 | if ((LWE && LWE->getImm()) || (TFE && TFE->getImm())) |
4018 | RegCount += 1; |
4019 | |
4020 | const uint32_t DstIdx = |
4021 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); |
4022 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
4023 | if (Dst.isReg()) { |
4024 | const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx); |
4025 | uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32; |
4026 | if (RegCount > DstSize) { |
4027 | ErrInfo = "MIMG instruction returns too many registers for dst " |
4028 | "register class"; |
4029 | return false; |
4030 | } |
4031 | } |
4032 | } |
4033 | } |
4034 | |
4035 | |
4036 | if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32 |
4037 | && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) { |
4038 | |
4039 | |
4040 | |
4041 | const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; |
4042 | |
4043 | unsigned ConstantBusCount = 0; |
4044 | bool UsesLiteral = false; |
4045 | const MachineOperand *LiteralVal = nullptr; |
4046 | |
4047 | if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) |
4048 | ++ConstantBusCount; |
4049 | |
4050 | SmallVector<Register, 2> SGPRsUsed; |
4051 | Register SGPRUsed; |
4052 | |
4053 | for (int OpIdx : OpIndices) { |
4054 | if (OpIdx == -1) |
4055 | break; |
4056 | const MachineOperand &MO = MI.getOperand(OpIdx); |
4057 | if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { |
4058 | if (MO.isReg()) { |
4059 | SGPRUsed = MO.getReg(); |
4060 | if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) { |
4061 | return SGPRUsed != SGPR; |
4062 | })) { |
4063 | ++ConstantBusCount; |
4064 | SGPRsUsed.push_back(SGPRUsed); |
4065 | } |
4066 | } else { |
4067 | if (!UsesLiteral) { |
4068 | ++ConstantBusCount; |
4069 | UsesLiteral = true; |
4070 | LiteralVal = &MO; |
4071 | } else if (!MO.isIdenticalTo(*LiteralVal)) { |
4072 | assert(isVOP3(MI)); |
4073 | ErrInfo = "VOP3 instruction uses more than one literal"; |
4074 | return false; |
4075 | } |
4076 | } |
4077 | } |
4078 | } |
4079 | |
4080 | SGPRUsed = findImplicitSGPRRead(MI); |
4081 | if (SGPRUsed != AMDGPU::NoRegister) { |
4082 | |
4083 | if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { |
4084 | return !RI.regsOverlap(SGPRUsed, SGPR); |
4085 | })) { |
4086 | ++ConstantBusCount; |
4087 | SGPRsUsed.push_back(SGPRUsed); |
4088 | } |
4089 | } |
4090 | |
4091 | |
4092 | |
4093 | if (ConstantBusCount > ST.getConstantBusLimit(Opcode) && |
4094 | Opcode != AMDGPU::V_WRITELANE_B32) { |
4095 | ErrInfo = "VOP* instruction violates constant bus restriction"; |
4096 | return false; |
4097 | } |
4098 | |
4099 | if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) { |
4100 | ErrInfo = "VOP3 instruction uses literal"; |
4101 | return false; |
4102 | } |
4103 | } |
4104 | |
4105 | |
4106 | |
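// v_writelane_b32 is exempt from the generic VOP constant bus check above,
// but its SGPR sources must still fit within the constant bus limit.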
4107 | if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { |
4108 | unsigned SGPRCount = 0; |
4109 | Register SGPRUsed = AMDGPU::NoRegister; |
4110 | |
4111 | for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { |
4112 | if (OpIdx == -1) |
4113 | break; |
4114 | |
4115 | const MachineOperand &MO = MI.getOperand(OpIdx); |
4116 | |
4117 | if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { |
4118 | if (MO.isReg() && MO.getReg() != AMDGPU::M0) { |
4119 | if (MO.getReg() != SGPRUsed) |
4120 | ++SGPRCount; |
4121 | SGPRUsed = MO.getReg(); |
4122 | } |
4123 | } |
4124 | if (SGPRCount > ST.getConstantBusLimit(Opcode)) { |
4125 | ErrInfo = "WRITELANE instruction violates constant bus restriction"; |
4126 | return false; |
4127 | } |
4128 | } |
4129 | } |
4130 | |
4131 | |
4132 | if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 || |
4133 | Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) { |
4134 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4135 | const MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4136 | const MachineOperand &Src2 = MI.getOperand(Src2Idx); |
4137 | if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { |
4138 | if (!compareMachineOp(Src0, Src1) && |
4139 | !compareMachineOp(Src0, Src2)) { |
4140 | ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; |
4141 | return false; |
4142 | } |
4143 | } |
4144 | if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & |
4145 | SISrcMods::ABS) || |
4146 | (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() & |
4147 | SISrcMods::ABS) || |
4148 | (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & |
4149 | SISrcMods::ABS)) { |
4150 | ErrInfo = "ABS not allowed in VOP3B instructions"; |
4151 | return false; |
4152 | } |
4153 | } |
4154 | |
4155 | if (isSOP2(MI) || isSOPC(MI)) { |
4156 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4157 | const MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4158 | unsigned Immediates = 0; |
4159 | |
4160 | if (!Src0.isReg() && |
4161 | !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType)) |
4162 | Immediates++; |
4163 | if (!Src1.isReg() && |
4164 | !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType)) |
4165 | Immediates++; |
4166 | |
4167 | if (Immediates > 1) { |
4168 | ErrInfo = "SOP2/SOPC instruction requires too many immediate constants"; |
4169 | return false; |
4170 | } |
4171 | } |
4172 | |
4173 | if (isSOPK(MI)) { |
4174 | auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16); |
4175 | if (Desc.isBranch()) { |
4176 | if (!Op->isMBB()) { |
4177 | ErrInfo = "invalid branch target for SOPK instruction"; |
4178 | return false; |
4179 | } |
4180 | } else { |
4181 | uint64_t Imm = Op->getImm(); |
4182 | if (sopkIsZext(MI)) { |
4183 | if (!isUInt<16>(Imm)) { |
4184 | ErrInfo = "invalid immediate for SOPK instruction"; |
4185 | return false; |
4186 | } |
4187 | } else { |
4188 | if (!isInt<16>(Imm)) { |
4189 | ErrInfo = "invalid immediate for SOPK instruction"; |
4190 | return false; |
4191 | } |
4192 | } |
4193 | } |
4194 | } |
4195 | |
4196 | if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || |
4197 | Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || |
4198 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || |
4199 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { |
4200 | const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || |
4201 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; |
4202 | |
4203 | const unsigned StaticNumOps = Desc.getNumOperands() + |
4204 | Desc.getNumImplicitUses(); |
4205 | const unsigned NumImplicitOps = IsDst ? 2 : 1; |
4206 | |
4207 | |
4208 | |
4209 | |
4210 | if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { |
4211 | ErrInfo = "missing implicit register operands"; |
4212 | return false; |
4213 | } |
4214 | |
4215 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
4216 | if (IsDst) { |
4217 | if (!Dst->isUse()) { |
4218 | ErrInfo = "v_movreld_b32 vdst should be a use operand"; |
4219 | return false; |
4220 | } |
4221 | |
4222 | unsigned UseOpIdx; |
4223 | if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || |
4224 | UseOpIdx != StaticNumOps + 1) { |
4225 | ErrInfo = "movrel implicit operands should be tied"; |
4226 | return false; |
4227 | } |
4228 | } |
4229 | |
4230 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4231 | const MachineOperand &ImpUse |
4232 | = MI.getOperand(StaticNumOps + NumImplicitOps - 1); |
4233 | if (!ImpUse.isReg() || !ImpUse.isUse() || |
4234 | !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { |
4235 | ErrInfo = "src0 should be subreg of implicit vector use"; |
4236 | return false; |
4237 | } |
4238 | } |
4239 | |
4240 | |
4241 | |
4242 | if (shouldReadExec(MI)) { |
4243 | if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { |
4244 | ErrInfo = "VALU instruction does not implicitly read exec mask"; |
4245 | return false; |
4246 | } |
4247 | } |
4248 | |
4249 | if (isSMRD(MI)) { |
4250 | if (MI.mayStore()) { |
4251 | |
4252 | |
4253 | const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); |
4254 | if (Soff && Soff->getReg() != AMDGPU::M0) { |
4255 | ErrInfo = "scalar stores must use m0 as offset register"; |
4256 | return false; |
4257 | } |
4258 | } |
4259 | } |
4260 | |
4261 | if (isFLAT(MI) && !ST.hasFlatInstOffsets()) { |
4262 | const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); |
4263 | if (Offset->getImm() != 0) { |
4264 | ErrInfo = "subtarget does not support offsets in flat instructions"; |
4265 | return false; |
4266 | } |
4267 | } |
4268 | |
4269 | if (isMIMG(MI)) { |
4270 | const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); |
4271 | if (DimOp) { |
4272 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, |
4273 | AMDGPU::OpName::vaddr0); |
4274 | int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); |
4275 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode); |
4276 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4277 | AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); |
4278 | const AMDGPU::MIMGDimInfo *Dim = |
4279 | AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); |
4280 | |
4281 | if (!Dim) { |
4282 | ErrInfo = "dim is out of range"; |
4283 | return false; |
4284 | } |
4285 | |
4286 | bool IsA16 = false; |
4287 | if (ST.hasR128A16()) { |
4288 | const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128); |
4289 | IsA16 = R128A16->getImm() != 0; |
4290 | } else if (ST.hasGFX10A16()) { |
4291 | const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16); |
4292 | IsA16 = A16->getImm() != 0; |
4293 | } |
4294 | |
4295 | bool IsNSA = SRsrcIdx - VAddr0Idx > 1; |
4296 | |
4297 | unsigned AddrWords = |
4298 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16()); |
4299 | |
4300 | unsigned VAddrWords; |
4301 | if (IsNSA) { |
4302 | VAddrWords = SRsrcIdx - VAddr0Idx; |
4303 | } else { |
4304 | const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx); |
4305 | VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32; |
4306 | if (AddrWords > 8) |
4307 | AddrWords = 16; |
4308 | } |
4309 | |
4310 | if (VAddrWords != AddrWords) { |
4311 | LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords |
4312 | << " but got " << VAddrWords << "\n"); |
4313 | ErrInfo = "bad vaddr size"; |
4314 | return false; |
4315 | } |
4316 | } |
4317 | } |
4318 | |
4319 | const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl); |
4320 | if (DppCt) { |
4321 | using namespace AMDGPU::DPP; |
4322 | |
4323 | unsigned DC = DppCt->getImm(); |
4324 | if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 || |
4325 | DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST || |
4326 | (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) || |
4327 | (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) || |
4328 | (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) || |
4329 | (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) || |
4330 | (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) { |
4331 | ErrInfo = "Invalid dpp_ctrl value"; |
4332 | return false; |
4333 | } |
4334 | if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 && |
4335 | ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
4336 | ErrInfo = "Invalid dpp_ctrl value: " |
4337 | "wavefront shifts are not supported on GFX10+"; |
4338 | return false; |
4339 | } |
4340 | if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && |
4341 | ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
4342 | ErrInfo = "Invalid dpp_ctrl value: " |
4343 | "broadcasts are not supported on GFX10+"; |
4344 | return false; |
4345 | } |
4346 | if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && |
4347 | ST.getGeneration() < AMDGPUSubtarget::GFX10) { |
4348 | if (DC >= DppCtrl::ROW_NEWBCAST_FIRST && |
4349 | DC <= DppCtrl::ROW_NEWBCAST_LAST && |
4350 | !ST.hasGFX90AInsts()) { |
4351 | ErrInfo = "Invalid dpp_ctrl value: " |
4352 | "row_newbroadcast/row_share is not supported before " |
4353 | "GFX90A/GFX10"; |
4354 | return false; |
4355 | } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) { |
4356 | ErrInfo = "Invalid dpp_ctrl value: " |
4357 | "row_share and row_xmask are not supported before GFX10"; |
4358 | return false; |
4359 | } |
4360 | } |
4361 | |
4362 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); |
4363 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
4364 | |
4365 | if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && |
4366 | ((DstIdx >= 0 && |
4367 | (Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64RegClassID || |
4368 | Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64_Align2RegClassID)) || |
4369 | ((Src0Idx >= 0 && |
4370 | (Desc.OpInfo[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID || |
4371 | Desc.OpInfo[Src0Idx].RegClass == |
4372 | AMDGPU::VReg_64_Align2RegClassID)))) && |
4373 | !AMDGPU::isLegal64BitDPPControl(DC)) { |
4374 | ErrInfo = "Invalid dpp_ctrl value: " |
4375 | "64 bit dpp only support row_newbcast"; |
4376 | return false; |
4377 | } |
4378 | } |
4379 | |
4380 | if ((MI.mayStore() || MI.mayLoad()) && !isVGPRSpill(MI)) { |
4381 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
4382 | uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0 |
4383 | : AMDGPU::OpName::vdata; |
4384 | const MachineOperand *Data = getNamedOperand(MI, DataNameIdx); |
4385 | const MachineOperand *Data2 = getNamedOperand(MI, AMDGPU::OpName::data1); |
4386 | if (Data && !Data->isReg()) |
4387 | Data = nullptr; |
4388 | |
4389 | if (ST.hasGFX90AInsts()) { |
4390 | if (Dst && Data && |
4391 | (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { |
4392 | ErrInfo = "Invalid register class: " |
4393 | "vdata and vdst should be both VGPR or AGPR"; |
4394 | return false; |
4395 | } |
4396 | if (Data && Data2 && |
4397 | (RI.isAGPR(MRI, Data->getReg()) != RI.isAGPR(MRI, Data2->getReg()))) { |
4398 | ErrInfo = "Invalid register class: " |
4399 | "both data operands should be VGPR or AGPR"; |
4400 | return false; |
4401 | } |
4402 | } else { |
4403 | if ((Dst && RI.isAGPR(MRI, Dst->getReg())) || |
4404 | (Data && RI.isAGPR(MRI, Data->getReg())) || |
4405 | (Data2 && RI.isAGPR(MRI, Data2->getReg()))) { |
4406 | ErrInfo = "Invalid register class: " |
4407 | "agpr loads and stores not supported on this GPU"; |
4408 | return false; |
4409 | } |
4410 | } |
4411 | } |
4412 | |
4413 | if (ST.needsAlignedVGPRs() && |
4414 | (MI.getOpcode() == AMDGPU::DS_GWS_INIT || |
4415 | MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || |
4416 | MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) { |
4417 | const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0); |
4418 | Register Reg = Op->getReg(); |
4419 | bool Aligned = true; |
4420 | if (Reg.isPhysical()) { |
4421 | Aligned = !(RI.getHWRegIndex(Reg) & 1); |
4422 | } else { |
4423 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg); |
4424 | Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && |
4425 | !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); |
4426 | } |
4427 | |
4428 | if (!Aligned) { |
4429 | ErrInfo = "Subtarget requires even aligned vector registers " |
4430 | "for DS_GWS instructions"; |
4431 | return false; |
4432 | } |
4433 | } |
4434 | |
4435 | return true; |
4436 | } |
4437 | |
4438 | unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { |
4439 | switch (MI.getOpcode()) { |
4440 | default: return AMDGPU::INSTRUCTION_LIST_END; |
4441 | case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; |
4442 | case AMDGPU::COPY: return AMDGPU::COPY; |
4443 | case AMDGPU::PHI: return AMDGPU::PHI; |
4444 | case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; |
4445 | case AMDGPU::WQM: return AMDGPU::WQM; |
4446 | case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; |
4447 | case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM; |
4448 | case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; |
4449 | case AMDGPU::S_MOV_B32: { |
4450 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
4451 | return MI.getOperand(1).isReg() || |
4452 | RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? |
4453 | AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; |
4454 | } |
4455 | case AMDGPU::S_ADD_I32: |
4456 | return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; |
4457 | case AMDGPU::S_ADDC_U32: |
4458 | return AMDGPU::V_ADDC_U32_e32; |
4459 | case AMDGPU::S_SUB_I32: |
4460 | return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32; |
4461 | |
4462 | |
4463 | case AMDGPU::S_ADD_U32: |
4464 | return AMDGPU::V_ADD_CO_U32_e32; |
4465 | case AMDGPU::S_SUB_U32: |
4466 | return AMDGPU::V_SUB_CO_U32_e32; |
4467 | case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; |
4468 | case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64; |
4469 | case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64; |
4470 | case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64; |
4471 | case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; |
4472 | case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; |
4473 | case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; |
4474 | case AMDGPU::S_XNOR_B32: |
4475 | return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; |
4476 | case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; |
4477 | case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; |
4478 | case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; |
4479 | case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; |
4480 | case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; |
4481 | case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64; |
4482 | case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; |
4483 | case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64; |
4484 | case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; |
4485 | case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64; |
4486 | case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64; |
4487 | case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64; |
4488 | case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64; |
4489 | case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64; |
4490 | case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; |
4491 | case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; |
4492 | case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; |
4493 | case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; |
4494 | case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64; |
4495 | case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64; |
4496 | case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64; |
4497 | case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64; |
4498 | case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64; |
4499 | case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64; |
4500 | case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64; |
4501 | case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64; |
4502 | case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64; |
4503 | case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64; |
4504 | case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64; |
4505 | case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64; |
4506 | case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64; |
4507 | case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64; |
4508 | case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; |
4509 | case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; |
4510 | case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; |
4511 | case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; |
4512 | case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; |
4513 | case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; |
4514 | } |
4515 | llvm_unreachable( |
4516 | "Unexpected scalar opcode without corresponding vector one!"); |
4517 | } |
4518 | |
4519 | static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST, |
4520 | const MachineRegisterInfo &MRI, |
4521 | const MCInstrDesc &TID, |
4522 | unsigned RCID, |
4523 | bool IsAllocatable) { |
4524 | if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && |
4525 | (TID.mayLoad() || TID.mayStore() || |
4526 | (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { |
4527 | switch (RCID) { |
4528 | case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID; |
4529 | case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID; |
4530 | case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID; |
4531 | case AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID; |
4532 | case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID; |
4533 | default: |
4534 | break; |
4535 | } |
4536 | } |
4537 | return RCID; |
4538 | } |
4539 | |
4540 | const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, |
4541 | unsigned OpNum, const TargetRegisterInfo *TRI, |
4542 | const MachineFunction &MF) |
4543 | const { |
4544 | if (OpNum >= TID.getNumOperands()) |
4545 | return nullptr; |
4546 | auto RegClass = TID.OpInfo[OpNum].RegClass; |
4547 | bool IsAllocatable = false; |
4548 | if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) { |
4549 | |
4550 | |
4551 | |
4552 | |
4553 | |
4554 | |
4555 | |
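// For DS and FLAT instructions, decide whether an AV_* (VGPR-or-AGPR) operand
// may keep the combined class or should be narrowed to a plain VGPR class by
// adjustAllocatableRegClass() below.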
4556 | const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, |
4557 | AMDGPU::OpName::vdst); |
4558 | const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, |
4559 | (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 |
4560 | : AMDGPU::OpName::vdata); |
4561 | if (DataIdx != -1) { |
4562 | IsAllocatable = VDstIdx != -1 || |
4563 | AMDGPU::getNamedOperandIdx(TID.Opcode, |
4564 | AMDGPU::OpName::data1) != -1; |
4565 | } |
4566 | } |
4567 | RegClass = adjustAllocatableRegClass(ST, MF.getRegInfo(), TID, RegClass, |
4568 | IsAllocatable); |
4569 | return RI.getRegClass(RegClass); |
4570 | } |
4571 | |
4572 | const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, |
4573 | unsigned OpNo) const { |
4574 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
4575 | const MCInstrDesc &Desc = get(MI.getOpcode()); |
4576 | if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || |
4577 | Desc.OpInfo[OpNo].RegClass == -1) { |
4578 | Register Reg = MI.getOperand(OpNo).getReg(); |
4579 | |
4580 | if (Reg.isVirtual()) |
4581 | return MRI.getRegClass(Reg); |
4582 | return RI.getPhysRegClass(Reg); |
4583 | } |
4584 | |
4585 | unsigned RCID = Desc.OpInfo[OpNo].RegClass; |
4586 | RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true); |
4587 | return RI.getRegClass(RCID); |
4588 | } |
4589 | |
4590 | void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { |
4591 | MachineBasicBlock::iterator I = MI; |
4592 | MachineBasicBlock *MBB = MI.getParent(); |
4593 | MachineOperand &MO = MI.getOperand(OpIdx); |
4594 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
4595 | unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; |
4596 | const TargetRegisterClass *RC = RI.getRegClass(RCID); |
4597 | unsigned Size = RI.getRegSizeInBits(*RC); |
4598 | unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; |
4599 | if (MO.isReg()) |
4600 | Opcode = AMDGPU::COPY; |
4601 | else if (RI.isSGPRClass(RC)) |
4602 | Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; |
4603 | |
4604 | const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); |
4605 | const TargetRegisterClass *VRC64 = RI.getVGPR64Class(); |
4606 | if (RI.getCommonSubClass(VRC64, VRC)) |
4607 | VRC = VRC64; |
4608 | else |
4609 | VRC = &AMDGPU::VGPR_32RegClass; |
4610 | |
4611 | Register Reg = MRI.createVirtualRegister(VRC); |
4612 | DebugLoc DL = MBB->findDebugLoc(I); |
4613 | BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); |
4614 | MO.ChangeToRegister(Reg, false); |
4615 | } |
4616 | |
4617 | unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, |
4618 | MachineRegisterInfo &MRI, |
4619 | MachineOperand &SuperReg, |
4620 | const TargetRegisterClass *SuperRC, |
4621 | unsigned SubIdx, |
4622 | const TargetRegisterClass *SubRC) |
4623 | const { |
4624 | MachineBasicBlock *MBB = MI->getParent(); |
4625 | DebugLoc DL = MI->getDebugLoc(); |
4626 | Register SubReg = MRI.createVirtualRegister(SubRC); |
4627 | |
4628 | if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { |
4629 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) |
4630 | .addReg(SuperReg.getReg(), 0, SubIdx); |
4631 | return SubReg; |
4632 | } |
4633 | |
4634 | |
4635 | |
4636 | |
4637 | |
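// The source operand itself carries a subregister index, so copy that subreg
// into a fresh register of SuperRC first and then extract SubIdx from the
// copy.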
4638 | Register NewSuperReg = MRI.createVirtualRegister(SuperRC); |
4639 | |
4640 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) |
4641 | .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); |
4642 | |
4643 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) |
4644 | .addReg(NewSuperReg, 0, SubIdx); |
4645 | |
4646 | return SubReg; |
4647 | } |
4648 | |
4649 | MachineOperand SIInstrInfo::buildExtractSubRegOrImm( |
4650 | MachineBasicBlock::iterator MII, |
4651 | MachineRegisterInfo &MRI, |
4652 | MachineOperand &Op, |
4653 | const TargetRegisterClass *SuperRC, |
4654 | unsigned SubIdx, |
4655 | const TargetRegisterClass *SubRC) const { |
4656 | if (Op.isImm()) { |
4657 | if (SubIdx == AMDGPU::sub0) |
4658 | return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); |
4659 | if (SubIdx == AMDGPU::sub1) |
4660 | return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); |
4661 | |
4662 | llvm_unreachable("Unhandled register index for immediate"); |
4663 | } |
4664 | |
4665 | unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, |
4666 | SubIdx, SubRC); |
4667 | return MachineOperand::CreateReg(SubReg, false); |
4668 | } |
4669 | |
4670 | |
4671 | void SIInstrInfo::swapOperands(MachineInstr &Inst) const { |
4672 | assert(Inst.getNumExplicitOperands() == 3); |
4673 | MachineOperand Op1 = Inst.getOperand(1); |
4674 | Inst.RemoveOperand(1); |
4675 | Inst.addOperand(Op1); |
4676 | } |
4677 | |
4678 | bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, |
4679 | const MCOperandInfo &OpInfo, |
4680 | const MachineOperand &MO) const { |
4681 | if (!MO.isReg()) |
4682 | return false; |
4683 | |
4684 | Register Reg = MO.getReg(); |
4685 | |
4686 | const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); |
4687 | if (Reg.isPhysical()) |
4688 | return DRC->contains(Reg); |
4689 | |
4690 | const TargetRegisterClass *RC = MRI.getRegClass(Reg); |
4691 | |
4692 | if (MO.getSubReg()) { |
4693 | const MachineFunction *MF = MO.getParent()->getParent()->getParent(); |
4694 | const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); |
4695 | if (!SuperRC) |
4696 | return false; |
4697 | |
4698 | DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); |
4699 | if (!DRC) |
4700 | return false; |
4701 | } |
4702 | return RC->hasSuperClassEq(DRC); |
4703 | } |
4704 | |
4705 | bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, |
4706 | const MCOperandInfo &OpInfo, |
4707 | const MachineOperand &MO) const { |
4708 | if (MO.isReg()) |
4709 | return isLegalRegOperand(MRI, OpInfo, MO); |
4710 | |
4711 | |
4712 | assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); |
4713 | return true; |
4714 | } |
4715 | |
4716 | bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, |
4717 | const MachineOperand *MO) const { |
4718 | const MachineFunction &MF = *MI.getParent()->getParent(); |
4719 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
4720 | const MCInstrDesc &InstDesc = MI.getDesc(); |
4721 | const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; |
4722 | const TargetRegisterClass *DefinedRC = |
4723 | OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; |
4724 | if (!MO) |
4725 | MO = &MI.getOperand(OpIdx); |
4726 | |
4727 | int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); |
4728 | int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; |
4729 | if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { |
4730 | if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--) |
4731 | return false; |
4732 | |
4733 | SmallDenseSet<RegSubRegPair> SGPRsUsed; |
4734 | if (MO->isReg()) |
4735 | SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg())); |
4736 | |
4737 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
4738 | if (i == OpIdx) |
4739 | continue; |
4740 | const MachineOperand &Op = MI.getOperand(i); |
4741 | if (Op.isReg()) { |
4742 | RegSubRegPair SGPR(Op.getReg(), Op.getSubReg()); |
4743 | if (!SGPRsUsed.count(SGPR) && |
4744 | usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) { |
4745 | if (--ConstantBusLimit <= 0) |
4746 | return false; |
4747 | SGPRsUsed.insert(SGPR); |
4748 | } |
4749 | } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { |
4750 | if (--ConstantBusLimit <= 0) |
4751 | return false; |
4752 | } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) && |
4753 | isLiteralConstantLike(Op, InstDesc.OpInfo[i])) { |
4754 | if (!VOP3LiteralLimit--) |
4755 | return false; |
4756 | if (--ConstantBusLimit <= 0) |
4757 | return false; |
4758 | } |
4759 | } |
4760 | } |
4761 | |
4762 | if (MO->isReg()) { |
4763 | assert(DefinedRC); |
4764 | if (!isLegalRegOperand(MRI, OpInfo, *MO)) |
4765 | return false; |
4766 | bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); |
4767 | if (IsAGPR && !ST.hasMAIInsts()) |
4768 | return false; |
4769 | unsigned Opc = MI.getOpcode(); |
4770 | if (IsAGPR && |
4771 | (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && |
4772 | (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) |
4773 | return false; |
4774 | |
4775 | const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
4776 | const int DataIdx = AMDGPU::getNamedOperandIdx(Opc, |
4777 | isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); |
4778 | if ((int)OpIdx == VDstIdx && DataIdx != -1 && |
4779 | MI.getOperand(DataIdx).isReg() && |
4780 | RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) |
4781 | return false; |
4782 | if ((int)OpIdx == DataIdx) { |
4783 | if (VDstIdx != -1 && |
4784 | RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) |
4785 | return false; |
4786 | |
4787 | const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, |
4788 | AMDGPU::OpName::data1); |
4789 | if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && |
4790 | RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) |
4791 | return false; |
4792 | } |
4793 | if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && |
4794 | (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && |
4795 | RI.isSGPRReg(MRI, MO->getReg())) |
4796 | return false; |
4797 | return true; |
4798 | } |
4799 | |
4800 | |
4801 | assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal()); |
4802 | |
4803 | if (!DefinedRC) { |
4804 | |
4805 | return true; |
4806 | } |
4807 | |
4808 | return isImmOperandLegal(MI, OpIdx, *MO); |
4809 | } |
4810 | |
4811 | void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, |
4812 | MachineInstr &MI) const { |
4813 | unsigned Opc = MI.getOpcode(); |
4814 | const MCInstrDesc &InstrDesc = get(Opc); |
4815 | |
4816 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); |
4817 | MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4818 | |
4819 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); |
4820 | MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4821 | |
4822 | |
4823 | |
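// If the instruction already reads an implicit SGPR (e.g. VCC for
// v_addc/v_subb) and only one constant bus read is allowed, an SGPR or
// literal-like src0 must be moved into a VGPR first.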
4824 | bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; |
4825 | if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && |
4826 | Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) || |
4827 | isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx]))) |
4828 | legalizeOpWithMove(MI, Src0Idx); |
4829 | |
4830 | |
4831 | |
4832 | |
4833 | if (Opc == AMDGPU::V_WRITELANE_B32) { |
4834 | const DebugLoc &DL = MI.getDebugLoc(); |
4835 | if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { |
4836 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4837 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4838 | .add(Src0); |
4839 | Src0.ChangeToRegister(Reg, false); |
4840 | } |
4841 | if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { |
4842 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4843 | const DebugLoc &DL = MI.getDebugLoc(); |
4844 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4845 | .add(Src1); |
4846 | Src1.ChangeToRegister(Reg, false); |
4847 | } |
4848 | return; |
4849 | } |
4850 | |
4851 | // No VOP2 instructions support AGPRs. |
4852 | if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg())) |
4853 | legalizeOpWithMove(MI, Src0Idx); |
4854 | |
4855 | if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg())) |
4856 | legalizeOpWithMove(MI, Src1Idx); |
4857 | |
4858 | |
4859 | // If src1 is already legal as a register operand here, we are done. |
4860 | if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) |
4861 | return; |
4862 | |
4863 | |
4864 | // Special case: V_READLANE_B32 only accepts an SGPR or M0 for the lane select |
4865 | // (src1), so use V_READFIRSTLANE_B32 on a VGPR src1 rather than commuting. |
4866 | if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && |
4867 | RI.isVGPR(MRI, Src1.getReg())) { |
4868 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4869 | const DebugLoc &DL = MI.getDebugLoc(); |
4870 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4871 | .add(Src1); |
4872 | Src1.ChangeToRegister(Reg, false); |
4873 | return; |
4874 | } |
4875 | |
4876 | |
4877 | // Src1 is still illegal. If the instruction also reads an implicit SGPR, or |
4878 | // cannot be commuted at all, the only remaining option is to move src1 into a |
4879 | // VGPR with legalizeOpWithMove. |
4880 | if (HasImplicitSGPR || !MI.isCommutable()) { |
4881 | legalizeOpWithMove(MI, Src1Idx); |
4882 | return; |
4883 | } |
4884 | |
4885 | |
4886 | |
4887 | |
4888 | // Commuting only helps if src0 would be legal in the src1 slot and src1 (a |
4889 | // register or immediate) can take src0's place as src0. |
4890 | if ((!Src1.isImm() && !Src1.isReg()) || |
4891 | !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { |
4892 | legalizeOpWithMove(MI, Src1Idx); |
4893 | return; |
4894 | } |
4895 | |
4896 | int CommutedOpc = commuteOpcode(MI); |
4897 | if (CommutedOpc == -1) { |
4898 | legalizeOpWithMove(MI, Src1Idx); |
4899 | return; |
4900 | } |
4901 | |
4902 | MI.setDesc(get(CommutedOpc)); |
4903 | |
4904 | Register Src0Reg = Src0.getReg(); |
4905 | unsigned Src0SubReg = Src0.getSubReg(); |
4906 | bool Src0Kill = Src0.isKill(); |
4907 | |
4908 | if (Src1.isImm()) |
4909 | Src0.ChangeToImmediate(Src1.getImm()); |
4910 | else if (Src1.isReg()) { |
4911 | Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); |
4912 | Src0.setSubReg(Src1.getSubReg()); |
4913 | } else |
4914 | llvm_unreachable("Should only have register or immediate operands"); |
4915 | |
4916 | Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); |
4917 | Src1.setSubReg(Src0SubReg); |
4918 | fixImplicitOperands(MI); |
4919 | } |
4920 | |
4921 | // Legalize VOP3 operands. All operand types are supported for any operand |
4922 | // but only one literal constant, and only starting from GFX10. |
4923 | void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, |
4924 | MachineInstr &MI) const { |
4925 | unsigned Opc = MI.getOpcode(); |
4926 | |
4927 | int VOP3Idx[3] = { |
4928 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), |
4929 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), |
4930 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) |
4931 | }; |
4932 | |
4933 | if (Opc == AMDGPU::V_PERMLANE16_B32_e64 || |
4934 | Opc == AMDGPU::V_PERMLANEX16_B32_e64) { |
4935 | // src1 and src2 must be scalar. |
4936 | MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); |
4937 | MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); |
4938 | const DebugLoc &DL = MI.getDebugLoc(); |
4939 | if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { |
4940 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4941 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4942 | .add(Src1); |
4943 | Src1.ChangeToRegister(Reg, false); |
4944 | } |
4945 | if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { |
4946 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4947 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4948 | .add(Src2); |
4949 | Src2.ChangeToRegister(Reg, false); |
4950 | } |
4951 | } |
4952 | |
4953 | // Find the one SGPR operand we are allowed to use. |
4954 | int ConstantBusLimit = ST.getConstantBusLimit(Opc); |
4955 | int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; |
4956 | SmallDenseSet<unsigned> SGPRsUsed; |
4957 | Register SGPRReg = findUsedSGPR(MI, VOP3Idx); |
4958 | if (SGPRReg != AMDGPU::NoRegister) { |
4959 | SGPRsUsed.insert(SGPRReg); |
4960 | --ConstantBusLimit; |
4961 | } |
4962 | |
4963 | for (unsigned i = 0; i < 3; ++i) { |
4964 | int Idx = VOP3Idx[i]; |
4965 | if (Idx == -1) |
4966 | break; |
4967 | MachineOperand &MO = MI.getOperand(Idx); |
4968 | |
4969 | if (!MO.isReg()) { |
4970 | if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx])) |
4971 | continue; |
4972 | |
4973 | if (LiteralLimit > 0 && ConstantBusLimit > 0) { |
4974 | --LiteralLimit; |
4975 | --ConstantBusLimit; |
4976 | continue; |
4977 | } |
4978 | |
4979 | --LiteralLimit; |
4980 | --ConstantBusLimit; |
4981 | legalizeOpWithMove(MI, Idx); |
4982 | continue; |
4983 | } |
4984 | |
4985 | if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) && |
4986 | !isOperandLegal(MI, Idx, &MO)) { |
4987 | legalizeOpWithMove(MI, Idx); |
4988 | continue; |
4989 | } |
4990 | |
4991 | if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg()))) |
4992 | continue; |
4993 | |
4994 | // Reusing an SGPR already counted against the constant bus is free; otherwise |
4995 | // it consumes one of the remaining constant bus slots. |
4996 | if (SGPRsUsed.count(MO.getReg())) |
4997 | continue; |
4998 | if (ConstantBusLimit > 0) { |
4999 | SGPRsUsed.insert(MO.getReg()); |
5000 | --ConstantBusLimit; |
5001 | continue; |
5002 | } |
5003 | |
5004 | // No constant bus slots are left for this SGPR, so fold it away with a move |
5005 | // into a VGPR instead. |
5006 | legalizeOpWithMove(MI, Idx); |
5007 | } |
5008 | } |
5009 | |
5010 | Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, |
5011 | MachineRegisterInfo &MRI) const { |
5012 | const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); |
5013 | const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); |
5014 | Register DstReg = MRI.createVirtualRegister(SRC); |
5015 | unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; |
5016 | |
5017 | if (RI.hasAGPRs(VRC)) { |
5018 | VRC = RI.getEquivalentVGPRClass(VRC); |
5019 | Register NewSrcReg = MRI.createVirtualRegister(VRC); |
5020 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5021 | get(TargetOpcode::COPY), NewSrcReg) |
5022 | .addReg(SrcReg); |
5023 | SrcReg = NewSrcReg; |
5024 | } |
5025 | |
5026 | if (SubRegs == 1) { |
5027 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5028 | get(AMDGPU::V_READFIRSTLANE_B32), DstReg) |
5029 | .addReg(SrcReg); |
5030 | return DstReg; |
5031 | } |
5032 | |
5033 | SmallVector<unsigned, 8> SRegs; |
5034 | for (unsigned i = 0; i < SubRegs; ++i) { |
5035 | Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5036 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5037 | get(AMDGPU::V_READFIRSTLANE_B32), SGPR) |
5038 | .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); |
5039 | SRegs.push_back(SGPR); |
5040 | } |
5041 | |
5042 | MachineInstrBuilder MIB = |
5043 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5044 | get(AMDGPU::REG_SEQUENCE), DstReg); |
5045 | for (unsigned i = 0; i < SubRegs; ++i) { |
5046 | MIB.addReg(SRegs[i]); |
5047 | MIB.addImm(RI.getSubRegFromChannel(i)); |
5048 | } |
5049 | return DstReg; |
5050 | } |
5051 | |
5052 | void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, |
5053 | MachineInstr &MI) const { |
5054 | |
5055 | // If the pointer is stored in VGPRs, move it to SGPRs using v_readfirstlane. |
5056 | // This is safe because we only select loads with uniform pointers to SMRD |
5057 | // instructions, so the pointer value is uniform across the wave and reading |
5058 | // it from the first active lane loses nothing. |
5059 | MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); |
5060 | if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { |
5061 | Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); |
5062 | SBase->setReg(SGPR); |
5063 | } |
5064 | MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff); |
5065 | if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { |
5066 | Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); |
5067 | SOff->setReg(SGPR); |
5068 | } |
5069 | } |
5070 | |
5071 | bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { |
5072 | unsigned Opc = Inst.getOpcode(); |
5073 | int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); |
5074 | if (OldSAddrIdx < 0) |
5075 | return false; |
5076 | |
5077 | assert(isSegmentSpecificFLAT(Inst)); |
5078 | |
5079 | int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); |
5080 | if (NewOpc < 0) |
5081 | NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); |
5082 | if (NewOpc < 0) |
5083 | return false; |
5084 | |
5085 | MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); |
5086 | MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx); |
5087 | if (RI.isSGPRReg(MRI, SAddr.getReg())) |
5088 | return false; |
5089 | |
5090 | int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr); |
5091 | if (NewVAddrIdx < 0) |
5092 | return false; |
5093 | |
5094 | int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); |
5095 | |
5096 | // Check vaddr: it must be absent or a materialized zero. |
5097 | MachineInstr *VAddrDef = nullptr; |
5098 | if (OldVAddrIdx >= 0) { |
5099 | MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); |
5100 | VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); |
5101 | if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || |
5102 | !VAddrDef->getOperand(1).isImm() || |
5103 | VAddrDef->getOperand(1).getImm() != 0) |
5104 | return false; |
5105 | } |
5106 | |
5107 | const MCInstrDesc &NewDesc = get(NewOpc); |
5108 | Inst.setDesc(NewDesc); |
5109 | |
5110 | // Callers expect the iterator to remain valid after this call, so modify the |
5111 | // instruction in place rather than creating a new one. |
5112 | if (OldVAddrIdx == NewVAddrIdx) { |
5113 | MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx); |
5114 | |
5115 | MRI.removeRegOperandFromUseList(&NewVAddr); |
5116 | MRI.moveOperands(&NewVAddr, &SAddr, 1); |
5117 | Inst.RemoveOperand(OldSAddrIdx); |
5118 | |
5119 | |
5120 | MRI.removeRegOperandFromUseList(&NewVAddr); |
5121 | MRI.addRegOperandToUseList(&NewVAddr); |
5122 | } else { |
5123 | assert(OldSAddrIdx == NewVAddrIdx); |
5124 | |
5125 | if (OldVAddrIdx >= 0) { |
5126 | int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc, |
5127 | AMDGPU::OpName::vdst_in); |
5128 | |
5129 | |
5130 | |
5131 | if (NewVDstIn != -1) { |
5132 | int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); |
5133 | Inst.untieRegOperand(OldVDstIn); |
5134 | } |
5135 | |
5136 | Inst.RemoveOperand(OldVAddrIdx); |
5137 | |
5138 | if (NewVDstIn != -1) { |
5139 | int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); |
5140 | Inst.tieOperands(NewVDst, NewVDstIn); |
5141 | } |
5142 | } |
5143 | } |
5144 | |
5145 | if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) |
5146 | VAddrDef->eraseFromParent(); |
5147 | |
5148 | return true; |
5149 | } |
5150 | |
5151 | |
5152 | void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, |
5153 | MachineInstr &MI) const { |
5154 | if (!isSegmentSpecificFLAT(MI)) |
5155 | return; |
5156 | |
5157 | // If saddr is a VGPR, first try to rewrite the instruction to its VADDR form; |
5158 | // otherwise read the (uniform) address back into an SGPR. |
5159 | MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr); |
5160 | if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg()))) |
5161 | return; |
5162 | |
5163 | if (moveFlatAddrToVGPR(MI)) |
5164 | return; |
5165 | |
5166 | Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI); |
5167 | SAddr->setReg(ToSGPR); |
5168 | } |
5169 | |
5170 | void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, |
5171 | MachineBasicBlock::iterator I, |
5172 | const TargetRegisterClass *DstRC, |
5173 | MachineOperand &Op, |
5174 | MachineRegisterInfo &MRI, |
5175 | const DebugLoc &DL) const { |
5176 | Register OpReg = Op.getReg(); |
5177 | unsigned OpSubReg = Op.getSubReg(); |
5178 | |
5179 | const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( |
5180 | RI.getRegClassForReg(MRI, OpReg), OpSubReg); |
5181 | |
5182 | |
5183 | if (DstRC == OpRC) |
5184 | return; |
5185 | |
5186 | Register DstReg = MRI.createVirtualRegister(DstRC); |
5187 | MachineInstr *Copy = |
5188 | BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); |
5189 | |
5190 | Op.setReg(DstReg); |
5191 | Op.setSubReg(0); |
5192 | |
5193 | MachineInstr *Def = MRI.getVRegDef(OpReg); |
5194 | if (!Def) |
5195 | return; |
5196 | |
5197 | // Try to eliminate the copy if it is copying an immediate value. |
5198 | if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) |
5199 | FoldImmediate(*Copy, *Def, OpReg, &MRI); |
5200 | |
5201 | bool ImpDef = Def->isImplicitDef(); |
5202 | while (!ImpDef && Def && Def->isCopy()) { |
5203 | if (Def->getOperand(1).getReg().isPhysical()) |
5204 | break; |
5205 | Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); |
5206 | ImpDef = Def && Def->isImplicitDef(); |
5207 | } |
5208 | if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && |
5209 | !ImpDef) |
5210 | Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); |
5211 | } |
5212 | |
5213 | // Emit the body of the waterfall loop: read the resource descriptor from the |
5214 | // first active lane into SGPRs, then restrict EXEC to the lanes whose VGPR |
5215 | // descriptor matches, so the wrapped instruction runs once per unique value. |
5216 | static void |
5217 | emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, |
5218 | MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, |
5219 | const DebugLoc &DL, MachineOperand &Rsrc) { |
5220 | MachineFunction &MF = *OrigBB.getParent(); |
5221 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
5222 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
5223 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
5224 | unsigned SaveExecOpc = |
5225 | ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; |
5226 | unsigned XorTermOpc = |
5227 | ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; |
5228 | unsigned AndOpc = |
5229 | ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; |
5230 | const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5231 | |
5232 | MachineBasicBlock::iterator I = LoopBB.begin(); |
5233 | |
5234 | SmallVector<Register, 8> ReadlanePieces; |
5235 | Register CondReg = AMDGPU::NoRegister; |
5236 | |
5237 | Register VRsrc = Rsrc.getReg(); |
5238 | unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef()); |
5239 | |
5240 | unsigned RegSize = TRI->getRegSizeInBits(Rsrc.getReg(), MRI); |
5241 | unsigned NumSubRegs = RegSize / 32; |
5242 | assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && "Unhandled register size"); |
5243 | |
5244 | for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) { |
5245 | |
5246 | Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5247 | Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5248 | |
5249 | |
5250 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo) |
5251 | .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx)); |
5252 | |
5253 | |
5254 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi) |
5255 | .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx + 1)); |
5256 | |
5257 | ReadlanePieces.push_back(CurRegLo); |
5258 | ReadlanePieces.push_back(CurRegHi); |
5259 | |
5260 | |
5261 | Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); |
5262 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg) |
5263 | .addReg(CurRegLo) |
5264 | .addImm(AMDGPU::sub0) |
5265 | .addReg(CurRegHi) |
5266 | .addImm(AMDGPU::sub1); |
5267 | |
5268 | Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); |
5269 | auto Cmp = |
5270 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg) |
5271 | .addReg(CurReg); |
5272 | if (NumSubRegs <= 2) |
5273 | Cmp.addReg(VRsrc); |
5274 | else |
5275 | Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2)); |
5276 | |
5277 | |
5278 | if (CondReg == AMDGPU::NoRegister) |
5279 | CondReg = NewCondReg; |
5280 | else { |
5281 | Register AndReg = MRI.createVirtualRegister(BoolXExecRC); |
5282 | BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) |
5283 | .addReg(CondReg) |
5284 | .addReg(NewCondReg); |
5285 | CondReg = AndReg; |
5286 | } |
5287 | } |
5288 | |
5289 | auto SRsrcRC = TRI->getEquivalentSGPRClass(MRI.getRegClass(VRsrc)); |
5290 | Register SRsrc = MRI.createVirtualRegister(SRsrcRC); |
5291 | |
5292 | |
5293 | auto Merge = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc); |
5294 | unsigned Channel = 0; |
5295 | for (Register Piece : ReadlanePieces) { |
5296 | Merge.addReg(Piece) |
5297 | .addImm(TRI->getSubRegFromChannel(Channel++)); |
5298 | } |
5299 | |
5300 | |
5301 | Rsrc.setReg(SRsrc); |
5302 | Rsrc.setIsKill(true); |
5303 | |
5304 | Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); |
5305 | MRI.setSimpleHint(SaveExec, CondReg); |
5306 | |
5307 | // Update EXEC to the matching lanes, saving the original EXEC to SaveExec. |
5308 | BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec) |
5309 | .addReg(CondReg, RegState::Kill); |
5310 | |
5311 | |
5312 | I = LoopBB.end(); |
5313 | |
5314 | |
5315 | BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec) |
5316 | .addReg(Exec) |
5317 | .addReg(SaveExec); |
5318 | |
5319 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); |
5320 | } |
5321 | |
5322 | // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register |
5323 | // with SGPRs by iterating over all unique values across all lanes. |
5324 | // Returns the loop basic block that now contains \p MI. |
5325 | static MachineBasicBlock * |
5326 | loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, |
5327 | MachineOperand &Rsrc, MachineDominatorTree *MDT, |
5328 | MachineBasicBlock::iterator Begin = nullptr, |
5329 | MachineBasicBlock::iterator End = nullptr) { |
5330 | MachineBasicBlock &MBB = *MI.getParent(); |
5331 | MachineFunction &MF = *MBB.getParent(); |
5332 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
5333 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
5334 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5335 | if (!Begin.isValid()) |
5336 | Begin = &MI; |
5337 | if (!End.isValid()) { |
5338 | End = &MI; |
5339 | ++End; |
5340 | } |
5341 | const DebugLoc &DL = MI.getDebugLoc(); |
5342 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
5343 | unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
5344 | const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5345 | |
5346 | Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); |
5347 | |
5348 | |
5349 | BuildMI(MBB, Begin, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); |
5350 | |
5351 | |
5352 | |
5353 | MachineBasicBlock::iterator AfterMI = MI; |
5354 | ++AfterMI; |
5355 | for (auto I = Begin; I != AfterMI; I++) { |
5356 | for (auto &MO : I->uses()) { |
5357 | if (MO.isReg() && MO.isUse()) { |
5358 | MRI.clearKillFlags(MO.getReg()); |
5359 | } |
5360 | } |
5361 | } |
5362 | |
5363 | |
5364 | |
5365 | MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); |
5366 | MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); |
5367 | MachineFunction::iterator MBBI(MBB); |
5368 | ++MBBI; |
5369 | |
5370 | MF.insert(MBBI, LoopBB); |
5371 | MF.insert(MBBI, RemainderBB); |
5372 | |
5373 | LoopBB->addSuccessor(LoopBB); |
5374 | LoopBB->addSuccessor(RemainderBB); |
5375 | |
5376 | |
5377 | |
5378 | RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); |
5379 | RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); |
5380 | LoopBB->splice(LoopBB->begin(), &MBB, Begin, MBB.end()); |
5381 | |
5382 | MBB.addSuccessor(LoopBB); |
5383 | |
5384 | |
5385 | |
5386 | |
5387 | |
5388 | if (MDT) { |
5389 | MDT->addNewBlock(LoopBB, &MBB); |
5390 | MDT->addNewBlock(RemainderBB, LoopBB); |
5391 | for (auto &Succ : RemainderBB->successors()) { |
5392 | if (MDT->properlyDominates(&MBB, Succ)) { |
5393 | MDT->changeImmediateDominator(Succ, RemainderBB); |
5394 | } |
5395 | } |
5396 | } |
5397 | |
5398 | emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc); |
5399 | |
5400 | |
5401 | MachineBasicBlock::iterator First = RemainderBB->begin(); |
5402 | BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); |
5403 | return LoopBB; |
5404 | } |
5405 | |
5406 | // Extract the base pointer from Rsrc and build a replacement SRsrc holding a null pointer and the default data format. |
5407 | static std::tuple<unsigned, unsigned> |
5408 | extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { |
5409 | MachineBasicBlock &MBB = *MI.getParent(); |
5410 | MachineFunction &MF = *MBB.getParent(); |
5411 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5412 | |
5413 | |
5414 | unsigned RsrcPtr = |
5415 | TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass, |
5416 | AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); |
5417 | |
5418 | |
5419 | Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
5420 | Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5421 | Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5422 | Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); |
5423 | uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); |
5424 | |
5425 | |
5426 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64) |
5427 | .addImm(0); |
5428 | |
5429 | |
5430 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo) |
5431 | .addImm(RsrcDataFormat & 0xFFFFFFFF); |
5432 | |
5433 | |
5434 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi) |
5435 | .addImm(RsrcDataFormat >> 32); |
5436 | |
5437 | |
5438 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc) |
5439 | .addReg(Zero64) |
5440 | .addImm(AMDGPU::sub0_sub1) |
5441 | .addReg(SRsrcFormatLo) |
5442 | .addImm(AMDGPU::sub2) |
5443 | .addReg(SRsrcFormatHi) |
5444 | .addImm(AMDGPU::sub3); |
5445 | |
5446 | return std::make_tuple(RsrcPtr, NewSRsrc); |
5447 | } |
5448 | |
5449 | MachineBasicBlock * |
5450 | SIInstrInfo::legalizeOperands(MachineInstr &MI, |
5451 | MachineDominatorTree *MDT) const { |
5452 | MachineFunction &MF = *MI.getParent()->getParent(); |
5453 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5454 | MachineBasicBlock *CreatedBB = nullptr; |
5455 | |
5456 | // Legalize VOP2 and VOPC. |
5457 | if (isVOP2(MI) || isVOPC(MI)) { |
5458 | legalizeOperandsVOP2(MRI, MI); |
5459 | return CreatedBB; |
5460 | } |
5461 | |
5462 | // Legalize VOP3. |
5463 | if (isVOP3(MI)) { |
5464 | legalizeOperandsVOP3(MRI, MI); |
5465 | return CreatedBB; |
5466 | } |
5467 | |
5468 | // Legalize SMRD. |
5469 | if (isSMRD(MI)) { |
5470 | legalizeOperandsSMRD(MRI, MI); |
5471 | return CreatedBB; |
5472 | } |
5473 | |
5474 | // Legalize FLAT. |
5475 | if (isFLAT(MI)) { |
5476 | legalizeOperandsFLAT(MRI, MI); |
5477 | return CreatedBB; |
5478 | } |
5479 | |
5480 | |
5481 | // Legalize PHIs: every incoming value must ultimately agree with the result |
5482 | // on scalar vs. vector register class, so pick a common class first. |
5483 | if (MI.getOpcode() == AMDGPU::PHI) { |
5484 | const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; |
5485 | for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { |
5486 | if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual()) |
5487 | continue; |
5488 | const TargetRegisterClass *OpRC = |
5489 | MRI.getRegClass(MI.getOperand(i).getReg()); |
5490 | if (RI.hasVectorRegisters(OpRC)) { |
5491 | VRC = OpRC; |
5492 | } else { |
5493 | SRC = OpRC; |
5494 | } |
5495 | } |
5496 | |
5497 | // If any incoming value is a vector register, or the result itself is not an |
5498 | // SGPR, the whole PHI has to use an equivalent vector (VGPR/AGPR) class; |
5499 | // otherwise the common scalar class is fine. |
5500 | if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { |
5501 | if (!VRC) { |
5502 | assert(SRC); |
5503 | if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { |
5504 | VRC = &AMDGPU::VReg_1RegClass; |
5505 | } else |
5506 | VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) |
5507 | ? RI.getEquivalentAGPRClass(SRC) |
5508 | : RI.getEquivalentVGPRClass(SRC); |
5509 | } else { |
5510 | VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) |
5511 | ? RI.getEquivalentAGPRClass(VRC) |
5512 | : RI.getEquivalentVGPRClass(VRC); |
5513 | } |
5514 | RC = VRC; |
5515 | } else { |
5516 | RC = SRC; |
5517 | } |
5518 | |
5519 | // Update all the operands so they have the same register class. |
5520 | for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
5521 | MachineOperand &Op = MI.getOperand(I); |
5522 | if (!Op.isReg() || !Op.getReg().isVirtual()) |
5523 | continue; |
5524 | |
5525 | |
5526 | MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); |
5527 | MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); |
5528 | |
5529 | |
5530 | |
5531 | legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); |
5532 | } |
5533 | } |
5534 | |
5535 | // REG_SEQUENCE does not really require operand legalization, but if the |
5536 | // destination is a vector register then all of the source operands need to |
5537 | // be vector registers too, so insert copies for any SGPR sources. |
5538 | if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { |
5539 | MachineBasicBlock *MBB = MI.getParent(); |
5540 | const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); |
5541 | if (RI.hasVGPRs(DstRC)) { |
5542 | |
5543 | |
5544 | |
5545 | for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
5546 | MachineOperand &Op = MI.getOperand(I); |
5547 | if (!Op.isReg() || !Op.getReg().isVirtual()) |
5548 | continue; |
5549 | |
5550 | const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); |
5551 | const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); |
5552 | if (VRC == OpRC) |
5553 | continue; |
5554 | |
5555 | legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); |
5556 | Op.setIsKill(); |
5557 | } |
5558 | } |
5559 | |
5560 | return CreatedBB; |
5561 | } |
5562 | |
5563 | // Legalize INSERT_SUBREG: the inserted-into source (src0) must have the same |
5564 | // register class as the destination. |
5565 | if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { |
5566 | Register Dst = MI.getOperand(0).getReg(); |
5567 | Register Src0 = MI.getOperand(1).getReg(); |
5568 | const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); |
5569 | const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); |
5570 | if (DstRC != Src0RC) { |
5571 | MachineBasicBlock *MBB = MI.getParent(); |
5572 | MachineOperand &Op = MI.getOperand(1); |
5573 | legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); |
5574 | } |
5575 | return CreatedBB; |
5576 | } |
5577 | |
5578 | // Legalize SI_INIT_M0: M0 can only be written from an SGPR. |
5579 | if (MI.getOpcode() == AMDGPU::SI_INIT_M0) { |
5580 | MachineOperand &Src = MI.getOperand(0); |
5581 | if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) |
5582 | Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); |
5583 | return CreatedBB; |
5584 | } |
5585 | |
5586 | // Legalize MIMG and, for graphics shaders, MUBUF/MTBUF instructions: a |
5587 | // divergent resource or sampler descriptor is handled by a waterfall loop |
5588 | // that reads it into SGPRs rather than by rewriting the addressing mode. |
5589 | |
5590 | |
5591 | if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) && |
5592 | (isMUBUF(MI) || isMTBUF(MI)))) { |
5593 | MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); |
5594 | if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) |
5595 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *SRsrc, MDT); |
5596 | |
5597 | MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); |
5598 | if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) |
5599 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *SSamp, MDT); |
5600 | |
5601 | return CreatedBB; |
5602 | } |
5603 | |
5604 | // Legalize SI_CALL_ISEL: the callee address must live in SGPRs. |
5605 | if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) { |
5606 | MachineOperand *Dest = &MI.getOperand(0); |
5607 | if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) { |
5608 | // If the callee is a VGPR, wrap a waterfall loop around the whole call |
5609 | // sequence, from the call-frame setup marker through the call-frame destroy |
5610 | // marker and any copies of the returned values. |
5611 | unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); |
5612 | unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); |
5613 | |
5614 | |
5615 | MachineBasicBlock &MBB = *MI.getParent(); |
5616 | MachineBasicBlock::iterator Start(&MI); |
5617 | while (Start->getOpcode() != FrameSetupOpcode) |
5618 | --Start; |
5619 | MachineBasicBlock::iterator End(&MI); |
5620 | while (End->getOpcode() != FrameDestroyOpcode) |
5621 | ++End; |
5622 | |
5623 | ++End; |
5624 | while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() && |
5625 | MI.definesRegister(End->getOperand(1).getReg())) |
5626 | ++End; |
5627 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *Dest, MDT, Start, End); |
5628 | } |
5629 | } |
5630 | |
5631 | // Legalize MUBUF/MTBUF instructions with a VGPR resource descriptor. |
5632 | int RsrcIdx = |
5633 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); |
5634 | if (RsrcIdx != -1) { |
5635 | |
5636 | MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); |
5637 | unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass; |
5638 | if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()), |
5639 | RI.getRegClass(RsrcRC))) { |
5640 | // The rsrc operand is already in a legal scalar register class; nothing else |
5641 | // is legalized for this instruction here. |
5642 | return CreatedBB; |
5643 | } |
5644 | |
5645 | |
5646 | // Legalize a VGPR Rsrc. |
5647 | // |
5648 | // If the instruction is an _ADDR64 variant, we can avoid a waterfall loop by |
5649 | // extracting the base pointer from the VGPR Rsrc, adding it to the VAddr, and |
5650 | // then using a zero-value SRsrc with the default data format. |
5651 | // |
5652 | // If there is no VAddr and the target still supports ADDR64, we can rewrite |
5653 | // the instruction to its _ADDR64 form and place the extracted pointer in a |
5654 | // new VAddr, again with a zero-value SRsrc. |
5655 | // |
5656 | // Otherwise we have no choice but to emit a waterfall loop over the Rsrc. |
5657 | |
5658 | MachineBasicBlock &MBB = *MI.getParent(); |
5659 | |
5660 | MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); |
5661 | if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { |
5662 | // This is already an ADDR64 instruction, so we just need to add the pointer |
5663 | // extracted from the resource descriptor to the current value of VAddr. |
5664 | Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
5665 | Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
5666 | Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
5667 | |
5668 | const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5669 | Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); |
5670 | Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); |
5671 | |
5672 | unsigned RsrcPtr, NewSRsrc; |
5673 | std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); |
5674 | |
5675 | // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0 |
5676 | const DebugLoc &DL = MI.getDebugLoc(); |
5677 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo) |
5678 | .addDef(CondReg0) |
5679 | .addReg(RsrcPtr, 0, AMDGPU::sub0) |
5680 | .addReg(VAddr->getReg(), 0, AMDGPU::sub0) |
5681 | .addImm(0); |
5682 | |
5683 | // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1 (with carry-in) |
5684 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi) |
5685 | .addDef(CondReg1, RegState::Dead) |
5686 | .addReg(RsrcPtr, 0, AMDGPU::sub1) |
5687 | .addReg(VAddr->getReg(), 0, AMDGPU::sub1) |
5688 | .addReg(CondReg0, RegState::Kill) |
5689 | .addImm(0); |
5690 | |
5691 | // NewVaddr = {NewVaddrHi, NewVaddrLo} |
5692 | BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) |
5693 | .addReg(NewVAddrLo) |
5694 | .addImm(AMDGPU::sub0) |
5695 | .addReg(NewVAddrHi) |
5696 | .addImm(AMDGPU::sub1); |
5697 | |
5698 | VAddr->setReg(NewVAddr); |
5699 | Rsrc->setReg(NewSRsrc); |
5700 | } else if (!VAddr && ST.hasAddr64()) { |
5701 | // This is the _OFFSET variant on a target that still has ADDR64, so convert |
5702 | // the instruction to its _ADDR64 form. |
5703 | assert(ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && |
5704 | "FIXME: Need to emit flat atomics here"); |
5705 | |
5706 | unsigned RsrcPtr, NewSRsrc; |
5707 | std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); |
5708 | |
5709 | Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
5710 | MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); |
5711 | MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); |
5712 | MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); |
5713 | unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); |
5714 | |
5715 | // Atomics with a return value carry a tied vdata_in operand, so they are |
5716 | // rebuilt with a slightly different operand list below. |
5717 | MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); |
5718 | MachineInstr *Addr64; |
5719 | |
5720 | if (!VDataIn) { |
5721 | |
5722 | MachineInstrBuilder MIB = |
5723 | BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) |
5724 | .add(*VData) |
5725 | .addReg(NewVAddr) |
5726 | .addReg(NewSRsrc) |
5727 | .add(*SOffset) |
5728 | .add(*Offset); |
5729 | |
5730 | if (const MachineOperand *CPol = |
5731 | getNamedOperand(MI, AMDGPU::OpName::cpol)) { |
5732 | MIB.addImm(CPol->getImm()); |
5733 | } |
5734 | |
5735 | if (const MachineOperand *TFE = |
5736 | getNamedOperand(MI, AMDGPU::OpName::tfe)) { |
5737 | MIB.addImm(TFE->getImm()); |
5738 | } |
5739 | |
5740 | MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); |
5741 | |
5742 | MIB.cloneMemRefs(MI); |
5743 | Addr64 = MIB; |
5744 | } else { |
5745 | |
5746 | Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) |
5747 | .add(*VData) |
5748 | .add(*VDataIn) |
5749 | .addReg(NewVAddr) |
5750 | .addReg(NewSRsrc) |
5751 | .add(*SOffset) |
5752 | .add(*Offset) |
5753 | .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol)) |
5754 | .cloneMemRefs(MI); |
5755 | } |
5756 | |
5757 | MI.removeFromParent(); |
5758 | |
5759 | |
5760 | BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), |
5761 | NewVAddr) |
5762 | .addReg(RsrcPtr, 0, AMDGPU::sub0) |
5763 | .addImm(AMDGPU::sub0) |
5764 | .addReg(RsrcPtr, 0, AMDGPU::sub1) |
5765 | .addImm(AMDGPU::sub1); |
5766 | } else { |
5767 | // This is another variant; legalize Rsrc with a waterfall loop that copies |
5768 | // it from VGPRs to SGPRs. |
5769 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT); |
5770 | return CreatedBB; |
5771 | } |
5772 | } |
5773 | return CreatedBB; |
5774 | } |
5775 | |
5776 | MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, |
5777 | MachineDominatorTree *MDT) const { |
5778 | SetVectorType Worklist; |
5779 | Worklist.insert(&TopInst); |
5780 | MachineBasicBlock *CreatedBB = nullptr; |
5781 | MachineBasicBlock *CreatedBBTmp = nullptr; |
5782 | |
5783 | while (!Worklist.empty()) { |
5784 | MachineInstr &Inst = *Worklist.pop_back_val(); |
5785 | MachineBasicBlock *MBB = Inst.getParent(); |
5786 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
5787 | |
5788 | unsigned Opcode = Inst.getOpcode(); |
5789 | unsigned NewOpcode = getVALUOp(Inst); |
5790 | |
5791 | // Handle some special cases before falling through to the generic lowering. |
5792 | switch (Opcode) { |
5793 | default: |
5794 | break; |
5795 | case AMDGPU::S_ADD_U64_PSEUDO: |
5796 | case AMDGPU::S_SUB_U64_PSEUDO: |
5797 | splitScalar64BitAddSub(Worklist, Inst, MDT); |
5798 | Inst.eraseFromParent(); |
5799 | continue; |
5800 | case AMDGPU::S_ADD_I32: |
5801 | case AMDGPU::S_SUB_I32: { |
5802 | |
5803 | bool Changed; |
5804 | std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT); |
5805 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
5806 | CreatedBB = CreatedBBTmp; |
5807 | if (Changed) |
5808 | continue; |
5809 | |
5810 | |
5811 | break; |
5812 | } |
5813 | case AMDGPU::S_AND_B64: |
5814 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT); |
5815 | Inst.eraseFromParent(); |
5816 | continue; |
5817 | |
5818 | case AMDGPU::S_OR_B64: |
5819 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT); |
5820 | Inst.eraseFromParent(); |
5821 | continue; |
5822 | |
5823 | case AMDGPU::S_XOR_B64: |
5824 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT); |
5825 | Inst.eraseFromParent(); |
5826 | continue; |
5827 | |
5828 | case AMDGPU::S_NAND_B64: |
5829 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT); |
5830 | Inst.eraseFromParent(); |
5831 | continue; |
5832 | |
5833 | case AMDGPU::S_NOR_B64: |
5834 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT); |
5835 | Inst.eraseFromParent(); |
5836 | continue; |
5837 | |
5838 | case AMDGPU::S_XNOR_B64: |
5839 | if (ST.hasDLInsts()) |
5840 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT); |
5841 | else |
5842 | splitScalar64BitXnor(Worklist, Inst, MDT); |
5843 | Inst.eraseFromParent(); |
5844 | continue; |
5845 | |
5846 | case AMDGPU::S_ANDN2_B64: |
5847 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT); |
5848 | Inst.eraseFromParent(); |
5849 | continue; |
5850 | |
5851 | case AMDGPU::S_ORN2_B64: |
5852 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT); |
5853 | Inst.eraseFromParent(); |
5854 | continue; |
5855 | |
5856 | case AMDGPU::S_BREV_B64: |
5857 | splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true); |
5858 | Inst.eraseFromParent(); |
5859 | continue; |
5860 | |
5861 | case AMDGPU::S_NOT_B64: |
5862 | splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); |
5863 | Inst.eraseFromParent(); |
5864 | continue; |
5865 | |
5866 | case AMDGPU::S_BCNT1_I32_B64: |
5867 | splitScalar64BitBCNT(Worklist, Inst); |
5868 | Inst.eraseFromParent(); |
5869 | continue; |
5870 | |
5871 | case AMDGPU::S_BFE_I64: |
5872 | splitScalar64BitBFE(Worklist, Inst); |
5873 | Inst.eraseFromParent(); |
5874 | continue; |
5875 | |
5876 | case AMDGPU::S_LSHL_B32: |
5877 | if (ST.hasOnlyRevVALUShifts()) { |
5878 | NewOpcode = AMDGPU::V_LSHLREV_B32_e64; |
5879 | swapOperands(Inst); |
5880 | } |
5881 | break; |
5882 | case AMDGPU::S_ASHR_I32: |
5883 | if (ST.hasOnlyRevVALUShifts()) { |
5884 | NewOpcode = AMDGPU::V_ASHRREV_I32_e64; |
5885 | swapOperands(Inst); |
5886 | } |
5887 | break; |
5888 | case AMDGPU::S_LSHR_B32: |
5889 | if (ST.hasOnlyRevVALUShifts()) { |
5890 | NewOpcode = AMDGPU::V_LSHRREV_B32_e64; |
5891 | swapOperands(Inst); |
5892 | } |
5893 | break; |
5894 | case AMDGPU::S_LSHL_B64: |
5895 | if (ST.hasOnlyRevVALUShifts()) { |
5896 | NewOpcode = AMDGPU::V_LSHLREV_B64_e64; |
5897 | swapOperands(Inst); |
5898 | } |
5899 | break; |
5900 | case AMDGPU::S_ASHR_I64: |
5901 | if (ST.hasOnlyRevVALUShifts()) { |
5902 | NewOpcode = AMDGPU::V_ASHRREV_I64_e64; |
5903 | swapOperands(Inst); |
5904 | } |
5905 | break; |
5906 | case AMDGPU::S_LSHR_B64: |
5907 | if (ST.hasOnlyRevVALUShifts()) { |
5908 | NewOpcode = AMDGPU::V_LSHRREV_B64_e64; |
5909 | swapOperands(Inst); |
5910 | } |
5911 | break; |
5912 | |
5913 | case AMDGPU::S_ABS_I32: |
5914 | lowerScalarAbs(Worklist, Inst); |
5915 | Inst.eraseFromParent(); |
5916 | continue; |
5917 | |
5918 | case AMDGPU::S_CBRANCH_SCC0: |
5919 | case AMDGPU::S_CBRANCH_SCC1: { |
5920 | // Materialize the branch condition into VCC, masked with EXEC. |
5921 | Register CondReg = Inst.getOperand(1).getReg(); |
5922 | bool IsSCC = CondReg == AMDGPU::SCC; |
5923 | Register VCC = RI.getVCC(); |
5924 | Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
5925 | unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; |
5926 | BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC) |
5927 | .addReg(EXEC) |
5928 | .addReg(IsSCC ? VCC : CondReg); |
5929 | Inst.RemoveOperand(1); |
5930 | } |
5931 | break; |
5932 | |
5933 | case AMDGPU::S_BFE_U64: |
5934 | case AMDGPU::S_BFM_B64: |
5935 | llvm_unreachable("Moving this op to VALU not implemented"); |
5936 | |
5937 | case AMDGPU::S_PACK_LL_B32_B16: |
5938 | case AMDGPU::S_PACK_LH_B32_B16: |
5939 | case AMDGPU::S_PACK_HH_B32_B16: |
5940 | movePackToVALU(Worklist, MRI, Inst); |
5941 | Inst.eraseFromParent(); |
5942 | continue; |
5943 | |
5944 | case AMDGPU::S_XNOR_B32: |
5945 | lowerScalarXnor(Worklist, Inst); |
5946 | Inst.eraseFromParent(); |
5947 | continue; |
5948 | |
5949 | case AMDGPU::S_NAND_B32: |
5950 | splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32); |
5951 | Inst.eraseFromParent(); |
5952 | continue; |
5953 | |
5954 | case AMDGPU::S_NOR_B32: |
5955 | splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32); |
5956 | Inst.eraseFromParent(); |
5957 | continue; |
5958 | |
5959 | case AMDGPU::S_ANDN2_B32: |
5960 | splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32); |
5961 | Inst.eraseFromParent(); |
5962 | continue; |
5963 | |
5964 | case AMDGPU::S_ORN2_B32: |
5965 | splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32); |
5966 | Inst.eraseFromParent(); |
5967 | continue; |
5968 | |
5969 | |
5970 | |
5971 | |
5972 | |
5973 | case AMDGPU::S_ADD_CO_PSEUDO: |
5974 | case AMDGPU::S_SUB_CO_PSEUDO: { |
5975 | unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) |
5976 | ? AMDGPU::V_ADDC_U32_e64 |
5977 | : AMDGPU::V_SUBB_U32_e64; |
5978 | const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5979 | |
5980 | Register CarryInReg = Inst.getOperand(4).getReg(); |
5981 | if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { |
5982 | Register NewCarryReg = MRI.createVirtualRegister(CarryRC); |
5983 | BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) |
5984 | .addReg(CarryInReg); |
5985 | } |
5986 | |
5987 | Register CarryOutReg = Inst.getOperand(1).getReg(); |
5988 | |
5989 | Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( |
5990 | MRI.getRegClass(Inst.getOperand(0).getReg()))); |
5991 | MachineInstr *CarryOp = |
5992 | BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) |
5993 | .addReg(CarryOutReg, RegState::Define) |
5994 | .add(Inst.getOperand(2)) |
5995 | .add(Inst.getOperand(3)) |
5996 | .addReg(CarryInReg) |
5997 | .addImm(0); |
5998 | CreatedBBTmp = legalizeOperands(*CarryOp); |
5999 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6000 | CreatedBB = CreatedBBTmp; |
6001 | MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); |
6002 | addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); |
6003 | Inst.eraseFromParent(); |
6004 | } |
6005 | continue; |
6006 | case AMDGPU::S_UADDO_PSEUDO: |
6007 | case AMDGPU::S_USUBO_PSEUDO: { |
6008 | const DebugLoc &DL = Inst.getDebugLoc(); |
6009 | MachineOperand &Dest0 = Inst.getOperand(0); |
6010 | MachineOperand &Dest1 = Inst.getOperand(1); |
6011 | MachineOperand &Src0 = Inst.getOperand(2); |
6012 | MachineOperand &Src1 = Inst.getOperand(3); |
6013 | |
6014 | unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) |
6015 | ? AMDGPU::V_ADD_CO_U32_e64 |
6016 | : AMDGPU::V_SUB_CO_U32_e64; |
6017 | const TargetRegisterClass *NewRC = |
6018 | RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); |
6019 | Register DestReg = MRI.createVirtualRegister(NewRC); |
6020 | MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg) |
6021 | .addReg(Dest1.getReg(), RegState::Define) |
6022 | .add(Src0) |
6023 | .add(Src1) |
6024 | .addImm(0); |
6025 | |
6026 | CreatedBBTmp = legalizeOperands(*NewInstr, MDT); |
6027 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6028 | CreatedBB = CreatedBBTmp; |
6029 | |
6030 | MRI.replaceRegWith(Dest0.getReg(), DestReg); |
6031 | addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, |
6032 | Worklist); |
6033 | Inst.eraseFromParent(); |
6034 | } |
6035 | continue; |
6036 | |
6037 | case AMDGPU::S_CSELECT_B32: |
6038 | case AMDGPU::S_CSELECT_B64: |
6039 | lowerSelect(Worklist, Inst, MDT); |
6040 | Inst.eraseFromParent(); |
6041 | continue; |
6042 | case AMDGPU::S_CMP_EQ_I32: |
6043 | case AMDGPU::S_CMP_LG_I32: |
6044 | case AMDGPU::S_CMP_GT_I32: |
6045 | case AMDGPU::S_CMP_GE_I32: |
6046 | case AMDGPU::S_CMP_LT_I32: |
6047 | case AMDGPU::S_CMP_LE_I32: |
6048 | case AMDGPU::S_CMP_EQ_U32: |
6049 | case AMDGPU::S_CMP_LG_U32: |
6050 | case AMDGPU::S_CMP_GT_U32: |
6051 | case AMDGPU::S_CMP_GE_U32: |
6052 | case AMDGPU::S_CMP_LT_U32: |
6053 | case AMDGPU::S_CMP_LE_U32: |
6054 | case AMDGPU::S_CMP_EQ_U64: |
6055 | case AMDGPU::S_CMP_LG_U64: { |
6056 | const MCInstrDesc &NewDesc = get(NewOpcode); |
6057 | Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass()); |
6058 | MachineInstr *NewInstr = |
6059 | BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg) |
6060 | .add(Inst.getOperand(0)) |
6061 | .add(Inst.getOperand(1)); |
6062 | legalizeOperands(*NewInstr, MDT); |
6063 | int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC); |
6064 | MachineOperand SCCOp = Inst.getOperand(SCCIdx); |
6065 | addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg); |
6066 | Inst.eraseFromParent(); |
6067 | } |
6068 | continue; |
6069 | } |
6070 | |
6071 | |
6072 | if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { |
6073 | // We cannot move this instruction to the VALU, so we should try to legalize |
6074 | // its operands instead. |
6075 | CreatedBBTmp = legalizeOperands(Inst, MDT); |
6076 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6077 | CreatedBB = CreatedBBTmp; |
6078 | continue; |
6079 | } |
6080 | |
6081 | // Use the new VALU opcode. |
6082 | const MCInstrDesc &NewDesc = get(NewOpcode); |
6083 | Inst.setDesc(NewDesc); |
6084 | |
6085 | // Remove any references to SCC. Vector instructions can't read from it, and |
6086 | // we're about to add the implicit use/defs of VCC, and we don't want both. |
6087 | |
6088 | for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { |
6089 | MachineOperand &Op = Inst.getOperand(i); |
6090 | if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { |
6091 | // Only SCC defs that are still live have users that need moving to the VALU. |
6092 | if (Op.isDef() && !Op.isDead()) |
6093 | addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); |
6094 | if (Op.isUse()) |
6095 | addSCCDefsToVALUWorklist(Op, Worklist); |
6096 | Inst.RemoveOperand(i); |
6097 | } |
6098 | } |
6099 | |
6100 | if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { |
6101 | // We are converting this to a BFE, so add the missing offset (0) and width |
6102 | // operands. |
6103 | unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; |
6104 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6105 | Inst.addOperand(MachineOperand::CreateImm(Size)); |
6106 | |
6107 | } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { |
6108 | // The VALU version adds its second operand to the result, so insert an |
6109 | // extra 0 operand. |
6110 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6111 | } |
6112 | |
6113 | Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); |
6114 | fixImplicitOperands(Inst); |
6115 | |
6116 | if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { |
6117 | const MachineOperand &OffsetWidthOp = Inst.getOperand(2); |
6118 | // The scalar BFE packs offset and width into one immediate; the VALU form |
6119 | // takes them as two separate operands, so unpack them here. |
6120 | assert(OffsetWidthOp.isImm() && |
6121 | "Scalar BFE is only implemented for constant width and offset"); |
6122 | uint32_t Imm = OffsetWidthOp.getImm(); |
6123 | |
6124 | uint32_t Offset = Imm & 0x3f; |
6125 | uint32_t BitWidth = (Imm & 0x7f0000) >> 16; |
6126 | Inst.RemoveOperand(2); |
6127 | Inst.addOperand(MachineOperand::CreateImm(Offset)); |
6128 | Inst.addOperand(MachineOperand::CreateImm(BitWidth)); |
6129 | } |
6130 | |
6131 | bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); |
6132 | unsigned NewDstReg = AMDGPU::NoRegister; |
6133 | if (HasDst) { |
6134 | Register DstReg = Inst.getOperand(0).getReg(); |
6135 | if (DstReg.isPhysical()) |
6136 | continue; |
6137 | |
6138 | // Update the destination register class to the VGPR equivalent. |
6139 | const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); |
6140 | if (!NewDstRC) |
6141 | continue; |
6142 | |
6143 | if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && |
6144 | NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { |
6145 | // The source already has the required register class, so rather than keep a |
6146 | // copy whose source and destination classes match, replace all uses of the |
6147 | // destination with the source register directly. |
6148 | // |
6149 | // The original instruction is then turned into an IMPLICIT_DEF of the old |
6150 | // destination (see below) so no stray copy is left behind. |
6151 | addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); |
6152 | MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); |
6153 | MRI.clearKillFlags(Inst.getOperand(1).getReg()); |
6154 | Inst.getOperand(0).setReg(DstReg); |
6155 | |
6156 | // Remove all remaining operands and turn this into an IMPLICIT_DEF so we do |
6157 | // not leave behind a dead, illegal-looking VGPR->SGPR copy of what is now an |
6158 | // unused register. |
6159 | for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) |
6160 | Inst.RemoveOperand(I); |
6161 | Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); |
6162 | continue; |
6163 | } |
6164 | |
6165 | NewDstReg = MRI.createVirtualRegister(NewDstRC); |
6166 | MRI.replaceRegWith(DstReg, NewDstReg); |
6167 | } |
6168 | |
6169 | // Legalize the operands of the new VALU instruction. |
6170 | CreatedBBTmp = legalizeOperands(Inst, MDT); |
6171 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6172 | CreatedBB = CreatedBBTmp; |
6173 | |
6174 | if (HasDst) |
6175 | addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); |
6176 | } |
6177 | return CreatedBB; |
6178 | } |
6179 | |
6180 | // Add/sub require special handling to deal with carry outs. |
6181 | std::pair<bool, MachineBasicBlock *> |
6182 | SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, |
6183 | MachineDominatorTree *MDT) const { |
6184 | if (ST.hasAddNoCarry()) { |
6185 | |
6186 | |
6187 | // On targets with no-carry add/sub we can rewrite the instruction in place |
6188 | // to V_ADD_U32_e64/V_SUB_U32_e64, dropping the SCC def and adding the clamp operand. |
6189 | MachineBasicBlock &MBB = *Inst.getParent(); |
6190 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6191 | |
6192 | Register OldDstReg = Inst.getOperand(0).getReg(); |
6193 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6194 | |
6195 | unsigned Opc = Inst.getOpcode(); |
6196 | assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); |
6197 | |
6198 | unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? |
6199 | AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; |
6200 | |
6201 | assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); |
6202 | Inst.RemoveOperand(3); |
6203 | |
6204 | Inst.setDesc(get(NewOpc)); |
6205 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6206 | Inst.addImplicitDefUseOperands(*MBB.getParent()); |
6207 | MRI.replaceRegWith(OldDstReg, ResultReg); |
6208 | MachineBasicBlock *NewBB = legalizeOperands(Inst, MDT); |
6209 | |
6210 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6211 | return std::make_pair(true, NewBB); |
6212 | } |
6213 | |
6214 | return std::make_pair(false, nullptr); |
6215 | } |
6216 | |
6217 | void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, |
6218 | MachineDominatorTree *MDT) const { |
6219 | |
6220 | MachineBasicBlock &MBB = *Inst.getParent(); |
6221 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6222 | MachineBasicBlock::iterator MII = Inst; |
6223 | DebugLoc DL = Inst.getDebugLoc(); |
6224 | |
6225 | MachineOperand &Dest = Inst.getOperand(0); |
6226 | MachineOperand &Src0 = Inst.getOperand(1); |
6227 | MachineOperand &Src1 = Inst.getOperand(2); |
6228 | MachineOperand &Cond = Inst.getOperand(3); |
6229 | |
6230 | Register SCCSource = Cond.getReg(); |
6231 | bool IsSCC = (SCCSource == AMDGPU::SCC); |
6232 | |
6233 | |
6234 | // If the condition is already a wave mask register and the select is of the |
6235 | // form (cond ? -1 : 0), the result is simply the condition mask itself, so |
6236 | // reuse the condition register directly. |
6237 | if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() && |
6238 | (Src1.getImm() == 0)) { |
6239 | MRI.replaceRegWith(Dest.getReg(), SCCSource); |
6240 | return; |
6241 | } |
6242 | |
6243 | const TargetRegisterClass *TC = |
6244 | RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
6245 | |
6246 | Register CopySCC = MRI.createVirtualRegister(TC); |
6247 | |
6248 | if (IsSCC) { |
6249 | // Walk backwards to the nearest SCC def; if it is a plain copy into SCC we |
6250 | // can use the copied wave mask register instead of SCC. |
6251 | bool CopyFound = false; |
6252 | for (MachineInstr &CandI : |
6253 | make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)), |
6254 | Inst.getParent()->rend())) { |
6255 | if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != |
6256 | -1) { |
6257 | if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { |
6258 | BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC) |
6259 | .addReg(CandI.getOperand(1).getReg()); |
6260 | CopyFound = true; |
6261 | } |
6262 | break; |
6263 | } |
6264 | } |
6265 | if (!CopyFound) { |
6266 | // SCC was defined by something other than a copy, so materialize the |
6267 | // condition as a full wave mask with S_CSELECT (-1 for true, 0 for false) |
6268 | // rather than copying a single bit out of SCC. |
6269 | |
6270 | unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64 |
6271 | : AMDGPU::S_CSELECT_B32; |
6272 | auto NewSelect = |
6273 | BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0); |
6274 | NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); |
6275 | } |
6276 | } |
6277 | |
6278 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6279 | |
6280 | auto UpdatedInst = |
6281 | BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), ResultReg) |
6282 | .addImm(0) |
6283 | .add(Src1) |
6284 | .addImm(0) |
6285 | .add(Src0) |
6286 | .addReg(IsSCC ? CopySCC : SCCSource); |
6287 | |
6288 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6289 | legalizeOperands(*UpdatedInst, MDT); |
6290 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6291 | } |
6292 | |
6293 | void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, |
6294 | MachineInstr &Inst) const { |
6295 | MachineBasicBlock &MBB = *Inst.getParent(); |
6296 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6297 | MachineBasicBlock::iterator MII = Inst; |
6298 | DebugLoc DL = Inst.getDebugLoc(); |
6299 | |
6300 | MachineOperand &Dest = Inst.getOperand(0); |
6301 | MachineOperand &Src = Inst.getOperand(1); |
6302 | Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6303 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6304 | |
6305 | unsigned SubOp = ST.hasAddNoCarry() ? |
6306 | AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32; |
6307 | |
6308 | BuildMI(MBB, MII, DL, get(SubOp), TmpReg) |
6309 | .addImm(0) |
6310 | .addReg(Src.getReg()); |
6311 | |
6312 | BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) |
6313 | .addReg(Src.getReg()) |
6314 | .addReg(TmpReg); |
6315 | |
6316 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6317 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6318 | } |
6319 | |
6320 | void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, |
6321 | MachineInstr &Inst) const { |
6322 | MachineBasicBlock &MBB = *Inst.getParent(); |
6323 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6324 | MachineBasicBlock::iterator MII = Inst; |
6325 | const DebugLoc &DL = Inst.getDebugLoc(); |
6326 | |
6327 | MachineOperand &Dest = Inst.getOperand(0); |
6328 | MachineOperand &Src0 = Inst.getOperand(1); |
6329 | MachineOperand &Src1 = Inst.getOperand(2); |
6330 | |
6331 | if (ST.hasDLInsts()) { |
6332 | Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6333 | legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); |
6334 | legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); |
6335 | |
6336 | BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest) |
6337 | .add(Src0) |
6338 | .add(Src1); |
6339 | |
6340 | MRI.replaceRegWith(Dest.getReg(), NewDest); |
6341 | addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); |
6342 | } else { |
6343 | // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can invert either |
6344 | // source and then perform the XOR. If either source is a scalar register, |
6345 | // we can leave the inversion on the scalar unit and avoid an extra VALU |
6346 | // instruction for the NOT. |
6347 | bool Src0IsSGPR = Src0.isReg() && |
6348 | RI.isSGPRClass(MRI.getRegClass(Src0.getReg())); |
6349 | bool Src1IsSGPR = Src1.isReg() && |
6350 | RI.isSGPRClass(MRI.getRegClass(Src1.getReg())); |
6351 | MachineInstr *Xor; |
6352 | Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); |
6353 | Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); |
6354 | |
6355 | |
6356 | // Build a pair of scalar instructions here; the next iteration over the |
6357 | // work list will lower them to their vector equivalents if needed. |
6358 | if (Src0IsSGPR) { |
6359 | BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0); |
6360 | Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) |
6361 | .addReg(Temp) |
6362 | .add(Src1); |
6363 | } else if (Src1IsSGPR) { |
6364 | BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1); |
6365 | Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) |
6366 | .add(Src0) |
6367 | .addReg(Temp); |
6368 | } else { |
6369 | Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp) |
6370 | .add(Src0) |
6371 | .add(Src1); |
6372 | MachineInstr *Not = |
6373 | BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp); |
6374 | Worklist.insert(Not); |
6375 | } |
6376 | |
6377 | MRI.replaceRegWith(Dest.getReg(), NewDest); |
6378 | |
6379 | Worklist.insert(Xor); |
6380 | |
6381 | addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); |
6382 | } |
6383 | } |
6384 | |
6385 | void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist, |
6386 | MachineInstr &Inst, |
6387 | unsigned Opcode) const { |
6388 | MachineBasicBlock &MBB = *Inst.getParent(); |
6389 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6390 | MachineBasicBlock::iterator MII = Inst; |
6391 | const DebugLoc &DL = Inst.getDebugLoc(); |
6392 | |
6393 | MachineOperand &Dest = Inst.getOperand(0); |
6394 | MachineOperand &Src0 = Inst.getOperand(1); |
6395 | MachineOperand &Src1 = Inst.getOperand(2); |
6396 | |
6397 | Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); |
6398 | Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); |
6399 | |
6400 | MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) |
6401 | .add(Src0) |
6402 | .add(Src1); |
6403 | |
6404 | MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest) |
6405 | .addReg(Interm); |
6406 | |
6407 | Worklist.insert(&Op); |
6408 | Worklist.insert(&Not); |
6409 | |
6410 | MRI.replaceRegWith(Dest.getReg(), NewDest); |
6411 | addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); |
6412 | } |
6413 | |
6414 | void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist, |
6415 | MachineInstr &Inst, |
6416 | unsigned Opcode) const { |
6417 | MachineBasicBlock &MBB = *Inst.getParent(); |
6418 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6419 | MachineBasicBlock::iterator MII = Inst; |
6420 | const DebugLoc &DL = Inst.getDebugLoc(); |
6421 | |
6422 | MachineOperand &Dest = Inst.getOperand(0); |
6423 | MachineOperand &Src0 = Inst.getOperand(1); |
6424 | MachineOperand &Src1 = Inst.getOperand(2); |
6425 | |
6426 | Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
6427 | Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
6428 | |
6429 | MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) |
6430 | .add(Src1); |
6431 | |
6432 | MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest) |
6433 | .add(Src0) |
6434 | .addReg(Interm); |
6435 | |
6436 | Worklist.insert(&Not); |
6437 | Worklist.insert(&Op); |
6438 | |
6439 | MRI.replaceRegWith(Dest.getReg(), NewDest); |
6440 | addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); |
6441 | } |
6442 | |
6443 | void SIInstrInfo::splitScalar64BitUnaryOp( |
6444 | SetVectorType &Worklist, MachineInstr &Inst, |
6445 | unsigned Opcode, bool Swap) const { |
6446 | MachineBasicBlock &MBB = *Inst.getParent(); |
6447 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6448 | |
6449 | MachineOperand &Dest = Inst.getOperand(0); |
6450 | MachineOperand &Src0 = Inst.getOperand(1); |
6451 | DebugLoc DL = Inst.getDebugLoc(); |
6452 | |
6453 | MachineBasicBlock::iterator MII = Inst; |
6454 | |
6455 | const MCInstrDesc &InstDesc = get(Opcode); |
6456 | const TargetRegisterClass *Src0RC = Src0.isReg() ? |
6457 | MRI.getRegClass(Src0.getReg()) : |
6458 | &AMDGPU::SGPR_32RegClass; |
6459 | |
6460 | const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); |
6461 | |
6462 | MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6463 | AMDGPU::sub0, Src0SubRC); |
6464 | |
6465 | const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); |
6466 | const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); |
6467 | const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); |
6468 | |
6469 | Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); |
6470 | MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); |
6471 | |
6472 | MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6473 | AMDGPU::sub1, Src0SubRC); |
6474 | |
6475 | Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); |
6476 | MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); |
6477 | |
6478 | if (Swap) |
6479 | std::swap(DestSub0, DestSub1); |
6480 | |
6481 | Register FullDestReg = MRI.createVirtualRegister(NewDestRC); |
6482 | BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) |
6483 | .addReg(DestSub0) |
6484 | .addImm(AMDGPU::sub0) |
6485 | .addReg(DestSub1) |
6486 | .addImm(AMDGPU::sub1); |
6487 | |
6488 | MRI.replaceRegWith(Dest.getReg(), FullDestReg); |
6489 | |
6490 | Worklist.insert(&LoHalf); |
6491 | Worklist.insert(&HiHalf); |
6492 | |
6493 | // We don't need to legalizeOperands here because for a single operand, src0 |
6494 | // will support any kind of input. |
6495 | |
6496 | // Move all users of this moved value. |
6497 | addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); |
6498 | } |
6499 | |
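     | // Expand S_ADD_U64_PSEUDO / S_SUB_U64_PSEUDO into a 32-bit add/sub with a |
     | // carry-out feeding a 32-bit add/sub-with-carry on the high half. |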
6500 | void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist, |
6501 | MachineInstr &Inst, |
6502 | MachineDominatorTree *MDT) const { |
6503 | bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); |
6504 | |
6505 | MachineBasicBlock &MBB = *Inst.getParent(); |
6506 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6507 | const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
6508 | |
6509 | Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
6510 | Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6511 | Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6512 | |
6513 | Register CarryReg = MRI.createVirtualRegister(CarryRC); |
6514 | Register DeadCarryReg = MRI.createVirtualRegister(CarryRC); |
6515 | |
6516 | MachineOperand &Dest = Inst.getOperand(0); |
6517 | MachineOperand &Src0 = Inst.getOperand(1); |
6518 | MachineOperand &Src1 = Inst.getOperand(2); |
6519 | const DebugLoc &DL = Inst.getDebugLoc(); |
6520 | MachineBasicBlock::iterator MII = Inst; |
6521 | |
6522 | const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); |
6523 | const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); |
6524 | const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); |
6525 | const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); |
6526 | |
6527 | MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6528 | AMDGPU::sub0, Src0SubRC); |
6529 | MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, |
6530 | AMDGPU::sub0, Src1SubRC); |
6531 | |
6532 | |
6533 | MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6534 | AMDGPU::sub1, Src0SubRC); |
6535 | MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, |
6536 | AMDGPU::sub1, Src1SubRC); |
6537 | |
6538 | unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; |
6539 | MachineInstr *LoHalf = |
6540 | BuildMI(MBB, MII, DL, get(LoOpc), DestSub0) |
6541 | .addReg(CarryReg, RegState::Define) |
6542 | .add(SrcReg0Sub0) |
6543 | .add(SrcReg1Sub0) |
6544 | .addImm(0); |
6545 | |
6546 | unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; |
6547 | MachineInstr *HiHalf = |
6548 | BuildMI(MBB, MII, DL, get(HiOpc), DestSub1) |
6549 | .addReg(DeadCarryReg, RegState::Define | RegState::Dead) |
6550 | .add(SrcReg0Sub1) |
6551 | .add(SrcReg1Sub1) |
6552 | .addReg(CarryReg, RegState::Kill) |
6553 | .addImm(0); |
6554 | |
6555 | BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) |
6556 | .addReg(DestSub0) |
6557 | .addImm(AMDGPU::sub0) |
6558 | .addReg(DestSub1) |
6559 | .addImm(AMDGPU::sub1); |
6560 | |
6561 | MRI.replaceRegWith(Dest.getReg(), FullDestReg); |
6562 | |
6563 | // Try to legalize the operands in case we need to swap the order to keep it |
6564 | // valid. |
6565 | legalizeOperands(*LoHalf, MDT); |
6566 | legalizeOperands(*HiHalf, MDT); |
6567 | |
6568 | // Move all users of this moved value. |
6569 | addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); |
6570 | } |
6571 | |
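     | // Split a 64-bit scalar binary op into two independent 32-bit ops on the |
     | // sub0/sub1 halves and recombine the results with a REG_SEQUENCE. |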
6572 | void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist, |
6573 | MachineInstr &Inst, unsigned Opcode, |
6574 | MachineDominatorTree *MDT) const { |
6575 | MachineBasicBlock &MBB = *Inst.getParent(); |
6576 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6577 | |
6578 | MachineOperand &Dest = Inst.getOperand(0); |
6579 | MachineOperand &Src0 = Inst.getOperand(1); |
6580 | MachineOperand &Src1 = Inst.getOperand(2); |
6581 | DebugLoc DL = Inst.getDebugLoc(); |
6582 | |
6583 | MachineBasicBlock::iterator MII = Inst; |
6584 | |
6585 | const MCInstrDesc &InstDesc = get(Opcode); |
6586 | const TargetRegisterClass *Src0RC = Src0.isReg() ? |
6587 | MRI.getRegClass(Src0.getReg()) : |
6588 | &AMDGPU::SGPR_32RegClass; |
6589 | |
6590 | const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); |
6591 | const TargetRegisterClass *Src1RC = Src1.isReg() ? |
6592 | MRI.getRegClass(Src1.getReg()) : |
6593 | &AMDGPU::SGPR_32RegClass; |
6594 | |
6595 | const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); |
6596 | |
6597 | MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6598 | AMDGPU::sub0, Src0SubRC); |
6599 | MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, |
6600 | AMDGPU::sub0, Src1SubRC); |
6601 | MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, |
6602 | AMDGPU::sub1, Src0SubRC); |
6603 | MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, |
6604 | AMDGPU::sub1, Src1SubRC); |
6605 | |
6606 | const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); |
6607 | const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); |
6608 | const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); |
6609 | |
6610 | Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); |
6611 | MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) |
6612 | .add(SrcReg0Sub0) |
6613 | .add(SrcReg1Sub0); |
6614 | |
6615 | Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); |
6616 | MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) |
6617 | .add(SrcReg0Sub1) |
6618 | .add(SrcReg1Sub1); |
6619 | |
6620 | Register FullDestReg = MRI.createVirtualRegister(NewDestRC); |
6621 | BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) |
6622 | .addReg(DestSub0) |
6623 | .addImm(AMDGPU::sub0) |
6624 | .addReg(DestSub1) |
6625 | .addImm(AMDGPU::sub1); |
6626 | |
6627 | MRI.replaceRegWith(Dest.getReg(), FullDestReg); |
6628 | |
6629 | Worklist.insert(&LoHalf); |
6630 | Worklist.insert(&HiHalf); |
6631 | |
6632 | // Move all users of this moved value. |
6633 | addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); |
6634 | } |
6635 | |
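     | // Lower a 64-bit xnor as s_not_b64 of one operand followed by s_xor_b64. The |
     | // operand known to be an SGPR is the one inverted, so the NOT can stay on the |
     | // SALU; only the XOR is queued for further conversion to the VALU. |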
6636 | void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist, |
6637 | MachineInstr &Inst, |
6638 | MachineDominatorTree *MDT) const { |
6639 | MachineBasicBlock &MBB = *Inst.getParent(); |
6640 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6641 | |
6642 | MachineOperand &Dest = Inst.getOperand(0); |
6643 | MachineOperand &Src0 = Inst.getOperand(1); |
6644 | MachineOperand &Src1 = Inst.getOperand(2); |
6645 | const DebugLoc &DL = Inst.getDebugLoc(); |
6646 | |
6647 | MachineBasicBlock::iterator MII = Inst; |
6648 | |
6649 | const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); |
6650 | |
6651 | Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
6652 | |
6653 | MachineOperand* Op0; |
6654 | MachineOperand* Op1; |
6655 | |
6656 | if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) { |
6657 | Op0 = &Src0; |
6658 | Op1 = &Src1; |
6659 | } else { |
6660 | Op0 = &Src1; |
6661 | Op1 = &Src0; |
6662 | } |
6663 | |
6664 | BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) |
6665 | .add(*Op0); |
6666 | |
6667 | Register NewDest = MRI.createVirtualRegister(DestRC); |
6668 | |
6669 | MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) |
6670 | .addReg(Interm) |
6671 | .add(*Op1); |
6672 | |
6673 | MRI.replaceRegWith(Dest.getReg(), NewDest); |
6674 | |
6675 | Worklist.insert(&Xor); |
6676 | } |
6677 | |
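     | // Expand a 64-bit population count into two v_bcnt_u32_b32 instructions, the |
     | // second one accumulating into the result of the first. |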
6678 | void SIInstrInfo::splitScalar64BitBCNT( |
6679 | SetVectorType &Worklist, MachineInstr &Inst) const { |
6680 | MachineBasicBlock &MBB = *Inst.getParent(); |
6681 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6682 | |
6683 | MachineBasicBlock::iterator MII = Inst; |
6684 | const DebugLoc &DL = Inst.getDebugLoc(); |
6685 | |
6686 | MachineOperand &Dest = Inst.getOperand(0); |
6687 | MachineOperand &Src = Inst.getOperand(1); |
6688 | |
6689 | const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); |
6690 | const TargetRegisterClass *SrcRC = Src.isReg() ? |
6691 | MRI.getRegClass(Src.getReg()) : |
6692 | &AMDGPU::SGPR_32RegClass; |
6693 | |
6694 | Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6695 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6696 | |
6697 | const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); |
6698 | |
6699 | MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, |
6700 | AMDGPU::sub0, SrcSubRC); |
6701 | MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, |
6702 | AMDGPU::sub1, SrcSubRC); |
6703 | |
6704 | BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); |
6705 | |
6706 | BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); |
6707 | |
6708 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6709 | |
6710 | |
6711 | // Move all users of this moved value. |
6712 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6713 | } |
6714 | |
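     | // Expand S_BFE_I64. The immediate packs the field offset in bits [5:0] and the |
     | // field width in bits [22:16]; for example, an immediate of 0x100000 selects |
     | // offset 0 with width 16 (a 16-bit sign extension). |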
6715 | void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, |
6716 | MachineInstr &Inst) const { |
6717 | MachineBasicBlock &MBB = *Inst.getParent(); |
6718 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6719 | MachineBasicBlock::iterator MII = Inst; |
6720 | const DebugLoc &DL = Inst.getDebugLoc(); |
6721 | |
6722 | MachineOperand &Dest = Inst.getOperand(0); |
6723 | uint32_t Imm = Inst.getOperand(2).getImm(); |
6724 | uint32_t Offset = Imm & 0x3f; |
6725 | uint32_t BitWidth = (Imm & 0x7f0000) >> 16; |
6726 | |
6727 | (void) Offset; |
6728 | |
6729 | // Only the sext_inreg case (S_BFE_I64 with offset 0 and width <= 32) is handled. |
6730 | assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && |
6731 | Offset == 0 && "Not implemented"); |
6732 | |
6733 | if (BitWidth < 32) { |
6734 | Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6735 | Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6736 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
6737 | |
6738 | BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32_e64), MidRegLo) |
6739 | .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) |
6740 | .addImm(0) |
6741 | .addImm(BitWidth); |
6742 | |
6743 | BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) |
6744 | .addImm(31) |
6745 | .addReg(MidRegLo); |
6746 | |
6747 | BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) |
6748 | .addReg(MidRegLo) |
6749 | .addImm(AMDGPU::sub0) |
6750 | .addReg(MidRegHi) |
6751 | .addImm(AMDGPU::sub1); |
6752 | |
6753 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6754 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6755 | return; |
6756 | } |
6757 | |
6758 | MachineOperand &Src = Inst.getOperand(1); |
6759 | Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6760 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
6761 | |
6762 | BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) |
6763 | .addImm(31) |
6764 | .addReg(Src.getReg(), 0, AMDGPU::sub0); |
6765 | |
6766 | BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) |
6767 | .addReg(Src.getReg(), 0, AMDGPU::sub0) |
6768 | .addImm(AMDGPU::sub0) |
6769 | .addReg(TmpReg) |
6770 | .addImm(AMDGPU::sub1); |
6771 | |
6772 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6773 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6774 | } |
6775 | |
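     | // Queue every user of DstReg whose relevant operand is still constrained to a |
     | // scalar-only register class. For copy-like instructions (COPY, PHI, |
     | // REG_SEQUENCE, ...) the result register class is checked instead, since their |
     | // operands impose no constraints of their own. |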
6776 | void SIInstrInfo::addUsersToMoveToVALUWorklist( |
6777 | Register DstReg, |
6778 | MachineRegisterInfo &MRI, |
6779 | SetVectorType &Worklist) const { |
6780 | for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), |
6781 | E = MRI.use_end(); I != E;) { |
6782 | MachineInstr &UseMI = *I->getParent(); |
6783 | |
6784 | unsigned OpNo = 0; |
6785 | |
6786 | switch (UseMI.getOpcode()) { |
6787 | case AMDGPU::COPY: |
6788 | case AMDGPU::WQM: |
6789 | case AMDGPU::SOFT_WQM: |
6790 | case AMDGPU::STRICT_WWM: |
6791 | case AMDGPU::STRICT_WQM: |
6792 | case AMDGPU::REG_SEQUENCE: |
6793 | case AMDGPU::PHI: |
6794 | case AMDGPU::INSERT_SUBREG: |
6795 | break; |
6796 | default: |
6797 | OpNo = I.getOperandNo(); |
6798 | break; |
6799 | } |
6800 | |
6801 | if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) { |
6802 | Worklist.insert(&UseMI); |
6803 | |
6804 | do { |
6805 | ++I; |
6806 | } while (I != E && I->getParent() == &UseMI); |
6807 | } else { |
6808 | ++I; |
6809 | } |
6810 | } |
6811 | } |
6812 | |
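     | // Expand an s_pack_* instruction into VALU bit operations. For example, |
     | // s_pack_ll_b32_b16 becomes ((Src1 << 16) | (Src0 & 0xffff)) built from a |
     | // V_AND_B32 and a V_LSHL_OR_B32. |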
6813 | void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, |
6814 | MachineRegisterInfo &MRI, |
6815 | MachineInstr &Inst) const { |
6816 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6817 | MachineBasicBlock *MBB = Inst.getParent(); |
6818 | MachineOperand &Src0 = Inst.getOperand(1); |
6819 | MachineOperand &Src1 = Inst.getOperand(2); |
6820 | const DebugLoc &DL = Inst.getDebugLoc(); |
6821 | |
6822 | switch (Inst.getOpcode()) { |
6823 | case AMDGPU::S_PACK_LL_B32_B16: { |
6824 | Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6825 | Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6826 | |
6827 | |
6828 | // FIXME: Can do a lot better if we know the high bits of src0 or src1 are 0. |
6829 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) |
6830 | .addImm(0xffff); |
6831 | |
6832 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) |
6833 | .addReg(ImmReg, RegState::Kill) |
6834 | .add(Src0); |
6835 | |
6836 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) |
6837 | .add(Src1) |
6838 | .addImm(16) |
6839 | .addReg(TmpReg, RegState::Kill); |
6840 | break; |
6841 | } |
6842 | case AMDGPU::S_PACK_LH_B32_B16: { |
6843 | Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6844 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) |
6845 | .addImm(0xffff); |
6846 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32_e64), ResultReg) |
6847 | .addReg(ImmReg, RegState::Kill) |
6848 | .add(Src0) |
6849 | .add(Src1); |
6850 | break; |
6851 | } |
6852 | case AMDGPU::S_PACK_HH_B32_B16: { |
6853 | Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6854 | Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6855 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) |
6856 | .addImm(16) |
6857 | .add(Src0); |
6858 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) |
6859 | .addImm(0xffff0000); |
6860 | BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32_e64), ResultReg) |
6861 | .add(Src1) |
6862 | .addReg(ImmReg, RegState::Kill) |
6863 | .addReg(TmpReg, RegState::Kill); |
6864 | break; |
6865 | } |
6866 | default: |
6867 | llvm_unreachable("unhandled s_pack_* instruction"); |
6868 | } |
6869 | |
6870 | MachineOperand &Dest = Inst.getOperand(0); |
6871 | MRI.replaceRegWith(Dest.getReg(), ResultReg); |
6872 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6873 | } |
6874 | |
6875 | void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, |
6876 | MachineInstr &SCCDefInst, |
6877 | SetVectorType &Worklist, |
6878 | Register NewCond) const { |
6879 | |
6880 | // Ensure that Op is a live SCC def on SCCDefInst. |
6881 | assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() && |
6882 | !Op.isDead() && Op.getParent() == &SCCDefInst); |
6883 | SmallVector<MachineInstr *, 4> CopyToDelete; |
6884 | |
6885 | // This assumes that all the users of SCC are in the same block as the SCC def. |
6886 | for (MachineInstr &MI : |
6887 | make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)), |
6888 | SCCDefInst.getParent()->end())) { |
6889 | |
6890 | int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI); |
6891 | if (SCCIdx != -1) { |
6892 | if (MI.isCopy()) { |
6893 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
6894 | Register DestReg = MI.getOperand(0).getReg(); |
6895 | |
6896 | MRI.replaceRegWith(DestReg, NewCond); |
6897 | CopyToDelete.push_back(&MI); |
6898 | } else { |
6899 | |
6900 | if (NewCond.isValid()) |
6901 | MI.getOperand(SCCIdx).setReg(NewCond); |
6902 | |
6903 | Worklist.insert(&MI); |
6904 | } |
6905 | } |
6906 | // Exit if we find another SCC def. |
6907 | if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) |
6908 | break; |
6909 | } |
6910 | for (auto &Copy : CopyToDelete) |
6911 | Copy->eraseFromParent(); |
6912 | } |
6913 | |
6914 | |
6915 | // Instructions that use SCC may be converted to VALU instructions. When that |
6916 | // happens, the SCC register is changed to VCC_LO. The instruction that defines |
6917 | // SCC must then be changed to define VCC_LO as well, so find the preceding SCC |
6918 | // def and add it to the worklist too. |
6919 | void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op, |
6920 | SetVectorType &Worklist) const { |
6921 | assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse()); |
6922 | |
6923 | MachineInstr *SCCUseInst = Op.getParent(); |
6924 | |
6925 | |
6926 | // Walk backwards for the nearest SCC def and queue it; stop early if an |
6927 | // instruction that modifies VCC is found first. |
6928 | for (MachineInstr &MI : |
6929 | make_range(std::next(MachineBasicBlock::reverse_iterator(SCCUseInst)), |
6930 | SCCUseInst->getParent()->rend())) { |
6931 | if (MI.modifiesRegister(AMDGPU::VCC, &RI)) |
6932 | break; |
6933 | if (MI.definesRegister(AMDGPU::SCC, &RI)) { |
6934 | Worklist.insert(&MI); |
6935 | break; |
6936 | } |
6937 | } |
6938 | } |
6939 | |
6940 | const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( |
6941 | const MachineInstr &Inst) const { |
6942 | const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); |
6943 | |
6944 | switch (Inst.getOpcode()) { |
6945 | |
6946 | |
6947 | |
6948 | case AMDGPU::COPY: |
6949 | case AMDGPU::PHI: |
6950 | case AMDGPU::REG_SEQUENCE: |
6951 | case AMDGPU::INSERT_SUBREG: |
6952 | case AMDGPU::WQM: |
6953 | case AMDGPU::SOFT_WQM: |
6954 | case AMDGPU::STRICT_WWM: |
6955 | case AMDGPU::STRICT_WQM: { |
6956 | const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); |
6957 | if (RI.hasAGPRs(SrcRC)) { |
6958 | if (RI.hasAGPRs(NewDstRC)) |
6959 | return nullptr; |
6960 | |
6961 | switch (Inst.getOpcode()) { |
6962 | case AMDGPU::PHI: |
6963 | case AMDGPU::REG_SEQUENCE: |
6964 | case AMDGPU::INSERT_SUBREG: |
6965 | NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); |
6966 | break; |
6967 | default: |
6968 | NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); |
6969 | } |
6970 | |
6971 | if (!NewDstRC) |
6972 | return nullptr; |
6973 | } else { |
6974 | if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) |
6975 | return nullptr; |
6976 | |
6977 | NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); |
6978 | if (!NewDstRC) |
6979 | return nullptr; |
6980 | } |
6981 | |
6982 | return NewDstRC; |
6983 | } |
6984 | default: |
6985 | return NewDstRC; |
6986 | } |
6987 | } |
6988 | |
6989 | |
6990 | Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI, |
6991 | int OpIndices[3]) const { |
6992 | const MCInstrDesc &Desc = MI.getDesc(); |
6993 | |
6994 | // Find the one SGPR operand we are allowed to keep when this instruction is |
6995 | // legalized. An implicit SGPR read (such as m0 or VCC) always takes that slot |
6996 | // since it cannot be replaced. |
6997 | |
6998 | |
6999 | |
7000 | |
7001 | |
7002 | |
7003 | Register SGPRReg = findImplicitSGPRRead(MI); |
7004 | if (SGPRReg != AMDGPU::NoRegister) |
7005 | return SGPRReg; |
7006 | |
7007 | Register UsedSGPRs[3] = { AMDGPU::NoRegister }; |
7008 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
7009 | |
7010 | for (unsigned i = 0; i < 3; ++i) { |
7011 | int Idx = OpIndices[i]; |
7012 | if (Idx == -1) |
7013 | break; |
7014 | |
7015 | const MachineOperand &MO = MI.getOperand(Idx); |
7016 | if (!MO.isReg()) |
7017 | continue; |
7018 | |
7019 | // Is this operand statically required to be an SGPR based on the operand |
7020 | // constraints? |
7021 | const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); |
7022 | bool IsRequiredSGPR = RI.isSGPRClass(OpRC); |
7023 | if (IsRequiredSGPR) |
7024 | return MO.getReg(); |
7025 | |
7026 | // If this could be a VGPR or an SGPR, check the dynamic register class. |
7027 | Register Reg = MO.getReg(); |
7028 | const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); |
7029 | if (RI.isSGPRClass(RegRC)) |
7030 | UsedSGPRs[i] = Reg; |
7031 | } |
7032 | |
7033 | |
7034 | |
7035 | |
7036 | |
7037 | |
7038 | |
7039 | |
7040 | |
7041 | // We don't have a required SGPR operand, so we have a bit more freedom in |
7042 | // selecting operands to move. |
7043 | |
7044 | // Try to select the most used SGPR: if one SGPR is used by two of the source |
7045 | // operands, keeping it avoids inserting an extra copy when legalizing. |
7046 | if (UsedSGPRs[0] != AMDGPU::NoRegister) { |
7047 | if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) |
7048 | SGPRReg = UsedSGPRs[0]; |
7049 | } |
7050 | |
7051 | if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { |
7052 | if (UsedSGPRs[1] == UsedSGPRs[2]) |
7053 | SGPRReg = UsedSGPRs[1]; |
7054 | } |
7055 | |
7056 | return SGPRReg; |
7057 | } |
7058 | |
7059 | MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, |
7060 | unsigned OperandName) const { |
7061 | int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); |
7062 | if (Idx == -1) |
7063 | return nullptr; |
7064 | |
7065 | return &MI.getOperand(Idx); |
7066 | } |
7067 | |
7068 | uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { |
7069 | if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
7070 | return (AMDGPU::MTBUFFormat::UFMT_32_FLOAT << 44) | |
7071 | (1ULL << 56) | |
7072 | (3ULL << 60); |
7073 | } |
7074 | |
7075 | uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; |
7076 | if (ST.isAmdHsaOS()) { |
7077 | // Set ATC = 1. GFX9 doesn't have this bit. |
7078 | if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) |
7079 | RsrcDataFormat |= (1ULL << 56); |
7080 | |
7081 | |
7082 | |
7083 | if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) |
7084 | RsrcDataFormat |= (2ULL << 59); |
7085 | } |
7086 | |
7087 | return RsrcDataFormat; |
7088 | } |
7089 | |
7090 | uint64_t SIInstrInfo::getScratchRsrcWords23() const { |
7091 | uint64_t Rsrc23 = getDefaultRsrcDataFormat() | |
7092 | AMDGPU::RSRC_TID_ENABLE | |
7093 | 0xffffffff; |
7094 | |
7095 | // GFX9 doesn't have ELEMENT_SIZE. |
7096 | if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) { |
7097 | uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1; |
7098 | Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; |
7099 | } |
7100 | |
7101 | // IndexStride = 64 for wave64, 32 for wave32 (encoded as 3 and 2 respectively). |
7102 | uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2; |
7103 | Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; |
7104 | |
7105 | |
7106 | // If TID_ENABLE is set, DATA_FORMAT carries stride bits, so clear the default format on these generations. |
7107 | if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && |
7108 | ST.getGeneration() <= AMDGPUSubtarget::GFX9) |
7109 | Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; |
7110 | |
7111 | return Rsrc23; |
7112 | } |
7113 | |
7114 | bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { |
7115 | unsigned Opc = MI.getOpcode(); |
7116 | |
7117 | return isSMRD(Opc); |
7118 | } |
7119 | |
7120 | bool SIInstrInfo::isHighLatencyDef(int Opc) const { |
7121 | return get(Opc).mayLoad() && |
7122 | (isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc) || isFLAT(Opc)); |
7123 | } |
7124 | |
7125 | unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, |
7126 | int &FrameIndex) const { |
7127 | const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); |
7128 | if (!Addr || !Addr->isFI()) |
7129 | return AMDGPU::NoRegister; |
7130 | |
7131 | assert(!MI.memoperands_empty() && |
7132 | (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); |
7133 | |
7134 | FrameIndex = Addr->getIndex(); |
7135 | return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); |
7136 | } |
7137 | |
7138 | unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, |
7139 | int &FrameIndex) const { |
7140 | const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr); |
7141 | assert(Addr && Addr->isFI()); |
7142 | FrameIndex = Addr->getIndex(); |
7143 | return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); |
7144 | } |
7145 | |
7146 | unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
7147 | int &FrameIndex) const { |
7148 | if (!MI.mayLoad()) |
7149 | return AMDGPU::NoRegister; |
7150 | |
7151 | if (isMUBUF(MI) || isVGPRSpill(MI)) |
7152 | return isStackAccess(MI, FrameIndex); |
7153 | |
7154 | if (isSGPRSpill(MI)) |
7155 | return isSGPRStackAccess(MI, FrameIndex); |
7156 | |
7157 | return AMDGPU::NoRegister; |
7158 | } |
7159 | |
7160 | unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
7161 | int &FrameIndex) const { |
7162 | if (!MI.mayStore()) |
7163 | return AMDGPU::NoRegister; |
7164 | |
7165 | if (isMUBUF(MI) || isVGPRSpill(MI)) |
7166 | return isStackAccess(MI, FrameIndex); |
7167 | |
7168 | if (isSGPRSpill(MI)) |
7169 | return isSGPRStackAccess(MI, FrameIndex); |
7170 | |
7171 | return AMDGPU::NoRegister; |
7172 | } |
7173 | |
7174 | unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { |
7175 | unsigned Size = 0; |
7176 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
7177 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
7178 | while (++I != E && I->isInsideBundle()) { |
7179 | assert(!I->isBundle() && "No nested bundle!"); |
7180 | Size += getInstSizeInBytes(*I); |
7181 | } |
7182 | |
7183 | return Size; |
7184 | } |
7185 | |
7186 | unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
7187 | unsigned Opc = MI.getOpcode(); |
7188 | const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); |
7189 | unsigned DescSize = Desc.getSize(); |
7190 | |
7191 | |
7192 | // If we have a definitive size, we can use it. Otherwise we need to inspect the operands to know the size. |
7193 | if (isFixedSize(MI)) { |
7194 | unsigned Size = DescSize; |
7195 | |
7196 | |
7197 | // Branches on subtargets with the offset-0x3f bug may need an extra 4 bytes for the workaround. |
7198 | if (MI.isBranch() && ST.hasOffset3fBug()) |
7199 | Size += 4; |
7200 | |
7201 | return Size; |
7202 | } |
7203 | |
7204 | // Instructions may have a 32-bit literal encoded after them. Check operands |
7205 | // that could ever be literals. |
7206 | if (isVALU(MI) || isSALU(MI)) { |
7207 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); |
7208 | if (Src0Idx == -1) |
7209 | return DescSize; |
7210 | |
7211 | if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) |
7212 | return isVOP3(MI) ? 12 : (DescSize + 4); |
7213 | |
7214 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); |
7215 | if (Src1Idx == -1) |
7216 | return DescSize; |
7217 | |
7218 | if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) |
7219 | return isVOP3(MI) ? 12 : (DescSize + 4); |
7220 | |
7221 | int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); |
7222 | if (Src2Idx == -1) |
7223 | return DescSize; |
7224 | |
7225 | if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) |
7226 | return isVOP3(MI) ? 12 : (DescSize + 4); |
7227 | |
7228 | return DescSize; |
7229 | } |
7230 | |
7231 | |
7232 | if (isMIMG(MI)) { |
7233 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); |
7234 | if (VAddr0Idx < 0) |
7235 | return 8; |
7236 | |
7237 | int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); |
7238 | return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); |
7239 | } |
7240 | |
7241 | switch (Opc) { |
7242 | case TargetOpcode::BUNDLE: |
7243 | return getInstBundleSize(MI); |
7244 | case TargetOpcode::INLINEASM: |
7245 | case TargetOpcode::INLINEASM_BR: { |
7246 | const MachineFunction *MF = MI.getParent()->getParent(); |
7247 | const char *AsmStr = MI.getOperand(0).getSymbolName(); |
7248 | return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST); |
7249 | } |
7250 | default: |
7251 | if (MI.isMetaInstruction()) |
7252 | return 0; |
7253 | return DescSize; |
7254 | } |
7255 | } |
7256 | |
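     | // A FLAT instruction without memory operands is conservatively assumed to |
     | // access the flat address space. |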
7257 | bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { |
7258 | if (!isFLAT(MI)) |
7259 | return false; |
7260 | |
7261 | if (MI.memoperands_empty()) |
7262 | return true; |
7263 | |
7264 | for (const MachineMemOperand *MMO : MI.memoperands()) { |
7265 | if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) |
7266 | return true; |
7267 | } |
7268 | return false; |
7269 | } |
7270 | |
7271 | bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { |
7272 | return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; |
7273 | } |
7274 | |
7275 | void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, |
7276 | MachineBasicBlock *IfEnd) const { |
7277 | MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); |
7278 | assert(TI != IfEntry->end()); |
7279 | |
7280 | MachineInstr *Branch = &(*TI); |
7281 | MachineFunction *MF = IfEntry->getParent(); |
7282 | MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); |
7283 | |
7284 | if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { |
7285 | Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); |
7286 | MachineInstr *SIIF = |
7287 | BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) |
7288 | .add(Branch->getOperand(0)) |
7289 | .add(Branch->getOperand(1)); |
7290 | MachineInstr *SIEND = |
7291 | BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) |
7292 | .addReg(DstReg); |
7293 | |
7294 | IfEntry->erase(TI); |
7295 | IfEntry->insert(IfEntry->end(), SIIF); |
7296 | IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); |
7297 | } |
7298 | } |
7299 | |
7300 | void SIInstrInfo::convertNonUniformLoopRegion( |
7301 | MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { |
7302 | MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); |
7303 | |
7304 | assert(TI != LoopEnd->end()); |
7305 | |
7306 | MachineInstr *Branch = &(*TI); |
7307 | MachineFunction *MF = LoopEnd->getParent(); |
7308 | MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); |
7309 | |
7310 | if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { |
7311 | |
7312 | Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); |
7313 | Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); |
7314 | MachineInstrBuilder HeaderPHIBuilder = |
7315 | BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); |
7316 | for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), |
7317 | E = LoopEntry->pred_end(); |
7318 | PI != E; ++PI) { |
7319 | if (*PI == LoopEnd) { |
7320 | HeaderPHIBuilder.addReg(BackEdgeReg); |
7321 | } else { |
7322 | MachineBasicBlock *PMBB = *PI; |
7323 | Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); |
7324 | materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), |
7325 | ZeroReg, 0); |
7326 | HeaderPHIBuilder.addReg(ZeroReg); |
7327 | } |
7328 | HeaderPHIBuilder.addMBB(*PI); |
7329 | } |
7330 | MachineInstr *HeaderPhi = HeaderPHIBuilder; |
7331 | MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), |
7332 | get(AMDGPU::SI_IF_BREAK), BackEdgeReg) |
7333 | .addReg(DstReg) |
7334 | .add(Branch->getOperand(0)); |
7335 | MachineInstr *SILOOP = |
7336 | BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) |
7337 | .addReg(BackEdgeReg) |
7338 | .addMBB(LoopEntry); |
7339 | |
7340 | LoopEntry->insert(LoopEntry->begin(), HeaderPhi); |
7341 | LoopEnd->erase(TI); |
7342 | LoopEnd->insert(LoopEnd->end(), SIIFBREAK); |
7343 | LoopEnd->insert(LoopEnd->end(), SILOOP); |
7344 | } |
7345 | } |
7346 | |
7347 | ArrayRef<std::pair<int, const char *>> |
7348 | SIInstrInfo::getSerializableTargetIndices() const { |
7349 | static const std::pair<int, const char *> TargetIndices[] = { |
7350 | {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, |
7351 | {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, |
7352 | {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, |
7353 | {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, |
7354 | {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; |
7355 | return makeArrayRef(TargetIndices); |
7356 | } |
7357 | |
7358 | |
7359 | // This is used by the post-RA scheduler (SchedulePostRAList.cpp). |
7360 | ScheduleHazardRecognizer * |
7361 | SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
7362 | const ScheduleDAG *DAG) const { |
7363 | return new GCNHazardRecognizer(DAG->MF); |
7364 | } |
7365 | |
7366 | |
7367 | |
7368 | ScheduleHazardRecognizer * |
7369 | SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { |
7370 | return new GCNHazardRecognizer(MF); |
7371 | } |
7372 | |
7373 | std::pair<unsigned, unsigned> |
7374 | SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
7375 | return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); |
7376 | } |
7377 | |
7378 | ArrayRef<std::pair<unsigned, const char *>> |
7379 | SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
7380 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
7381 | { MO_GOTPCREL, "amdgpu-gotprel" }, |
7382 | { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, |
7383 | { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, |
7384 | { MO_REL32_LO, "amdgpu-rel32-lo" }, |
7385 | { MO_REL32_HI, "amdgpu-rel32-hi" }, |
7386 | { MO_ABS32_LO, "amdgpu-abs32-lo" }, |
7387 | { MO_ABS32_HI, "amdgpu-abs32-hi" }, |
7388 | }; |
7389 | |
7390 | return makeArrayRef(TargetFlags); |
7391 | } |
7392 | |
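     | // Non-terminator, non-COPY instructions that write EXEC at the top of a block |
     | // are treated as part of the block prologue. |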
7393 | bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { |
7394 | return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && |
7395 | MI.modifiesRegister(AMDGPU::EXEC, &RI); |
7396 | } |
7397 | |
7398 | MachineInstrBuilder |
7399 | SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, |
7400 | MachineBasicBlock::iterator I, |
7401 | const DebugLoc &DL, |
7402 | Register DestReg) const { |
7403 | if (ST.hasAddNoCarry()) |
7404 | return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); |
7405 | |
7406 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
7407 | Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); |
7408 | MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); |
7409 | |
7410 | return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) |
7411 | .addReg(UnusedCarry, RegState::Define | RegState::Dead); |
7412 | } |
7413 | |
7414 | MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, |
7415 | MachineBasicBlock::iterator I, |
7416 | const DebugLoc &DL, |
7417 | Register DestReg, |
7418 | RegScavenger &RS) const { |
7419 | if (ST.hasAddNoCarry()) |
7420 | return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); |
7421 | |
7422 | // If VCC is not currently in use, prefer it for the unused carry output. |
7423 | Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) |
7424 | ? Register(RI.getVCC()) |
7425 | : RS.scavengeRegister(RI.getBoolRC(), I, 0, false); |
7426 | |
7427 | // If no register is available for the carry, return an empty builder; callers must check for this. |
7428 | if (!UnusedCarry.isValid()) |
7429 | return MachineInstrBuilder(); |
7430 | |
7431 | return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) |
7432 | .addReg(UnusedCarry, RegState::Define | RegState::Dead); |
7433 | } |
7434 | |
7435 | bool SIInstrInfo::isKillTerminator(unsigned Opcode) { |
7436 | switch (Opcode) { |
7437 | case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: |
7438 | case AMDGPU::SI_KILL_I1_TERMINATOR: |
7439 | return true; |
7440 | default: |
7441 | return false; |
7442 | } |
7443 | } |
7444 | |
7445 | const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { |
7446 | switch (Opcode) { |
7447 | case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: |
7448 | return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); |
7449 | case AMDGPU::SI_KILL_I1_PSEUDO: |
7450 | return get(AMDGPU::SI_KILL_I1_TERMINATOR); |
7451 | default: |
7452 | llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); |
7453 | } |
7454 | } |
7455 | |
7456 | void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const { |
7457 | if (!ST.isWave32()) |
7458 | return; |
7459 | |
7460 | for (auto &Op : MI.implicit_operands()) { |
7461 | if (Op.isReg() && Op.getReg() == AMDGPU::VCC) |
7462 | Op.setReg(AMDGPU::VCC_LO); |
7463 | } |
7464 | } |
7465 | |
7466 | bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { |
7467 | if (!isSMRD(MI)) |
7468 | return false; |
7469 | |
7470 | // Check that it is using a buffer resource (SGPR_128 sbase). |
7471 | int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase); |
7472 | if (Idx == -1) |
7473 | return false; |
7474 | |
7475 | const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; |
7476 | return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); |
7477 | } |
7478 | |
7479 | |
7480 | |
7481 | |
7482 | |
7483 | |
7484 | |
7485 | |
7486 | |
7487 | |
7488 | |
7489 | |
7490 | |
7491 | |
7492 | |
7493 | |
7494 | |
7495 | |
7496 | |
7497 | |
7498 | |
7499 | |
7500 | |
7501 | |
7502 | |
7503 | |
7504 | |
7505 | |
7506 | |
7507 | // Return whether the given immediate offset can be encoded directly in a FLAT, |
7508 | // GLOBAL or SCRATCH instruction for the given address space and variant. |
7509 | bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, |
7510 | uint64_t FlatVariant) const { |
7511 | |
7512 | if (!ST.hasFlatInstOffsets()) |
7513 | return false; |
7514 | |
7515 | if (ST.hasFlatSegmentOffsetBug() && FlatVariant == SIInstrFlags::FLAT && |
7516 | (AddrSpace == AMDGPUAS::FLAT_ADDRESS || |
7517 | AddrSpace == AMDGPUAS::GLOBAL_ADDRESS)) |
7518 | return false; |
7519 | |
7520 | bool Signed = FlatVariant != SIInstrFlags::FLAT; |
7521 | if (ST.hasNegativeScratchOffsetBug() && |
7522 | FlatVariant == SIInstrFlags::FlatScratch) |
7523 | Signed = false; |
7524 | if (ST.hasNegativeUnalignedScratchOffsetBug() && |
7525 | FlatVariant == SIInstrFlags::FlatScratch && Offset < 0 && |
7526 | (Offset % 4) != 0) { |
7527 | return false; |
7528 | } |
7529 | |
7530 | unsigned N = AMDGPU::getNumFlatOffsetBits(ST, Signed); |
7531 | return Signed ? isIntN(N, Offset) : isUIntN(N, Offset); |
7532 | } |
7533 | |
7534 | // Split COffsetVal into {immediate offset field, remainder offset} values. |
7535 | std::pair<int64_t, int64_t> |
7536 | SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, |
7537 | uint64_t FlatVariant) const { |
7538 | int64_t RemainderOffset = COffsetVal; |
7539 | int64_t ImmField = 0; |
7540 | bool Signed = FlatVariant != SIInstrFlags::FLAT; |
7541 | if (ST.hasNegativeScratchOffsetBug() && |
7542 | FlatVariant == SIInstrFlags::FlatScratch) |
7543 | Signed = false; |
7544 | |
7545 | const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, Signed); |
7546 | if (Signed) { |
7547 | |
7548 | int64_t D = 1LL << (NumBits - 1); |
7549 | RemainderOffset = (COffsetVal / D) * D; |
7550 | ImmField = COffsetVal - RemainderOffset; |
7551 | |
7552 | if (ST.hasNegativeUnalignedScratchOffsetBug() && |
7553 | FlatVariant == SIInstrFlags::FlatScratch && ImmField < 0 && |
7554 | (ImmField % 4) != 0) { |
7555 | |
7556 | RemainderOffset += ImmField % 4; |
7557 | ImmField -= ImmField % 4; |
7558 | } |
7559 | } else if (COffsetVal >= 0) { |
7560 | ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits); |
7561 | RemainderOffset = COffsetVal - ImmField; |
7562 | } |
7563 | |
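     | // For illustration (the field width is subtarget dependent): with a 12-bit |
     | // signed immediate field, COffsetVal = 5000 splits into RemainderOffset = 4096 |
     | // (the largest multiple of 2048 not exceeding 5000) and ImmField = 904, which |
     | // fits in the immediate. |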
7564 | assert(isLegalFLATOffset(ImmField, AddrSpace, FlatVariant)); |
7565 | assert(RemainderOffset + ImmField == COffsetVal); |
7566 | return {ImmField, RemainderOffset}; |
7567 | } |
7568 | |
7569 | // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td. |
7570 | enum SIEncodingFamily { |
7571 | SI = 0, |
7572 | VI = 1, |
7573 | SDWA = 2, |
7574 | SDWA9 = 3, |
7575 | GFX80 = 4, |
7576 | GFX9 = 5, |
7577 | GFX10 = 6, |
7578 | SDWA10 = 7, |
7579 | GFX90A = 8 |
7580 | }; |
7581 | |
7582 | static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) { |
7583 | switch (ST.getGeneration()) { |
7584 | default: |
7585 | break; |
7586 | case AMDGPUSubtarget::SOUTHERN_ISLANDS: |
7587 | case AMDGPUSubtarget::SEA_ISLANDS: |
7588 | return SIEncodingFamily::SI; |
7589 | case AMDGPUSubtarget::VOLCANIC_ISLANDS: |
7590 | case AMDGPUSubtarget::GFX9: |
7591 | return SIEncodingFamily::VI; |
7592 | case AMDGPUSubtarget::GFX10: |
7593 | return SIEncodingFamily::GFX10; |
7594 | } |
7595 | llvm_unreachable("Unknown subtarget generation!"); |
7596 | } |
7597 | |
7598 | bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const { |
7599 | switch(MCOp) { |
7600 | // These opcodes are only valid in assembly. They use indirect register |
7601 | // addressing, which codegen does not currently handle, so pseudoToMCOpcode |
7602 | // must not select them (for instance via the DPP combiner or the SDWA |
7603 | // peephole). |
7604 | case AMDGPU::V_MOVRELS_B32_dpp_gfx10: |
7605 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: |
7606 | case AMDGPU::V_MOVRELD_B32_dpp_gfx10: |
7607 | case AMDGPU::V_MOVRELD_B32_sdwa_gfx10: |
7608 | case AMDGPU::V_MOVRELSD_B32_dpp_gfx10: |
7609 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: |
7610 | case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10: |
7611 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: |
7612 | return true; |
7613 | default: |
7614 | return false; |
7615 | } |
7616 | } |
7617 | |
7618 | int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { |
7619 | SIEncodingFamily Gen = subtargetEncodingFamily(ST); |
7620 | |
7621 | if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && |
7622 | ST.getGeneration() == AMDGPUSubtarget::GFX9) |
7623 | Gen = SIEncodingFamily::GFX9; |
7624 | |
7625 | |
7626 | // Adjust the encoding family to GFX80 for D16 buffer instructions when the |
7627 | // subtarget has the UnpackedD16VMem feature. |
7628 | if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) |
7629 | Gen = SIEncodingFamily::GFX80; |
7630 | |
7631 | if (get(Opcode).TSFlags & SIInstrFlags::SDWA) { |
7632 | switch (ST.getGeneration()) { |
7633 | default: |
7634 | Gen = SIEncodingFamily::SDWA; |
7635 | break; |
7636 | case AMDGPUSubtarget::GFX9: |
7637 | Gen = SIEncodingFamily::SDWA9; |
7638 | break; |
7639 | case AMDGPUSubtarget::GFX10: |
7640 | Gen = SIEncodingFamily::SDWA10; |
7641 | break; |
7642 | } |
7643 | } |
7644 | |
7645 | int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); |
7646 | |
7647 | // -1 means that Opcode is already a native instruction. |
7648 | if (MCOp == -1) |
7649 | return Opcode; |
7650 | |
7651 | if (ST.hasGFX90AInsts()) { |
7652 | uint16_t NMCOp = (uint16_t)-1; |
7653 | NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); |
7654 | if (NMCOp == (uint16_t)-1) |
7655 | NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9); |
7656 | if (NMCOp != (uint16_t)-1) |
7657 | MCOp = NMCOp; |
7658 | } |
7659 | |
7660 | |
7661 | // (uint16_t)-1 means that Opcode is a pseudo instruction that has no lowering. |
7662 | if (MCOp == (uint16_t)-1) |
7663 | return -1; |
7664 | |
7665 | if (isAsmOnlyOpcode(MCOp)) |
7666 | return -1; |
7667 | |
7668 | return MCOp; |
7669 | } |
7670 | |
7671 | static |
7672 | TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) { |
7673 | assert(RegOpnd.isReg()); |
7674 | return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() : |
7675 | getRegSubRegPair(RegOpnd); |
7676 | } |
7677 | |
7678 | TargetInstrInfo::RegSubRegPair |
7679 | llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) { |
7680 | assert(MI.isRegSequence()); |
7681 | for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I) |
7682 | if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) { |
7683 | auto &RegOp = MI.getOperand(1 + 2 * I); |
7684 | return getRegOrUndef(RegOp); |
7685 | } |
7686 | return TargetInstrInfo::RegSubRegPair(); |
7687 | } |
7688 | |
7689 | // Follow a sub-register def of RSR through REG_SEQUENCE / INSERT_SUBREG, |
7690 | // rewriting RSR to the operand that actually produces that sub-register. |
7691 | static bool followSubRegDef(MachineInstr &MI, |
7692 | TargetInstrInfo::RegSubRegPair &RSR) { |
7693 | if (!RSR.SubReg) |
7694 | return false; |
7695 | switch (MI.getOpcode()) { |
7696 | default: break; |
7697 | case AMDGPU::REG_SEQUENCE: |
7698 | RSR = getRegSequenceSubReg(MI, RSR.SubReg); |
7699 | return true; |
7700 | |
7701 | case AMDGPU::INSERT_SUBREG: |
7702 | if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm()) |
7703 | |
7704 | RSR = getRegOrUndef(MI.getOperand(2)); |
7705 | else { |
7706 | auto R1 = getRegOrUndef(MI.getOperand(1)); |
7707 | if (R1.SubReg) |
7708 | return false; |
7709 | RSR.Reg = R1.Reg; |
7710 | } |
7711 | return true; |
7712 | } |
7713 | return false; |
7714 | } |
7715 | |
7716 | MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, |
7717 | MachineRegisterInfo &MRI) { |
7718 | assert(MRI.isSSA()); |
7719 | if (!P.Reg.isVirtual()) |
7720 | return nullptr; |
7721 | |
7722 | auto RSR = P; |
7723 | auto *DefInst = MRI.getVRegDef(RSR.Reg); |
7724 | while (auto *MI = DefInst) { |
7725 | DefInst = nullptr; |
7726 | switch (MI->getOpcode()) { |
7727 | case AMDGPU::COPY: |
7728 | case AMDGPU::V_MOV_B32_e32: { |
7729 | auto &Op1 = MI->getOperand(1); |
7730 | if (Op1.isReg() && Op1.getReg().isVirtual()) { |
7731 | if (Op1.isUndef()) |
7732 | return nullptr; |
7733 | RSR = getRegSubRegPair(Op1); |
7734 | DefInst = MRI.getVRegDef(RSR.Reg); |
7735 | } |
7736 | break; |
7737 | } |
7738 | default: |
7739 | if (followSubRegDef(*MI, RSR)) { |
7740 | if (!RSR.Reg) |
7741 | return nullptr; |
7742 | DefInst = MRI.getVRegDef(RSR.Reg); |
7743 | } |
7744 | } |
7745 | if (!DefInst) |
7746 | return MI; |
7747 | } |
7748 | return nullptr; |
7749 | } |
7750 | |
7751 | bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, |
7752 | Register VReg, |
7753 | const MachineInstr &DefMI, |
7754 | const MachineInstr &UseMI) { |
7755 | assert(MRI.isSSA() && "Must be run on SSA"); |
7756 | |
7757 | auto *TRI = MRI.getTargetRegisterInfo(); |
7758 | auto *DefBB = DefMI.getParent(); |
7759 | |
7760 | // Don't bother searching between blocks, although it is possible this block |
7761 | // doesn't modify exec. |
7762 | if (UseMI.getParent() != DefBB) |
7763 | return true; |
7764 | |
7765 | const int MaxInstScan = 20; |
7766 | int NumInst = 0; |
7767 | |
7768 | |
7769 | auto E = UseMI.getIterator(); |
7770 | for (auto I = std::next(DefMI.getIterator()); I != E; ++I) { |
7771 | if (I->isDebugInstr()) |
7772 | continue; |
7773 | |
7774 | if (++NumInst > MaxInstScan) |
7775 | return true; |
7776 | |
7777 | if (I->modifiesRegister(AMDGPU::EXEC, TRI)) |
7778 | return true; |
7779 | } |
7780 | |
7781 | return false; |
7782 | } |
7783 | |
7784 | bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, |
7785 | Register VReg, |
7786 | const MachineInstr &DefMI) { |
7787 | assert(MRI.isSSA() && "Must be run on SSA"); |
7788 | |
7789 | auto *TRI = MRI.getTargetRegisterInfo(); |
7790 | auto *DefBB = DefMI.getParent(); |
7791 | |
7792 | const int MaxUseScan = 10; |
7793 | int NumUse = 0; |
7794 | |
7795 | for (auto &Use : MRI.use_nodbg_operands(VReg)) { |
7796 | auto &UseInst = *Use.getParent(); |
7797 | |
7798 | // Don't bother searching between blocks. |
7799 | if (UseInst.getParent() != DefBB) |
7800 | return true; |
7801 | |
7802 | if (++NumUse > MaxUseScan) |
7803 | return true; |
7804 | } |
7805 | |
7806 | if (NumUse == 0) |
7807 | return false; |
7808 | |
7809 | const int MaxInstScan = 20; |
7810 | int NumInst = 0; |
7811 | |
7812 | // Stop scan when we have seen all the uses. |
7813 | for (auto I = std::next(DefMI.getIterator()); ; ++I) { |
7814 | assert(I != DefBB->end()); |
7815 | |
7816 | if (I->isDebugInstr()) |
7817 | continue; |
7818 | |
7819 | if (++NumInst > MaxInstScan) |
7820 | return true; |
7821 | |
7822 | for (const MachineOperand &Op : I->operands()) { |
7823 | |
7824 | // We don't check register masks here: EXEC is only treated as constant within |
7825 | // a single block, and a call (the only user of reg masks) would be a |
7826 | // terminator that ends the block anyway. |
7827 | if (!Op.isReg()) |
7828 | continue; |
7829 | |
7830 | Register Reg = Op.getReg(); |
7831 | if (Op.isUse()) { |
7832 | if (Reg == VReg && --NumUse == 0) |
7833 | return false; |
7834 | } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) |
7835 | return true; |
7836 | } |
7837 | } |
7838 | } |
7839 | |
7840 | MachineInstr *SIInstrInfo::createPHIDestinationCopy( |
7841 | MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, |
7842 | const DebugLoc &DL, Register Src, Register Dst) const { |
7843 | auto Cur = MBB.begin(); |
7844 | if (Cur != MBB.end()) |
7845 | do { |
7846 | if (!Cur->isPHI() && Cur->readsRegister(Dst)) |
7847 | return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); |
7848 | ++Cur; |
7849 | } while (Cur != MBB.end() && Cur != LastPHIIt); |
7850 | |
7851 | return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, |
7852 | Dst); |
7853 | } |
7854 | |
7855 | MachineInstr *SIInstrInfo::createPHISourceCopy( |
7856 | MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, |
7857 | const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const { |
7858 | if (InsPt != MBB.end() && |
7859 | (InsPt->getOpcode() == AMDGPU::SI_IF || |
7860 | InsPt->getOpcode() == AMDGPU::SI_ELSE || |
7861 | InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && |
7862 | InsPt->definesRegister(Src)) { |
7863 | InsPt++; |
7864 | return BuildMI(MBB, InsPt, DL, |
7865 | get(ST.isWave32() ? AMDGPU::S_MOV_B32_term |
7866 | : AMDGPU::S_MOV_B64_term), |
7867 | Dst) |
7868 | .addReg(Src, 0, SrcSubReg) |
7869 | .addReg(AMDGPU::EXEC, RegState::Implicit); |
7870 | } |
7871 | return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, |
7872 | Dst); |
7873 | } |
7874 | |
7875 | bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } |
7876 | |
7877 | MachineInstr *SIInstrInfo::foldMemoryOperandImpl( |
7878 | MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, |
7879 | MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, |
7880 | VirtRegMap *VRM) const { |
7881 | |
7882 | // This hook deliberately folds nothing. Consider a copy like: |
7883 | // |
7884 | //   %0:sreg_32 = COPY $m0 |
7885 | // |
7886 | // If the copy is not coalesced away, %0 may be spilled, but $m0 itself cannot |
7887 | // be spilled directly. Constraining the virtual register to a class that |
7888 | // excludes M0 and EXEC (SReg_32_XM0_XEXEC / SReg_64_XEXEC) prevents such a |
7889 | // copy from being folded into a spill of an unspillable physical register, |
7890 | // which is all this override does before returning nullptr. |
7891 | |
7892 | |
7893 | if (MI.isFullCopy()) { |
7894 | Register DstReg = MI.getOperand(0).getReg(); |
7895 | Register SrcReg = MI.getOperand(1).getReg(); |
7896 | if ((DstReg.isVirtual() || SrcReg.isVirtual()) && |
7897 | (DstReg.isVirtual() != SrcReg.isVirtual())) { |
7898 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
7899 | Register VirtReg = DstReg.isVirtual() ? DstReg : SrcReg; |
7900 | const TargetRegisterClass *RC = MRI.getRegClass(VirtReg); |
7901 | if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) { |
7902 | MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); |
7903 | return nullptr; |
7904 | } else if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) { |
7905 | MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass); |
7906 | return nullptr; |
7907 | } |
7908 | } |
7909 | } |
7910 | |
7911 | return nullptr; |
7912 | } |
7913 | |
7914 | unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
7915 | const MachineInstr &MI, |
7916 | unsigned *PredCost) const { |
7917 | if (MI.isBundle()) { |
7918 | MachineBasicBlock::const_instr_iterator I(MI.getIterator()); |
7919 | MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end()); |
7920 | unsigned Lat = 0, Count = 0; |
7921 | for (++I; I != E && I->isBundledWithPred(); ++I) { |
7922 | ++Count; |
7923 | Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I)); |
7924 | } |
7925 | return Lat + Count - 1; |
7926 | } |
7927 | |
7928 | return SchedModel.computeInstrLatency(&MI); |
7929 | } |
7930 | |
7931 | unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) { |
7932 | switch (MF.getFunction().getCallingConv()) { |
7933 | case CallingConv::AMDGPU_PS: |
7934 | return 1; |
7935 | case CallingConv::AMDGPU_VS: |
7936 | return 2; |
7937 | case CallingConv::AMDGPU_GS: |
7938 | return 3; |
7939 | case CallingConv::AMDGPU_HS: |
7940 | case CallingConv::AMDGPU_LS: |
7941 | case CallingConv::AMDGPU_ES: |
7942 | report_fatal_error("ds_ordered_count unsupported for this calling conv"); |
7943 | case CallingConv::AMDGPU_CS: |
7944 | case CallingConv::AMDGPU_KERNEL: |
7945 | case CallingConv::C: |
7946 | case CallingConv::Fast: |
7947 | default: |
7948 | // Assume other calling conventions are various compute callable functions. |
7949 | return 0; |
7950 | } |
7951 | } |
7952 | |
7953 | bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
7954 | Register &SrcReg2, int64_t &CmpMask, |
7955 | int64_t &CmpValue) const { |
7956 | if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg()) |
7957 | return false; |
7958 | |
7959 | switch (MI.getOpcode()) { |
7960 | default: |
7961 | break; |
7962 | case AMDGPU::S_CMP_EQ_U32: |
7963 | case AMDGPU::S_CMP_EQ_I32: |
7964 | case AMDGPU::S_CMP_LG_U32: |
7965 | case AMDGPU::S_CMP_LG_I32: |
7966 | case AMDGPU::S_CMP_LT_U32: |
7967 | case AMDGPU::S_CMP_LT_I32: |
7968 | case AMDGPU::S_CMP_GT_U32: |
7969 | case AMDGPU::S_CMP_GT_I32: |
7970 | case AMDGPU::S_CMP_LE_U32: |
7971 | case AMDGPU::S_CMP_LE_I32: |
7972 | case AMDGPU::S_CMP_GE_U32: |
7973 | case AMDGPU::S_CMP_GE_I32: |
7974 | case AMDGPU::S_CMP_EQ_U64: |
7975 | case AMDGPU::S_CMP_LG_U64: |
7976 | SrcReg = MI.getOperand(0).getReg(); |
7977 | if (MI.getOperand(1).isReg()) { |
7978 | if (MI.getOperand(1).getSubReg()) |
7979 | return false; |
7980 | SrcReg2 = MI.getOperand(1).getReg(); |
7981 | CmpValue = 0; |
7982 | } else if (MI.getOperand(1).isImm()) { |
7983 | SrcReg2 = Register(); |
7984 | CmpValue = MI.getOperand(1).getImm(); |
7985 | } else { |
7986 | return false; |
7987 | } |
7988 | CmpMask = ~0; |
7989 | return true; |
7990 | case AMDGPU::S_CMPK_EQ_U32: |
7991 | case AMDGPU::S_CMPK_EQ_I32: |
7992 | case AMDGPU::S_CMPK_LG_U32: |
7993 | case AMDGPU::S_CMPK_LG_I32: |
7994 | case AMDGPU::S_CMPK_LT_U32: |
7995 | case AMDGPU::S_CMPK_LT_I32: |
7996 | case AMDGPU::S_CMPK_GT_U32: |
7997 | case AMDGPU::S_CMPK_GT_I32: |
7998 | case AMDGPU::S_CMPK_LE_U32: |
7999 | case AMDGPU::S_CMPK_LE_I32: |
8000 | case AMDGPU::S_CMPK_GE_U32: |
8001 | case AMDGPU::S_CMPK_GE_I32: |
8002 | SrcReg = MI.getOperand(0).getReg(); |
8003 | SrcReg2 = Register(); |
8004 | CmpValue = MI.getOperand(1).getImm(); |
8005 | CmpMask = ~0; |
8006 | return true; |
8007 | } |
8008 | |
8009 | return false; |
8010 | } |
8011 | |
8012 | bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, |
8013 | Register SrcReg2, int64_t CmpMask, |
8014 | int64_t CmpValue, |
8015 | const MachineRegisterInfo *MRI) const { |
8016 | if (!SrcReg || SrcReg.isPhysical()) |
8017 | return false; |
8018 | |
8019 | if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue)) |
8020 | return false; |
8021 | |
8022 | const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, |
8023 | this](int64_t ExpectedValue, unsigned SrcSize, |
8024 | bool IsReversable, bool IsSigned) -> bool { |
8025 | |
8026 | |
8027 | |
8028 | |
8029 | |
8030 | |
8031 | |
8032 | |
8033 | |
8034 | |
8035 | |
8036 | |
8037 | |
8038 | |
8039 | |
8040 | |
8041 | |
8042 | |
8043 | |
8044 | // Fold a compare of (s_and_b32/b64 $src, 1 << n) against 0 or 1 << n: the AND |
8045 | // already sets SCC to the same condition, so the compare can be removed. If |
8046 | // the AND result has no other uses, the AND itself is then replaced by |
8047 | // S_BITCMP0_* / S_BITCMP1_* on bit n. |
8048 | MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); |
8049 | if (!Def || Def->getParent() != CmpInstr.getParent()) |
8050 | return false; |
8051 | |
8052 | if (Def->getOpcode() != AMDGPU::S_AND_B32 && |
8053 | Def->getOpcode() != AMDGPU::S_AND_B64) |
8054 | return false; |
8055 | |
8056 | int64_t Mask; |
8057 | const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool { |
8058 | if (MO->isImm()) |
8059 | Mask = MO->getImm(); |
8060 | else if (!getFoldableImm(MO, Mask)) |
8061 | return false; |
8062 | Mask &= maxUIntN(SrcSize); |
8063 | return isPowerOf2_64(Mask); |
8064 | }; |
8065 | |
8066 | MachineOperand *SrcOp = &Def->getOperand(1); |
8067 | if (isMask(SrcOp)) |
8068 | SrcOp = &Def->getOperand(2); |
8069 | else if (isMask(&Def->getOperand(2))) |
8070 | SrcOp = &Def->getOperand(1); |
8071 | else |
8072 | return false; |
8073 | |
8074 | unsigned BitNo = countTrailingZeros((uint64_t)Mask); |
8075 | if (IsSigned && BitNo == SrcSize - 1) |
8076 | return false; |
8077 | |
8078 | ExpectedValue <<= BitNo; |
8079 | |
8080 | bool IsReversedCC = false; |
8081 | if (CmpValue != ExpectedValue) { |
8082 | if (!IsReversable) |
8083 | return false; |
8084 | IsReversedCC = CmpValue == (ExpectedValue ^ Mask); |
8085 | if (!IsReversedCC) |
8086 | return false; |
8087 | } |
8088 | |
8089 | Register DefReg = Def->getOperand(0).getReg(); |
8090 | if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) |
8091 | return false; |
8092 | |
8093 | for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); |
8094 | I != E; ++I) { |
8095 | if (I->modifiesRegister(AMDGPU::SCC, &RI) || |
8096 | I->killsRegister(AMDGPU::SCC, &RI)) |
8097 | return false; |
8098 | } |
8099 | |
8100 | MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC); |
8101 | SccDef->setIsDead(false); |
8102 | CmpInstr.eraseFromParent(); |
8103 | |
8104 | if (!MRI->use_nodbg_empty(DefReg)) { |
8105 | assert(!IsReversedCC); |
8106 | return true; |
8107 | } |
8108 | |
8109 | // The AND result is otherwise unused; replace the AND with an S_BITCMP. |
8110 | MachineBasicBlock *MBB = Def->getParent(); |
8111 | |
8112 | unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32 |
8113 | : AMDGPU::S_BITCMP1_B32 |
8114 | : IsReversedCC ? AMDGPU::S_BITCMP0_B64 |
8115 | : AMDGPU::S_BITCMP1_B64; |
8116 | |
8117 | BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc)) |
8118 | .add(*SrcOp) |
8119 | .addImm(BitNo); |
8120 | Def->eraseFromParent(); |
8121 | |
8122 | return true; |
8123 | }; |
8124 | |
8125 | switch (CmpInstr.getOpcode()) { |
8126 | default: |
8127 | break; |
8128 | case AMDGPU::S_CMP_EQ_U32: |
8129 | case AMDGPU::S_CMP_EQ_I32: |
8130 | case AMDGPU::S_CMPK_EQ_U32: |
8131 | case AMDGPU::S_CMPK_EQ_I32: |
8132 | return optimizeCmpAnd(1, 32, true, false); |
8133 | case AMDGPU::S_CMP_GE_U32: |
8134 | case AMDGPU::S_CMPK_GE_U32: |
8135 | return optimizeCmpAnd(1, 32, false, false); |
8136 | case AMDGPU::S_CMP_GE_I32: |
8137 | case AMDGPU::S_CMPK_GE_I32: |
8138 | return optimizeCmpAnd(1, 32, false, true); |
8139 | case AMDGPU::S_CMP_EQ_U64: |
8140 | return optimizeCmpAnd(1, 64, true, false); |
8141 | case AMDGPU::S_CMP_LG_U32: |
8142 | case AMDGPU::S_CMP_LG_I32: |
8143 | case AMDGPU::S_CMPK_LG_U32: |
8144 | case AMDGPU::S_CMPK_LG_I32: |
8145 | return optimizeCmpAnd(0, 32, true, false); |
8146 | case AMDGPU::S_CMP_GT_U32: |
8147 | case AMDGPU::S_CMPK_GT_U32: |
8148 | return optimizeCmpAnd(0, 32, false, false); |
8149 | case AMDGPU::S_CMP_GT_I32: |
8150 | case AMDGPU::S_CMPK_GT_I32: |
8151 | return optimizeCmpAnd(0, 32, false, true); |
8152 | case AMDGPU::S_CMP_LG_U64: |
8153 | return optimizeCmpAnd(0, 64, true, false); |
8154 | } |
8155 | |
8156 | return false; |
8157 | } |