1//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines an instruction selector for the AMDGPU target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUISelDAGToDAG.h"
15#include "AMDGPU.h"
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPUSubtarget.h"
18#include "AMDGPUTargetMachine.h"
19#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20#include "MCTargetDesc/R600MCTargetDesc.h"
21#include "R600RegisterInfo.h"
22#include "SIISelLowering.h"
23#include "SIMachineFunctionInfo.h"
24#include "llvm/Analysis/UniformityAnalysis.h"
25#include "llvm/Analysis/ValueTracking.h"
26#include "llvm/CodeGen/FunctionLoweringInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
28#include "llvm/CodeGen/SelectionDAGISel.h"
29#include "llvm/IR/IntrinsicsAMDGPU.h"
30#include "llvm/InitializePasses.h"
31
32#ifdef EXPENSIVE_CHECKS
33#include "llvm/Analysis/LoopInfo.h"
34#include "llvm/IR/Dominators.h"
35#endif
36
37#define DEBUG_TYPE "amdgpu-isel"
38
39using namespace llvm;
40
41//===----------------------------------------------------------------------===//
42// Instruction Selector Implementation
43//===----------------------------------------------------------------------===//
44
45namespace {
46static SDValue stripBitcast(SDValue Val) {
47 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
48}
49
50// Figure out if this is really an extract of the high 16-bits of a dword.
51static bool isExtractHiElt(SDValue In, SDValue &Out) {
52 In = stripBitcast(In);
53
54 if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
55 if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
56 if (!Idx->isOne())
57 return false;
58 Out = In.getOperand(0);
59 return true;
60 }
61 }
62
63 if (In.getOpcode() != ISD::TRUNCATE)
64 return false;
65
66 SDValue Srl = In.getOperand(0);
67 if (Srl.getOpcode() == ISD::SRL) {
68 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
69 if (ShiftAmt->getZExtValue() == 16) {
70 Out = stripBitcast(Srl.getOperand(0));
71 return true;
72 }
73 }
74 }
75
76 return false;
77}
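// For example, both of these forms reduce to the same source dword V:
//   (i16 (extract_vector_elt (v2i16 V), 1))
//   (i16 (trunc (srl (i32 V), 16)))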
78
79// Look through operations that obscure just looking at the low 16-bits of the
80// same register.
81static SDValue stripExtractLoElt(SDValue In) {
82 if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
83 if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
84 if (Idx->isZero() && In.getValueSizeInBits() <= 32)
85 return In.getOperand(0);
86 }
87 }
88
89 if (In.getOpcode() == ISD::TRUNCATE) {
90 SDValue Src = In.getOperand(0);
91 if (Src.getValueType().getSizeInBits() == 32)
92 return stripBitcast(Src);
93 }
94
95 return In;
96}
97
98} // end anonymous namespace
99
100INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
101 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
102INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
103INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
104INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
105#ifdef EXPENSIVE_CHECKS
106INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
107INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
108#endif
109INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
110 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
111
112/// This pass converts a legalized DAG into an AMDGPU-specific
113/// DAG, ready for instruction scheduling.
114FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
115 CodeGenOpt::Level OptLevel) {
116 return new AMDGPUDAGToDAGISel(TM, OptLevel);
117}
118
119AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
120 CodeGenOpt::Level OptLevel)
121 : SelectionDAGISel(ID, TM, OptLevel) {
122 EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
123}
124
125bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
126#ifdef EXPENSIVE_CHECKS
127 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
128 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
129 for (auto &L : LI->getLoopsInPreorder()) {
130 assert(L->isLCSSAForm(DT));
131 }
132#endif
133 Subtarget = &MF.getSubtarget<GCNSubtarget>();
134 Mode = SIModeRegisterDefaults(MF.getFunction());
135 return SelectionDAGISel::runOnMachineFunction(MF);
136}
137
138bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
139 // XXX - only need to list legal operations.
140 switch (Opc) {
141 case ISD::FADD:
142 case ISD::FSUB:
143 case ISD::FMUL:
144 case ISD::FDIV:
145 case ISD::FREM:
146 case ISD::FCANONICALIZE:
147 case ISD::UINT_TO_FP:
148 case ISD::SINT_TO_FP:
149 case ISD::FABS:
150 // Fabs is lowered to a bit operation, but it's an and which will clear the
151 // high bits anyway.
152 case ISD::FSQRT:
153 case ISD::FSIN:
154 case ISD::FCOS:
155 case ISD::FPOWI:
156 case ISD::FPOW:
157 case ISD::FLOG:
158 case ISD::FLOG2:
159 case ISD::FLOG10:
160 case ISD::FEXP:
161 case ISD::FEXP2:
162 case ISD::FCEIL:
163 case ISD::FTRUNC:
164 case ISD::FRINT:
165 case ISD::FNEARBYINT:
166 case ISD::FROUND:
167 case ISD::FFLOOR:
168 case ISD::FMINNUM:
169 case ISD::FMAXNUM:
170 case AMDGPUISD::FRACT:
171 case AMDGPUISD::CLAMP:
172 case AMDGPUISD::COS_HW:
173 case AMDGPUISD::SIN_HW:
174 case AMDGPUISD::FMIN3:
175 case AMDGPUISD::FMAX3:
176 case AMDGPUISD::FMED3:
177 case AMDGPUISD::FMAD_FTZ:
178 case AMDGPUISD::RCP:
179 case AMDGPUISD::RSQ:
180 case AMDGPUISD::RCP_IFLAG:
181 case AMDGPUISD::LDEXP:
182 // On gfx10, all 16-bit instructions preserve the high bits.
183 return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
184 case ISD::FP_ROUND:
185 // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
186 // high bits on gfx9.
187 // TODO: If we had the source node we could see if the source was fma/mad
188 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
189 case ISD::FMA:
190 case ISD::FMAD:
191 case AMDGPUISD::DIV_FIXUP:
192 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
193 default:
194 // fcopysign, select and others may be lowered to 32-bit bit operations
195 // which don't zero the high bits.
196 return false;
197 }
198}
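// For example, per the logic above a v_sqrt_f16 result on gfx9 is assumed to
// have zeroed bits 31:16 of the destination dword, while on gfx10 the same
// instruction preserves those bits and they may hold stale data.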
199
200void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
201 AU.addRequired<AMDGPUArgumentUsageInfo>();
202 AU.addRequired<UniformityInfoWrapperPass>();
203#ifdef EXPENSIVE_CHECKS
204 AU.addRequired<DominatorTreeWrapperPass>();
205 AU.addRequired<LoopInfoWrapperPass>();
206#endif
207 SelectionDAGISel::getAnalysisUsage(AU);
208}
209
210bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
211 assert(Subtarget->d16PreservesUnusedBits());
212 MVT VT = N->getValueType(0).getSimpleVT();
213 if (VT != MVT::v2i16 && VT != MVT::v2f16)
214 return false;
215
216 SDValue Lo = N->getOperand(0);
217 SDValue Hi = N->getOperand(1);
218
219 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
220
221 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
222 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
223 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
224
225 // Need to check for possible indirect dependencies on the other half of the
226 // vector to avoid introducing a cycle.
227 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
228 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
229
230 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
231 SDValue Ops[] = {
232 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
233 };
234
235 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
236 if (LdHi->getMemoryVT() == MVT::i8) {
237 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
238 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
239 } else {
240 assert(LdHi->getMemoryVT() == MVT::i16);
241 }
242
243 SDValue NewLoadHi =
244 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
245 Ops, LdHi->getMemoryVT(),
246 LdHi->getMemOperand());
247
248 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
249 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
250 return true;
251 }
252
253 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
254 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
255 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
256 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
257 if (LdLo && Lo.hasOneUse()) {
258 SDValue TiedIn = getHi16Elt(Hi);
259 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
260 return false;
261
262 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
263 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
264 if (LdLo->getMemoryVT() == MVT::i8) {
265 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
266 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
267 } else {
268 assert(LdLo->getMemoryVT() == MVT::i16);
269 }
270
271 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
272
273 SDValue Ops[] = {
274 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
275 };
276
277 SDValue NewLoadLo =
278 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
279 Ops, LdLo->getMemoryVT(),
280 LdLo->getMemOperand());
281
282 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
283 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
284 return true;
285 }
286
287 return false;
288}
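// In effect, for (v2i16 (build_vector lo, (load ptr))) this emits a single
// load_d16_hi that writes only the high half of the result register and keeps
// the low half from the tied-in value, instead of a load plus a pack.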
289
290void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
291 if (!Subtarget->d16PreservesUnusedBits())
292 return;
293
294 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
295
296 bool MadeChange = false;
297 while (Position != CurDAG->allnodes_begin()) {
298 SDNode *N = &*--Position;
299 if (N->use_empty())
300 continue;
301
302 switch (N->getOpcode()) {
303 case ISD::BUILD_VECTOR:
304 MadeChange |= matchLoadD16FromBuildVector(N);
305 break;
306 default:
307 break;
308 }
309 }
310
311 if (MadeChange) {
312 CurDAG->RemoveDeadNodes();
313 LLVM_DEBUG(dbgs() << "After PreProcess:\n";
314 CurDAG->dump(););
315 }
316}
317
318bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
319 bool Negated) const {
320 if (N->isUndef())
321 return true;
322
323 const SIInstrInfo *TII = Subtarget->getInstrInfo();
324 if (Negated) {
325 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
326 return TII->isInlineConstant(-C->getAPIntValue());
327
328 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
329 return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
330
331 } else {
332 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
333 return TII->isInlineConstant(C->getAPIntValue());
334
335 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
336 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
337 }
338
339 return false;
340}
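// For reference, integers in [-16, 64] and a small set of floats (0.0,
// +/-0.5, +/-1.0, +/-2.0, +/-4.0, plus 1/(2*pi) on subtargets that support
// it) encode as inline constants, so with Negated set e.g. a constant of -64
// also matches because 64 is inline.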
341
342/// Determine the register class for \p OpNo
343/// \returns The register class of the virtual register that will be used for
344/// the given operand number \p OpNo or NULL if the register class cannot be
345/// determined.
346const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
347 unsigned OpNo) const {
348 if (!N->isMachineOpcode()) {
349 if (N->getOpcode() == ISD::CopyToReg) {
350 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
351 if (Reg.isVirtual()) {
352 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
353 return MRI.getRegClass(Reg);
354 }
355
356 const SIRegisterInfo *TRI
357 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358 return TRI->getPhysRegBaseClass(Reg);
359 }
360
361 return nullptr;
362 }
363
364 switch (N->getMachineOpcode()) {
365 default: {
366 const MCInstrDesc &Desc =
367 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
368 unsigned OpIdx = Desc.getNumDefs() + OpNo;
369 if (OpIdx >= Desc.getNumOperands())
370 return nullptr;
371 int RegClass = Desc.operands()[OpIdx].RegClass;
372 if (RegClass == -1)
373 return nullptr;
374
375 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
376 }
377 case AMDGPU::REG_SEQUENCE: {
378 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
379 const TargetRegisterClass *SuperRC =
380 Subtarget->getRegisterInfo()->getRegClass(RCID);
381
382 SDValue SubRegOp = N->getOperand(OpNo + 1);
383 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
384 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
385 SubRegIdx);
386 }
387 }
388}
389
390SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
391 SDValue Glue) const {
392 SmallVector <SDValue, 8> Ops;
393 Ops.push_back(NewChain); // Replace the chain.
394 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
395 Ops.push_back(N->getOperand(i));
396
397 Ops.push_back(Glue);
398 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
399}
400
401SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
402 const SITargetLowering& Lowering =
403 *static_cast<const SITargetLowering*>(getTargetLowering());
404
405 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
406
407 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
408 return glueCopyToOp(N, M0, M0.getValue(1));
409}
410
411SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
412 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
413 if (AS == AMDGPUAS::LOCAL_ADDRESS) {
414 if (Subtarget->ldsRequiresM0Init())
415 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
416 } else if (AS == AMDGPUAS::REGION_ADDRESS) {
417 MachineFunction &MF = CurDAG->getMachineFunction();
418 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
419 return
420 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
421 }
422 return N;
423}
424
425MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
426 EVT VT) const {
427 SDNode *Lo = CurDAG->getMachineNode(
428 AMDGPU::S_MOV_B32, DL, MVT::i32,
429 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
430 SDNode *Hi =
431 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
432 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
433 const SDValue Ops[] = {
434 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
435 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
436 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
437
438 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
439}
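// For example, buildSMovImm64(DL, 0x123456789ABCDEF0, MVT::i64) produces
// roughly:
//   %lo  = S_MOV_B32 0x9abcdef0
//   %hi  = S_MOV_B32 0x12345678
//   %imm = REG_SEQUENCE %lo, sub0, %hi, sub1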
440
441void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
442 EVT VT = N->getValueType(0);
443 unsigned NumVectorElts = VT.getVectorNumElements();
444 EVT EltVT = VT.getVectorElementType();
445 SDLoc DL(N);
446 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
447
448 if (NumVectorElts == 1) {
449 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
450 RegClass);
451 return;
452 }
453
454 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
455 "supported yet");
456 // 32 = Max Num Vector Elements
457 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
458 // 1 = Vector Register Class
459 SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
460
461 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
462 Triple::amdgcn;
463 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
464 bool IsRegSeq = true;
465 unsigned NOps = N->getNumOperands();
466 for (unsigned i = 0; i < NOps; i++) {
467 // XXX: Why is this here?
468 if (isa<RegisterSDNode>(N->getOperand(i))) {
469 IsRegSeq = false;
470 break;
471 }
472 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
473 : R600RegisterInfo::getSubRegFromChannel(i);
474 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
475 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
476 }
477 if (NOps != NumVectorElts) {
478 // Fill in the missing undef elements if this was a scalar_to_vector.
479 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
480 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
481 DL, EltVT);
482 for (unsigned i = NOps; i < NumVectorElts; ++i) {
483 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
484 : R600RegisterInfo::getSubRegFromChannel(i);
485 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
486 RegSeqArgs[1 + (2 * i) + 1] =
487 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
488 }
489 }
490
491 if (!IsRegSeq)
492 SelectCode(N);
493 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
494}
495
496void AMDGPUDAGToDAGISel::Select(SDNode *N) {
497 unsigned int Opc = N->getOpcode();
498 if (N->isMachineOpcode()) {
499 N->setNodeId(-1);
500 return; // Already selected.
501 }
502
503 // isa<MemSDNode> almost works but is slightly too permissive for some DS
504 // intrinsics.
505 if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
506 (Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
507 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
508 N = glueCopyToM0LDSInit(N);
509 SelectCode(N);
510 return;
511 }
512
513 switch (Opc) {
514 default:
515 break;
516 // We are selecting i64 ADD here instead of custom lower it during
517 // DAG legalization, so we can fold some i64 ADDs used for address
518 // calculation into the LOAD and STORE instructions.
519 case ISD::ADDC:
520 case ISD::ADDE:
521 case ISD::SUBC:
522 case ISD::SUBE: {
523 if (N->getValueType(0) != MVT::i64)
524 break;
525
526 SelectADD_SUB_I64(N);
527 return;
528 }
529 case ISD::ADDCARRY:
530 case ISD::SUBCARRY:
531 if (N->getValueType(0) != MVT::i32)
532 break;
533
534 SelectAddcSubb(N);
535 return;
536 case ISD::UADDO:
537 case ISD::USUBO: {
538 SelectUADDO_USUBO(N);
539 return;
540 }
541 case AMDGPUISD::FMUL_W_CHAIN: {
542 SelectFMUL_W_CHAIN(N);
543 return;
544 }
545 case AMDGPUISD::FMA_W_CHAIN: {
546 SelectFMA_W_CHAIN(N);
547 return;
548 }
549
550 case ISD::SCALAR_TO_VECTOR:
551 case ISD::BUILD_VECTOR: {
552 EVT VT = N->getValueType(0);
553 unsigned NumVectorElts = VT.getVectorNumElements();
554 if (VT.getScalarSizeInBits() == 16) {
555 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
556 if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
557 ReplaceNode(N, Packed);
558 return;
559 }
560 }
561
562 break;
563 }
564
565 assert(VT.getVectorElementType().bitsEq(MVT::i32));
566 unsigned RegClassID =
567 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
568 SelectBuildVector(N, RegClassID);
569 return;
570 }
571 case ISD::BUILD_PAIR: {
572 SDValue RC, SubReg0, SubReg1;
573 SDLoc DL(N);
574 if (N->getValueType(0) == MVT::i128) {
575 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
576 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
577 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
578 } else if (N->getValueType(0) == MVT::i64) {
579 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
580 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
581 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
582 } else {
583 llvm_unreachable("Unhandled value type for BUILD_PAIR");
584 }
585 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
586 N->getOperand(1), SubReg1 };
587 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
588 N->getValueType(0), Ops));
589 return;
590 }
591
592 case ISD::Constant:
593 case ISD::ConstantFP: {
594 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
595 break;
596
597 uint64_t Imm;
598 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
599 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
600 else {
601 ConstantSDNode *C = cast<ConstantSDNode>(N);
602 Imm = C->getZExtValue();
603 }
604
605 SDLoc DL(N);
606 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
607 return;
608 }
609 case AMDGPUISD::BFE_I32:
610 case AMDGPUISD::BFE_U32: {
611 // There is a scalar version available, but unlike the vector version which
612 // has separate operands for the offset and width, the scalar version packs
613 // the width and offset into a single operand. Try to move to the scalar
614 // version if the offsets are constant, so that we can try to keep extended
615 // loads of kernel arguments in SGPRs.
616
617 // TODO: Technically we could try to pattern match scalar bitshifts of
618 // dynamic values, but it's probably not useful.
619 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
620 if (!Offset)
621 break;
622
623 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
624 if (!Width)
625 break;
626
627 bool Signed = Opc == AMDGPUISD::BFE_I32;
628
629 uint32_t OffsetVal = Offset->getZExtValue();
630 uint32_t WidthVal = Width->getZExtValue();
631
632 ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
633 WidthVal));
634 return;
635 }
636 case AMDGPUISD::DIV_SCALE: {
637 SelectDIV_SCALE(N);
638 return;
639 }
640 case AMDGPUISD::MAD_I64_I32:
641 case AMDGPUISD::MAD_U64_U32: {
642 SelectMAD_64_32(N);
643 return;
644 }
645 case ISD::SMUL_LOHI:
646 case ISD::UMUL_LOHI:
647 return SelectMUL_LOHI(N);
648 case ISD::CopyToReg: {
649 const SITargetLowering& Lowering =
650 *static_cast<const SITargetLowering*>(getTargetLowering());
651 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
652 break;
653 }
654 case ISD::AND:
655 case ISD::SRL:
656 case ISD::SRA:
657 case ISD::SIGN_EXTEND_INREG:
658 if (N->getValueType(0) != MVT::i32)
659 break;
660
661 SelectS_BFE(N);
662 return;
663 case ISD::BRCOND:
664 SelectBRCOND(N);
665 return;
666 case AMDGPUISD::CVT_PKRTZ_F16_F32:
667 case AMDGPUISD::CVT_PKNORM_I16_F32:
668 case AMDGPUISD::CVT_PKNORM_U16_F32:
669 case AMDGPUISD::CVT_PK_U16_U32:
670 case AMDGPUISD::CVT_PK_I16_I32: {
671 // Hack around using a legal type if f16 is illegal.
672 if (N->getValueType(0) == MVT::i32) {
673 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
674 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
675 { N->getOperand(0), N->getOperand(1) });
676 SelectCode(N);
677 return;
678 }
679
680 break;
681 }
682 case ISD::INTRINSIC_W_CHAIN: {
683 SelectINTRINSIC_W_CHAIN(N);
684 return;
685 }
686 case ISD::INTRINSIC_WO_CHAIN: {
687 SelectINTRINSIC_WO_CHAIN(N);
688 return;
689 }
690 case ISD::INTRINSIC_VOID: {
691 SelectINTRINSIC_VOID(N);
692 return;
693 }
694 }
695
696 SelectCode(N);
697}
698
699bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
700 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
701 const Instruction *Term = BB->getTerminator();
702 return Term->getMetadata("amdgpu.uniform") ||
703 Term->getMetadata("structurizecfg.uniform");
704}
705
706bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
707 unsigned ShAmtBits) const {
708 assert(N->getOpcode() == ISD::AND);
709
710 const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
711 if (RHS.countr_one() >= ShAmtBits)
712 return true;
713
714 const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
715 return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
716}
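// For example, for (srl x, (and y, 31)) on a 32-bit shift, ShAmtBits is 5 and
// the mask 31 has five trailing ones, so the AND is redundant and the shift
// amount can use y directly.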
717
718static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
719 SDValue &N0, SDValue &N1) {
720 if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
721 Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
722 // Since we split 64-bit `or` operations earlier, this is a more involved
723 // pattern to match, i.e.
723 // (i64 (bitcast (v2i32 (build_vector
724 // (or (extract_vector_elt V, 0), OFFSET),
725 // (extract_vector_elt V, 1)))))
726 SDValue Lo = Addr.getOperand(0).getOperand(0);
727 if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
728 SDValue BaseLo = Lo.getOperand(0);
729 SDValue BaseHi = Addr.getOperand(0).getOperand(1);
730 // Check that split base (Lo and Hi) are extracted from the same one.
731 if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
732 BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
733 BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
734 // Lo is statically extracted from index 0.
735 isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
736 BaseLo.getConstantOperandVal(1) == 0 &&
737 // Hi is statically extracted from index 1.
738 isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
739 BaseHi.getConstantOperandVal(1) == 1) {
740 N0 = BaseLo.getOperand(0).getOperand(0);
741 N1 = Lo.getOperand(1);
742 return true;
743 }
744 }
745 }
746 return false;
747}
748
749bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
750 SDValue &RHS) const {
751 if (CurDAG->isBaseWithConstantOffset(Addr)) {
752 LHS = Addr.getOperand(0);
753 RHS = Addr.getOperand(1);
754 return true;
755 }
756
757 if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
758 assert(LHS && RHS && isa<ConstantSDNode>(RHS));
759 return true;
760 }
761
762 return false;
763}
764
765StringRef AMDGPUDAGToDAGISel::getPassName() const {
766 return "AMDGPU DAG->DAG Pattern Instruction Selection";
767}
768
769//===----------------------------------------------------------------------===//
770// Complex Patterns
771//===----------------------------------------------------------------------===//
772
773bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
774 SDValue &Offset) {
775 return false;
776}
777
778bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
779 SDValue &Offset) {
780 ConstantSDNode *C;
781 SDLoc DL(Addr);
782
783 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
784 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
785 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
786 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
787 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
788 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
789 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
790 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
791 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
792 Base = Addr.getOperand(0);
793 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
794 } else {
795 Base = Addr;
796 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
797 }
798
799 return true;
800}
801
802SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
803 const SDLoc &DL) const {
804 SDNode *Mov = CurDAG->getMachineNode(
805 AMDGPU::S_MOV_B32, DL, MVT::i32,
806 CurDAG->getTargetConstant(Val, DL, MVT::i32));
807 return SDValue(Mov, 0);
808}
809
810// FIXME: Should only handle addcarry/subcarry
811void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
812 SDLoc DL(N);
813 SDValue LHS = N->getOperand(0);
814 SDValue RHS = N->getOperand(1);
815
816 unsigned Opcode = N->getOpcode();
817 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
818 bool ProduceCarry =
819 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
820 bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
821
822 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
823 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
824
825 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
826 DL, MVT::i32, LHS, Sub0);
827 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
828 DL, MVT::i32, LHS, Sub1);
829
830 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
831 DL, MVT::i32, RHS, Sub0);
832 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
833 DL, MVT::i32, RHS, Sub1);
834
835 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
836
837 static const unsigned OpcMap[2][2][2] = {
838 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
839 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
840 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
841 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
842
843 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
844 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
845
846 SDNode *AddLo;
847 if (!ConsumeCarry) {
848 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
849 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
850 } else {
851 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
852 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
853 }
854 SDValue AddHiArgs[] = {
855 SDValue(Hi0, 0),
856 SDValue(Hi1, 0),
857 SDValue(AddLo, 1)
858 };
859 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
860
861 SDValue RegSequenceArgs[] = {
862 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
863 SDValue(AddLo,0),
864 Sub0,
865 SDValue(AddHi,0),
866 Sub1,
867 };
868 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
869 MVT::i64, RegSequenceArgs);
870
871 if (ProduceCarry) {
872 // Replace the carry-use
873 ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
874 }
875
876 // Replace the remaining uses.
877 ReplaceNode(N, RegSequence);
878}
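// A divergent i64 add is thus expanded along the lines of:
//   %lo, %vcc = V_ADD_CO_U32_e32  %a.sub0, %b.sub0
//   %hi, %vcc = V_ADDC_U32_e32    %a.sub1, %b.sub1, %vcc
//   %res      = REG_SEQUENCE %lo, sub0, %hi, sub1
// with the uniform case using S_ADD_U32/S_ADDC_U32 instead.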
879
880void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
881 SDLoc DL(N);
882 SDValue LHS = N->getOperand(0);
883 SDValue RHS = N->getOperand(1);
884 SDValue CI = N->getOperand(2);
885
886 if (N->isDivergent()) {
887 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
888 : AMDGPU::V_SUBB_U32_e64;
889 CurDAG->SelectNodeTo(
890 N, Opc, N->getVTList(),
891 {LHS, RHS, CI,
892 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
893 } else {
894 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
895 : AMDGPU::S_SUB_CO_PSEUDO;
896 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
897 }
898}
899
900void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
901 // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
902 // carry out despite the _i32 name. These were renamed in VI to _U32.
903 // FIXME: We should probably rename the opcodes here.
904 bool IsAdd = N->getOpcode() == ISD::UADDO;
905 bool IsVALU = N->isDivergent();
906
907 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
908 ++UI)
909 if (UI.getUse().getResNo() == 1) {
910 if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
911 (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
912 IsVALU = true;
913 break;
914 }
915 }
916
917 if (IsVALU) {
918 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
919
920 CurDAG->SelectNodeTo(
921 N, Opc, N->getVTList(),
922 {N->getOperand(0), N->getOperand(1),
923 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
924 } else {
925 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
926 : AMDGPU::S_USUBO_PSEUDO;
927
928 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
929 {N->getOperand(0), N->getOperand(1)});
930 }
931}
932
933void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
934 SDLoc SL(N);
935 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
936 SDValue Ops[10];
937
938 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
939 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
940 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
941 Ops[8] = N->getOperand(0);
942 Ops[9] = N->getOperand(4);
943
944 // If there are no source modifiers, prefer fmac over fma because it can use
945 // the smaller VOP2 encoding.
946 bool UseFMAC = Subtarget->hasDLInsts() &&
947 cast<ConstantSDNode>(Ops[0])->isZero() &&
948 cast<ConstantSDNode>(Ops[2])->isZero() &&
949 cast<ConstantSDNode>(Ops[4])->isZero();
950 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
951 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
952}
953
954void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
955 SDLoc SL(N);
956 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
957 SDValue Ops[8];
958
959 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
960 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
961 Ops[6] = N->getOperand(0);
962 Ops[7] = N->getOperand(3);
963
964 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
965}
966
967// We need to handle this here because tablegen doesn't support matching
968// instructions with multiple outputs.
969void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
970 SDLoc SL(N);
971 EVT VT = N->getValueType(0);
972
973 assert(VT == MVT::f32 || VT == MVT::f64);
974
975 unsigned Opc
976 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
977
978 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
979 // omod
980 SDValue Ops[8];
981 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
982 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
983 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
984 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
985}
986
987// We need to handle this here because tablegen doesn't support matching
988// instructions with multiple outputs.
989void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
990 SDLoc SL(N);
991 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
992 unsigned Opc;
993 if (Subtarget->hasMADIntraFwdBug())
994 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
995 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
996 else
997 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
998
999 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1000 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1001 Clamp };
1002 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1003}
1004
1005// We need to handle this here because tablegen doesn't support matching
1006// instructions with multiple outputs.
1007void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1008 SDLoc SL(N);
1009 bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1010 unsigned Opc;
1011 if (Subtarget->hasMADIntraFwdBug())
1012 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1013 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1014 else
1015 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1016
1017 SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i32);
1018 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1019 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1020 SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1021 if (!SDValue(N, 0).use_empty()) {
1022 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1023 SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1024 MVT::i32, SDValue(Mad, 0), Sub0);
1025 ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1026 }
1027 if (!SDValue(N, 1).use_empty()) {
1028 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1029 SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1030 MVT::i32, SDValue(Mad, 0), Sub1);
1031 ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1032 }
1033 CurDAG->RemoveDeadNode(N);
1034}
1035
1036bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1037 if (!isUInt<16>(Offset))
1038 return false;
1039
1040 if (!Base || Subtarget->hasUsableDSOffset() ||
1041 Subtarget->unsafeDSOffsetFoldingEnabled())
1042 return true;
1043
1044 // On Southern Islands, instructions with a negative base value and an offset
1045 // don't seem to work.
1046 return CurDAG->SignBitIsZero(Base);
1047}
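// For example, ds_read_b32 with its 16-bit immediate offset accepts 0..65535;
// an offset of 0x10000 fails the isUInt<16> check and must stay in the base.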
1048
1049bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1050 SDValue &Offset) const {
1051 SDLoc DL(Addr);
1052 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1053 SDValue N0 = Addr.getOperand(0);
1054 SDValue N1 = Addr.getOperand(1);
1055 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1056 if (isDSOffsetLegal(N0, C1->getSExtValue())) {
1057 // (add n0, c0)
1058 Base = N0;
1059 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1060 return true;
1061 }
1062 } else if (Addr.getOpcode() == ISD::SUB) {
1063 // sub C, x -> add (sub 0, x), C
1064 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1065 int64_t ByteOffset = C->getSExtValue();
1066 if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1067 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1068
1069 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1070 // the known bits in isDSOffsetLegal. We need to emit the selected node
1071 // here, so this is thrown away.
1072 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1073 Zero, Addr.getOperand(1));
1074
1075 if (isDSOffsetLegal(Sub, ByteOffset)) {
1076 SmallVector<SDValue, 3> Opnds;
1077 Opnds.push_back(Zero);
1078 Opnds.push_back(Addr.getOperand(1));
1079
1080 // FIXME: Select to VOP3 version for with-carry.
1081 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1082 if (Subtarget->hasAddNoCarry()) {
1083 SubOp = AMDGPU::V_SUB_U32_e64;
1084 Opnds.push_back(
1085 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1086 }
1087
1088 MachineSDNode *MachineSub =
1089 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1090
1091 Base = SDValue(MachineSub, 0);
1092 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1093 return true;
1094 }
1095 }
1096 }
1097 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1098 // If we have a constant address, prefer to put the constant into the
1099 // offset. This can save moves to load the constant address since multiple
1100 // operations can share the zero base address register, and enables merging
1101 // into read2 / write2 instructions.
1102
1103 SDLoc DL(Addr);
1104
1105 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1106 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1107 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1108 DL, MVT::i32, Zero);
1109 Base = SDValue(MovZero, 0);
1110 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1111 return true;
1112 }
1113 }
1114
1115 // default case
1116 Base = Addr;
1117 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1118 return true;
1119}
1120
1121bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1122 unsigned Offset1,
1123 unsigned Size) const {
1124 if (Offset0 % Size != 0 || Offset1 % Size != 0)
1125 return false;
1126 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1127 return false;
1128
1129 if (!Base || Subtarget->hasUsableDSOffset() ||
1130 Subtarget->unsafeDSOffsetFoldingEnabled())
1131 return true;
1132
1133 // On Southern Islands, instructions with a negative base value and an offset
1134 // don't seem to work.
1135 return CurDAG->SignBitIsZero(Base);
1136}
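// For example, with Size = 4 (ds_read2_b32) the two offsets must be 4-byte
// aligned and, after dividing by 4, fit in 8 bits, i.e. byte offsets up to
// 1020; Size = 8 (ds_read2_b64) extends that to 2040.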
1137
1138bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
1139 uint64_t FlatVariant) const {
1140 if (FlatVariant != SIInstrFlags::FlatScratch)
1141 return true;
1142 // When the value in the 32-bit Base can be negative, calculate the scratch
1143 // offset using a 32-bit add instruction; otherwise use Base (unsigned) + offset.
1144 return CurDAG->SignBitIsZero(Base);
1145}
1146
1147// TODO: If offset is too big, put low 16-bit into offset.
1148bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1149 SDValue &Offset0,
1150 SDValue &Offset1) const {
1151 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1152}
1153
1154bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1155 SDValue &Offset0,
1156 SDValue &Offset1) const {
1157 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1158}
1159
1160bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1161 SDValue &Offset0, SDValue &Offset1,
1162 unsigned Size) const {
1163 SDLoc DL(Addr);
1164
1165 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1166 SDValue N0 = Addr.getOperand(0);
1167 SDValue N1 = Addr.getOperand(1);
1168 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1169 unsigned OffsetValue0 = C1->getZExtValue();
1170 unsigned OffsetValue1 = OffsetValue0 + Size;
1171
1172 // (add n0, c0)
1173 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1174 Base = N0;
1175 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1176 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1177 return true;
1178 }
1179 } else if (Addr.getOpcode() == ISD::SUB) {
1180 // sub C, x -> add (sub 0, x), C
1181 if (const ConstantSDNode *C =
1182 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1183 unsigned OffsetValue0 = C->getZExtValue();
1184 unsigned OffsetValue1 = OffsetValue0 + Size;
1185
1186 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1187 SDLoc DL(Addr);
1188 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1189
1190 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1191 // the known bits in isDSOffsetLegal. We need to emit the selected node
1192 // here, so this is thrown away.
1193 SDValue Sub =
1194 CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1195
1196 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1197 SmallVector<SDValue, 3> Opnds;
1198 Opnds.push_back(Zero);
1199 Opnds.push_back(Addr.getOperand(1));
1200 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1201 if (Subtarget->hasAddNoCarry()) {
1202 SubOp = AMDGPU::V_SUB_U32_e64;
1203 Opnds.push_back(
1204 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1205 }
1206
1207 MachineSDNode *MachineSub = CurDAG->getMachineNode(
1208 SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1209
1210 Base = SDValue(MachineSub, 0);
1211 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1212 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1213 return true;
1214 }
1215 }
1216 }
1217 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1218 unsigned OffsetValue0 = CAddr->getZExtValue();
1219 unsigned OffsetValue1 = OffsetValue0 + Size;
1220
1221 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1222 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1223 MachineSDNode *MovZero =
1224 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1225 Base = SDValue(MovZero, 0);
1226 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1227 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1228 return true;
1229 }
1230 }
1231
1232 // default case
1233
1234 Base = Addr;
1235 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1236 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1237 return true;
1238}
1239
1240bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
1241 SDValue &SOffset, SDValue &Offset,
1242 SDValue &Offen, SDValue &Idxen,
1243 SDValue &Addr64) const {
1244 // The subtarget prefers to use flat instructions.
1245 // FIXME: This should be a pattern predicate and not reach here
1246 if (Subtarget->useFlatForGlobal())
1247 return false;
1248
1249 SDLoc DL(Addr);
1250
1251 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1252 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1253 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1254 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1255
1256 ConstantSDNode *C1 = nullptr;
1257 SDValue N0 = Addr;
1258 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1259 C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1260 if (isUInt<32>(C1->getZExtValue()))
1261 N0 = Addr.getOperand(0);
1262 else
1263 C1 = nullptr;
1264 }
1265
1266 if (N0.getOpcode() == ISD::ADD) {
1267 // (add N2, N3) -> addr64, or
1268 // (add (add N2, N3), C1) -> addr64
1269 SDValue N2 = N0.getOperand(0);
1270 SDValue N3 = N0.getOperand(1);
1271 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1272
1273 if (N2->isDivergent()) {
1274 if (N3->isDivergent()) {
1275 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1276 // addr64, and construct the resource from a 0 address.
1277 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1278 VAddr = N0;
1279 } else {
1280 // N2 is divergent, N3 is not.
1281 Ptr = N3;
1282 VAddr = N2;
1283 }
1284 } else {
1285 // N2 is not divergent.
1286 Ptr = N2;
1287 VAddr = N3;
1288 }
1289 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1290 } else if (N0->isDivergent()) {
1291 // N0 is divergent. Use it as the addr64, and construct the resource from a
1292 // 0 address.
1293 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1294 VAddr = N0;
1295 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1296 } else {
1297 // N0 -> offset, or
1298 // (N0 + C1) -> offset
1299 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1300 Ptr = N0;
1301 }
1302
1303 if (!C1) {
1304 // No offset.
1305 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1306 return true;
1307 }
1308
1309 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1310 // Legal offset for instruction.
1311 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1312 return true;
1313 }
1314
1315 // Illegal offset, store it in soffset.
1316 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1317 SOffset =
1318 SDValue(CurDAG->getMachineNode(
1319 AMDGPU::S_MOV_B32, DL, MVT::i32,
1320 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1321 0);
1322 return true;
1323}
1324
1325bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1326 SDValue &VAddr, SDValue &SOffset,
1327 SDValue &Offset) const {
1328 SDValue Ptr, Offen, Idxen, Addr64;
1329
1330 // addr64 bit was removed for volcanic islands.
1331 // FIXME: This should be a pattern predicate and not reach here
1332 if (!Subtarget->hasAddr64())
1333 return false;
1334
1335 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1336 return false;
1337
1338 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1339 if (C->getSExtValue()) {
1340 SDLoc DL(Addr);
1341
1342 const SITargetLowering& Lowering =
1343 *static_cast<const SITargetLowering*>(getTargetLowering());
1344
1345 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1346 return true;
1347 }
1348
1349 return false;
1350}
1351
1352std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1353 SDLoc DL(N);
1354
1355 auto *FI = dyn_cast<FrameIndexSDNode>(N);
1356 SDValue TFI =
1357 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1358
1359 // We rebase the base address into an absolute stack address and hence
1360 // use constant 0 for soffset. This value must be retained until
1361 // frame elimination and eliminateFrameIndex will choose the appropriate
1362 // frame register if need be.
1363 return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1364}
1365
1366bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1367 SDValue Addr, SDValue &Rsrc,
1368 SDValue &VAddr, SDValue &SOffset,
1369 SDValue &ImmOffset) const {
1370
1371 SDLoc DL(Addr);
1372 const SIMachineFunctionInfo *Info =
1373 CurDAG->getMachineFunction().getInfo<SIMachineFunctionInfo>();
1374
1375 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1376
1377 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1378 int64_t Imm = CAddr->getSExtValue();
1379 const int64_t NullPtr =
1380 AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1381 // Don't fold null pointer.
1382 if (Imm != NullPtr) {
1383 const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
1384 SDValue HighBits =
1385 CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
1386 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1387 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1388 VAddr = SDValue(MovHighBits, 0);
1389
1390 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1391 ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
1392 return true;
1393 }
1394 }
1395
1396 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1397 // (add n0, c1)
1398
1399 SDValue N0 = Addr.getOperand(0);
1400 SDValue N1 = Addr.getOperand(1);
1401
1402 // Offsets in vaddr must be positive if range checking is enabled.
1403 //
1404 // The total computation of vaddr + soffset + offset must not overflow. If
1405 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1406 // overflowing.
1407 //
1408 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1409 // always perform a range check. If a negative vaddr base index was used,
1410 // this would fail the range check. The overall address computation would
1411 // compute a valid address, but this doesn't happen due to the range
1412 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1413 //
1414 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1415 // MUBUF vaddr, but not on older subtargets which can only do this if the
1416 // sign bit is known 0.
1417 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1418 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1419 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1420 CurDAG->SignBitIsZero(N0))) {
1421 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1422 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1423 return true;
1424 }
1425 }
1426
1427 // (node)
1428 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1429 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1430 return true;
1431}
1432
1433static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1434 if (Val.getOpcode() != ISD::CopyFromReg)
1435 return false;
1436 auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1437 if (!Reg.isPhysical())
1438 return false;
1439 auto RC = TRI.getPhysRegBaseClass(Reg);
1440 return RC && TRI.isSGPRClass(RC);
1441}
1442
1443bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1444 SDValue Addr,
1445 SDValue &SRsrc,
1446 SDValue &SOffset,
1447 SDValue &Offset) const {
1448 const SIRegisterInfo *TRI =
1449 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
1450 const SIMachineFunctionInfo *Info =
1451 CurDAG->getMachineFunction().getInfo<SIMachineFunctionInfo>();
1452 SDLoc DL(Addr);
1453
1454 // CopyFromReg <sgpr>
1455 if (IsCopyFromSGPR(*TRI, Addr)) {
1456 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1457 SOffset = Addr;
1458 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1459 return true;
1460 }
1461
1462 ConstantSDNode *CAddr;
1463 if (Addr.getOpcode() == ISD::ADD) {
1464 // Add (CopyFromReg <sgpr>) <constant>
1465 CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1466 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1467 return false;
1468 if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1469 return false;
1470
1471 SOffset = Addr.getOperand(0);
1472 } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1473 SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1474 // <constant>
1475 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1476 } else {
1477 return false;
1478 }
1479
1480 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1481
1482 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32);
1483 return true;
1484}
1485
1486bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1487 SDValue &SOffset, SDValue &Offset
1488 ) const {
1489 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1490 const SIInstrInfo *TII =
1491 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1492
1493 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1494 return false;
1495
1496 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1497 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1498 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1499 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1500 APInt::getAllOnes(32).getZExtValue(); // Size
1501 SDLoc DL(Addr);
1502
1503 const SITargetLowering& Lowering =
1504 *static_cast<const SITargetLowering*>(getTargetLowering());
1505
1506 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1507 return true;
1508 }
1509 return false;
1510}
1511
1512// Find a load or store from corresponding pattern root.
1513// Roots may be build_vector, bitconvert or their combinations.
1514static MemSDNode* findMemSDNode(SDNode *N) {
1515 N = AMDGPUTargetLowering::stripBitcast(SDValue(N, 0)).getNode();
1516 if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1517 return MN;
1518 assert(isa<BuildVectorSDNode>(N));
1519 for (SDValue V : N->op_values())
1520 if (MemSDNode *MN =
1521 dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1522 return MN;
1523 llvm_unreachable("cannot find MemSDNode in the pattern!");
1524}
1525
1526bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1527 SDValue &VAddr, SDValue &Offset,
1528 uint64_t FlatVariant) const {
1529 int64_t OffsetVal = 0;
1530
1531 unsigned AS = findMemSDNode(N)->getAddressSpace();
1532
1533 bool CanHaveFlatSegmentOffsetBug =
1534 Subtarget->hasFlatSegmentOffsetBug() &&
1535 FlatVariant == SIInstrFlags::FLAT &&
1536 (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS);
1537
1538 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1539 SDValue N0, N1;
1540 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1541 isFlatScratchBaseLegal(N0, FlatVariant)) {
1542 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1543
1544 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1545 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1546 Addr = N0;
1547 OffsetVal = COffsetVal;
1548 } else {
1549 // If the offset doesn't fit, put the low bits into the offset field and
1550 // add the rest.
1551 //
1552 // For a FLAT instruction the hardware decides whether to access
1553 // global/scratch/shared memory based on the high bits of vaddr,
1554 // ignoring the offset field, so we have to ensure that when we add
1555 // remainder to vaddr it still points into the same underlying object.
1556 // The easiest way to do that is to make sure that we split the offset
1557 // into two pieces that are both >= 0 or both <= 0.
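// For example, if COffsetVal were 0x11008 and the immediate field only held
// 12 unsigned bits, splitFlatOffset would return something like (0x008,
// 0x11000): the low piece stays in the offset field and the non-negative
// remainder is added to the base below.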
1558
1559 SDLoc DL(N);
1560 uint64_t RemainderOffset;
1561
1562 std::tie(OffsetVal, RemainderOffset) =
1563 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1564
1565 SDValue AddOffsetLo =
1566 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1567 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1568
1569 if (Addr.getValueType().getSizeInBits() == 32) {
1570 SmallVector<SDValue, 3> Opnds;
1571 Opnds.push_back(N0);
1572 Opnds.push_back(AddOffsetLo);
1573 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1574 if (Subtarget->hasAddNoCarry()) {
1575 AddOp = AMDGPU::V_ADD_U32_e64;
1576 Opnds.push_back(Clamp);
1577 }
1578 Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1579 } else {
1580 // TODO: Should this try to use a scalar add pseudo if the base address
1581 // is uniform and saddr is usable?
1582 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1583 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1584
1585 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1586 DL, MVT::i32, N0, Sub0);
1587 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1588 DL, MVT::i32, N0, Sub1);
1589
1590 SDValue AddOffsetHi =
1591 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1592
1593 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1594
1595 SDNode *Add =
1596 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1597 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1598
1599 SDNode *Addc = CurDAG->getMachineNode(
1600 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1601 {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1602
1603 SDValue RegSequenceArgs[] = {
1604 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1605 SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1606
1607 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1608 MVT::i64, RegSequenceArgs),
1609 0);
1610 }
1611 }
1612 }
1613 }
1614
1615 VAddr = Addr;
1616 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1617 return true;
1618}
1619
1620bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1621 SDValue &VAddr,
1622 SDValue &Offset) const {
1623 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1624}
1625
1626bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1627 SDValue &VAddr,
1628 SDValue &Offset) const {
1629 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1630}
1631
1632bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1633 SDValue &VAddr,
1634 SDValue &Offset) const {
1635 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1636 SIInstrFlags::FlatScratch);
1637}
1638
1639// If this matches zero_extend i32:x, return x
1640static SDValue matchZExtFromI32(SDValue Op) {
1641 if (Op.getOpcode() != ISD::ZERO_EXTEND)
1642 return SDValue();
1643
1644 SDValue ExtSrc = Op.getOperand(0);
1645 return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1646}
1647
1648// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1649bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1650 SDValue Addr,
1651 SDValue &SAddr,
1652 SDValue &VOffset,
1653 SDValue &Offset) const {
1654 int64_t ImmOffset = 0;
1655
1656 // Match the immediate offset first, which canonically is moved as low as
1657 // possible.
1658
1659 SDValue LHS, RHS;
1660 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1661 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1662 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1663
1664 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1665 SIInstrFlags::FlatGlobal)) {
1666 Addr = LHS;
1667 ImmOffset = COffsetVal;
1668 } else if (!LHS->isDivergent()) {
1669 if (COffsetVal > 0) {
1670 SDLoc SL(N);
1671 // saddr + large_offset -> saddr +
1672 // (voffset = large_offset & ~MaxOffset) +
1673 // (large_offset & MaxOffset);
1674 int64_t SplitImmOffset, RemainderOffset;
1675 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1676 COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1677
1678 if (isUInt<32>(RemainderOffset)) {
1679 SDNode *VMov = CurDAG->getMachineNode(
1680 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1681 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1682 VOffset = SDValue(VMov, 0);
1683 SAddr = LHS;
1684 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1685 return true;
1686 }
1687 }
1688
1689 // We are adding a 64 bit SGPR and a constant. If constant bus limit
1690 // is 1 we would need to perform 1 or 2 extra moves for each half of
1691 // the constant and it is better to do a scalar add and then issue a
1692 // single VALU instruction to materialize zero. Otherwise it is less
1693 // instructions to perform VALU adds with immediates or inline literals.
1694 unsigned NumLiterals =
1695 !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1696 !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1697 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1698 return false;
1699 }
1700 }
1701
1702 // Match the variable offset.
1703 if (Addr.getOpcode() == ISD::ADD) {
1704 LHS = Addr.getOperand(0);
1705 RHS = Addr.getOperand(1);
1706
1707 if (!LHS->isDivergent()) {
1708 // add (i64 sgpr), (zero_extend (i32 vgpr))
1709 if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1710 SAddr = LHS;
1711 VOffset = ZextRHS;
1712 }
1713 }
1714
1715 if (!SAddr && !RHS->isDivergent()) {
1716 // add (zero_extend (i32 vgpr)), (i64 sgpr)
1717 if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1718 SAddr = RHS;
1719 VOffset = ZextLHS;
1720 }
1721 }
1722
1723 if (SAddr) {
1724 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1725 return true;
1726 }
1727 }
1728
1729 if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1730 isa<ConstantSDNode>(Addr))
1731 return false;
1732
1733 // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1734 // moves required to copy a 64-bit SGPR to VGPR.
1735 SAddr = Addr;
1736 SDNode *VMov =
1737 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1738 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1739 VOffset = SDValue(VMov, 0);
1740 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1741 return true;
1742}
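// The net effect is to match addresses of the form
//   (add (i64 sgpr_base), (add (zext (i32 vgpr_off)), imm))
// onto the saddr + voffset + offset operands of a global memory instruction,
// falling back to a zero voffset when the whole address is uniform.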
1743
1744static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1745 if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1746 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1747 } else if (SAddr.getOpcode() == ISD::ADD &&
1748 isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1749 // Materialize this into a scalar move for scalar address to avoid
1750 // readfirstlane.
1751 auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1752 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1753 FI->getValueType(0));
1754 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1755 MVT::i32, TFI, SAddr.getOperand(1)),
1756 0);
1757 }
1758
1759 return SAddr;
1760}
1761
1762// Match (32-bit SGPR base) + sext(imm offset)
1763bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1764 SDValue &SAddr,
1765 SDValue &Offset) const {
1766 if (Addr->isDivergent())
1767 return false;
1768
1769 SDLoc DL(Addr);
1770
1771 int64_t COffsetVal = 0;
1772
1773 if (CurDAG->isBaseWithConstantOffset(Addr) &&
1774 isFlatScratchBaseLegal(Addr.getOperand(0))) {
1775 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1776 SAddr = Addr.getOperand(0);
1777 } else {
1778 SAddr = Addr;
1779 }
1780
1781 SAddr = SelectSAddrFI(CurDAG, SAddr);
1782
1783 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1784
1785 if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1786 SIInstrFlags::FlatScratch)) {
1787 int64_t SplitImmOffset, RemainderOffset;
1788 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1789 COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1790
1791 COffsetVal = SplitImmOffset;
1792
1793 SDValue AddOffset =
1794 SAddr.getOpcode() == ISD::TargetFrameIndex
1795 ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1796 : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1797 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1798 SAddr, AddOffset),
1799 0);
1800 }
1801
1802 Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1803
1804 return true;
1805}
1806
1807// Check whether the flat scratch SVS swizzle bug affects this access.
1808bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1809 SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
1810 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1811 return false;
1812
1813 // The bug affects the swizzling of SVS accesses if there is any carry out
1814 // from the two low order bits (i.e. from bit 1 into bit 2) when adding
1815 // voffset to (soffset + inst_offset).
1816 KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1817 KnownBits SKnown = KnownBits::computeForAddSub(
1818 true, false, CurDAG->computeKnownBits(SAddr),
1819 KnownBits::makeConstant(APInt(32, ImmOffset)));
1820 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
1821 uint64_t SMax = SKnown.getMaxValue().getZExtValue();
1822 return (VMax & 3) + (SMax & 3) >= 4;
1823}
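// Worked example of the carry check above: if known bits give VMax = 7 and
// SMax = 6, then (VMax & 3) + (SMax & 3) = 3 + 2 = 5 >= 4, so adding voffset
// to (soffset + inst_offset) may carry from bit 1 into bit 2 and the SVS
// swizzle bug could affect the access; the match is rejected.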
1824
1825bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1826 SDValue &VAddr, SDValue &SAddr,
1827 SDValue &Offset) const {
1828 int64_t ImmOffset = 0;
1829
1830 SDValue LHS, RHS;
1831 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1832 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1833 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1834
1835 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1836 Addr = LHS;
1837 ImmOffset = COffsetVal;
1838 } else if (!LHS->isDivergent() && COffsetVal > 0) {
1839 SDLoc SL(N);
1840 // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1841 // (large_offset & MaxOffset);
1842 int64_t SplitImmOffset, RemainderOffset;
1843 std::tie(SplitImmOffset, RemainderOffset)
1844 = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1845
1846 if (isUInt<32>(RemainderOffset)) {
1847 SDNode *VMov = CurDAG->getMachineNode(
1848 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1849 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1850 VAddr = SDValue(VMov, 0);
1851 SAddr = LHS;
1852 if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
1853 return false;
1854 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1855 return false;
1856 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1857 return true;
1858 }
1859 }
1860 }
1861
1862 if (Addr.getOpcode() != ISD::ADD)
1863 return false;
1864
1865 LHS = Addr.getOperand(0);
1866 RHS = Addr.getOperand(1);
1867
1868 if (!LHS->isDivergent() && RHS->isDivergent()) {
1869 SAddr = LHS;
1870 VAddr = RHS;
1871 } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1872 SAddr = RHS;
1873 VAddr = LHS;
1874 } else {
1875 return false;
1876 }
1877
1878 if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
1879 return false;
1880
1881 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1882 return false;
1883 SAddr = SelectSAddrFI(CurDAG, SAddr);
1884 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1885 return true;
1886}
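// Example of the large-offset split above (the offset width here is
// illustrative; the real maximum is target-dependent): with a 12-bit scratch
// immediate (MaxOffset = 0xFFF), COffsetVal = 0x1234 splits into
// RemainderOffset = 0x1234 & ~0xFFF = 0x1000, materialized into vaddr via
// V_MOV_B32, and SplitImmOffset = 0x1234 & 0xFFF = 0x234, kept in the
// instruction's offset field.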
1887
1888// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
1889// not null) offset. If Imm32Only is true, match only 32-bit immediate
1890// offsets available on CI.
1891bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1892 SDValue *SOffset, SDValue *Offset,
1893 bool Imm32Only, bool IsBuffer) const {
1894 assert((!SOffset || !Offset) &&
1895 "Cannot match both soffset and offset at the same time!");
1896
1897 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1898 if (!C) {
1899 if (!SOffset)
1900 return false;
1901 if (ByteOffsetNode.getValueType().isScalarInteger() &&
1902 ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1903 *SOffset = ByteOffsetNode;
1904 return true;
1905 }
1906 if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1907 if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1908 *SOffset = ByteOffsetNode.getOperand(0);
1909 return true;
1910 }
1911 }
1912 return false;
1913 }
1914
1915 SDLoc SL(ByteOffsetNode);
1916
1917 // GFX9 and GFX10 have signed byte immediate offsets. The immediate
1918 // offset for S_BUFFER instructions is unsigned.
1919 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
1920 std::optional<int64_t> EncodedOffset =
1921 AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
1922 if (EncodedOffset && Offset && !Imm32Only) {
1923 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1924 return true;
1925 }
1926
1927 // SGPR and literal offsets are unsigned.
1928 if (ByteOffset < 0)
1929 return false;
1930
1931 EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1932 if (EncodedOffset && Offset && Imm32Only) {
1933 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1934 return true;
1935 }
1936
1937 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1938 return false;
1939
1940 if (SOffset) {
1941 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1942 *SOffset = SDValue(
1943 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1944 return true;
1945 }
1946
1947 return false;
1948}
1949
1950SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1951 if (Addr.getValueType() != MVT::i32)
1952 return Addr;
1953
1954 // Zero-extend a 32-bit address.
1955 SDLoc SL(Addr);
1956
1957 const MachineFunction &MF = CurDAG->getMachineFunction();
1958 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1959 unsigned AddrHiVal = Info->get32BitAddressHighBits();
1960 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1961
1962 const SDValue Ops[] = {
1963 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1964 Addr,
1965 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1966 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1967 0),
1968 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1969 };
1970
1971 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1972 Ops), 0);
1973}
1974
1975// Match a base and an immediate (if Offset is not null) or an SGPR (if
1976// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
1977// true, match only 32-bit immediate offsets available on CI.
1978bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
1979 SDValue *SOffset, SDValue *Offset,
1980 bool Imm32Only,
1981 bool IsBuffer) const {
1982 if (SOffset && Offset) {
1983 assert(!Imm32Only && !IsBuffer);
1984 SDValue B;
1985 return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
1986 SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
1987 }
1988
1989 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1990 // wraparound, because s_load instructions perform the addition in 64 bits.
1991 if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
1992 !Addr->getFlags().hasNoUnsignedWrap())
1993 return false;
1994
1995 SDValue N0, N1;
1996 // Extract the base and offset if possible.
1997 if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
1998 N0 = Addr.getOperand(0);
1999 N1 = Addr.getOperand(1);
2000 } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
2001 assert(N0 && N1 && isa<ConstantSDNode>(N1));
2002 }
2003 if (!N0 || !N1)
2004 return false;
2005 if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
2006 SBase = N0;
2007 return true;
2008 }
2009 if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
2010 SBase = N1;
2011 return true;
2012 }
2013 return false;
2014}
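// Example of the two-step match above: for Addr = (sgpr_base + sgpr_off) +
// imm, the first recursive call peels the immediate into Offset, and the
// second splits the remaining sum into SBase and SOffset.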
2015
2016bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2017 SDValue *SOffset, SDValue *Offset,
2018 bool Imm32Only) const {
2019 if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2020 SBase = Expand32BitAddress(SBase);
2021 return true;
2022 }
2023
2024 if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2025 SBase = Expand32BitAddress(Addr);
2026 *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2027 return true;
2028 }
2029
2030 return false;
2031}
2032
2033bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2034 SDValue &Offset) const {
2035 return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
2036}
2037
2038bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2039 SDValue &Offset) const {
2040 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2041 return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2042 /* Imm32Only */ true);
2043}
2044
2045bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2046 SDValue &SOffset) const {
2047 return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2048}
2049
2050bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2051 SDValue &SOffset,
2052 SDValue &Offset) const {
2053 return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2054}
2055
2056bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2057 return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2058 /* Imm32Only */ false, /* IsBuffer */ true);
2059}
2060
2061bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
2062 SDValue &Offset) const {
2063 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2064 return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2065 /* Imm32Only */ true, /* IsBuffer */ true);
2066}
2067
2068bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2069 SDValue &Offset) const {
2070 // Match the (soffset + offset) pair as a 32-bit register base and
2071 // an immediate offset.
2072 return N.getValueType() == MVT::i32 &&
2073 SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2074 &Offset, /* Imm32Only */ false,
2075 /* IsBuffer */ true);
2076}
2077
2078bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2079 SDValue &Base,
2080 SDValue &Offset) const {
2081 SDLoc DL(Index);
2082
2083 if (CurDAG->isBaseWithConstantOffset(Index)) {
2084 SDValue N0 = Index.getOperand(0);
2085 SDValue N1 = Index.getOperand(1);
2086 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2087
2088 // (add n0, c0)
2089 // Don't peel off the offset (c0) if doing so could possibly lead
2090 // the base (n0) to be negative.
2091 // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2092 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2093 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2094 Base = N0;
2095 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2096 return true;
2097 }
2098 }
2099
2100 if (isa<ConstantSDNode>(Index))
2101 return false;
2102
2103 Base = Index;
2104 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2105 return true;
2106}
2107
2108SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2109 SDValue Val, uint32_t Offset,
2110 uint32_t Width) {
2111 if (Val->isDivergent()) {
2112 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2113 SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2114 SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2115
2116 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2117 }
2118 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2119 // Transformation function, pack the offset and width of a BFE into
2120 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
2121 // source, bits [5:0] contain the offset and bits [22:16] the width.
2122 uint32_t PackedVal = Offset | (Width << 16);
2123 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2124
2125 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2126}
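// Example of the S_BFE packing above: offset = 16 and width = 8 give
// PackedVal = 16 | (8 << 16) = 0x80010, i.e. the offset in bits [5:0] and
// the width in bits [22:16] of the second source operand.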
2127
2128void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2129 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2130 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2131 // Predicate: 0 < b <= c < 32
2132
2133 const SDValue &Shl = N->getOperand(0);
2134 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2135 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2136
2137 if (B && C) {
2138 uint32_t BVal = B->getZExtValue();
2139 uint32_t CVal = C->getZExtValue();
2140
2141 if (0 < BVal && BVal <= CVal && CVal < 32) {
2142 bool Signed = N->getOpcode() == ISD::SRA;
2143 ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2144 32 - CVal));
2145 return;
2146 }
2147 }
2148 SelectCode(N);
2149}
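// Worked example for the shift pair above: with b = 8 and c = 16,
// ((a << 8) srl 16) keeps bits [23:8] of a, so it becomes
// BFE_U32 a, (16 - 8) = 8, (32 - 16) = 16: a 16-bit field starting at
// bit 8. The sra form becomes BFE_I32 with the same operands.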
2150
2151void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2152 switch (N->getOpcode()) {
2153 case ISD::AND:
2154 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2155 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2156 // Predicate: isMask(mask)
2157 const SDValue &Srl = N->getOperand(0);
2158 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2159 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2160
2161 if (Shift && Mask) {
2162 uint32_t ShiftVal = Shift->getZExtValue();
2163 uint32_t MaskVal = Mask->getZExtValue();
2164
2165 if (isMask_32(MaskVal)) {
2166 uint32_t WidthVal = llvm::popcount(MaskVal);
2167 ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2168 WidthVal));
2169 return;
2170 }
2171 }
2172 }
2173 break;
2174 case ISD::SRL:
2175 if (N->getOperand(0).getOpcode() == ISD::AND) {
2176 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2177 // Predicate: isMask(mask >> b)
2178 const SDValue &And = N->getOperand(0);
2179 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2180 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2181
2182 if (Shift && Mask) {
2183 uint32_t ShiftVal = Shift->getZExtValue();
2184 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2185
2186 if (isMask_32(MaskVal)) {
2187 uint32_t WidthVal = llvm::popcount(MaskVal);
2188 ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2189 WidthVal));
2190 return;
2191 }
2192 }
2193 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2194 SelectS_BFEFromShifts(N);
2195 return;
2196 }
2197 break;
2198 case ISD::SRA:
2199 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2200 SelectS_BFEFromShifts(N);
2201 return;
2202 }
2203 break;
2204
2205 case ISD::SIGN_EXTEND_INREG: {
2206 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2207 SDValue Src = N->getOperand(0);
2208 if (Src.getOpcode() != ISD::SRL)
2209 break;
2210
2211 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2212 if (!Amt)
2213 break;
2214
2215 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2216 ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
2217 Amt->getZExtValue(), Width));
2218 return;
2219 }
2220 }
2221
2222 SelectCode(N);
2223}
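// Worked example for the and/srl patterns above: for "(a srl 3) & 0xff",
// the mask 0xff has popcount 8, so the node becomes BFE_U32 a, 3, 8.
// For "((a & 0xff8) srl 3)", mask >> 3 = 0x1ff is also a valid mask
// (popcount 9), giving BFE_U32 a, 3, 9.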
2224
2225bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2226 assert(N->getOpcode() == ISD::BRCOND);
2227 if (!N->hasOneUse())
2228 return false;
2229
2230 SDValue Cond = N->getOperand(1);
2231 if (Cond.getOpcode() == ISD::CopyToReg)
2232 Cond = Cond.getOperand(2);
2233
2234 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2235 return false;
2236
2237 MVT VT = Cond.getOperand(0).getSimpleValueType();
2238 if (VT == MVT::i32)
2239 return true;
2240
2241 if (VT == MVT::i64) {
2242 auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2243
2244 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2245 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2246 }
2247
2248 return false;
2249}
2250
2251void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2252 SDValue Cond = N->getOperand(1);
2253
2254 if (Cond.isUndef()) {
2255 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2256 N->getOperand(2), N->getOperand(0));
2257 return;
2258 }
2259
2260 const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2261 const SIRegisterInfo *TRI = ST->getRegisterInfo();
2262
2263 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2264 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2265 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2266 SDLoc SL(N);
2267
2268 if (!UseSCCBr) {
2269 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2270 // analyzed what generates the vcc value, so we do not know whether vcc
2271 // bits for disabled lanes are 0. Thus we need to mask out bits for
2272 // disabled lanes.
2273 //
2274 // For the case that we select S_CBRANCH_SCC1 and it gets
2275 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2276 // SIInstrInfo::moveToVALU, which inserts the S_AND.
2277 //
2278 // We could add an analysis of what generates the vcc value here and omit
2279 // the S_AND when it is unnecessary. But it would be better to add a separate
2280 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2281 // catches both cases.
2282 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2283 : AMDGPU::S_AND_B64,
2284 SL, MVT::i1,
2285 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2286 : AMDGPU::EXEC,
2287 MVT::i1),
2288 Cond),
2289 0);
2290 }
2291
2292 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2293 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2294 N->getOperand(2), // Basic Block
2295 VCC.getValue(0));
2296}
2297
2298void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2299 // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2300 // be copied to an SGPR with readfirstlane.
2301 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2302 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2303
2304 SDValue Chain = N->getOperand(0);
2305 SDValue Ptr = N->getOperand(2);
2306 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2307 MachineMemOperand *MMO = M->getMemOperand();
2308 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2309
2310 SDValue Offset;
2311 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2312 SDValue PtrBase = Ptr.getOperand(0);
2313 SDValue PtrOffset = Ptr.getOperand(1);
2314
2315 const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2316 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2317 N = glueCopyToM0(N, PtrBase);
2318 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2319 }
2320 }
2321
2322 if (!Offset) {
2323 N = glueCopyToM0(N, Ptr);
2324 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2325 }
2326
2327 SDValue Ops[] = {
2328 Offset,
2329 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2330 Chain,
2331 N->getOperand(N->getNumOperands() - 1) // New glue
2332 };
2333
2334 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2335 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2336}
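// Example of the offset folding above: for ds_append with ptr = (base + 64)
// and 64 legal for the 16-bit DS offset field, base is glued to m0 and 64
// becomes the immediate offset; otherwise the whole pointer goes to m0 and
// the offset field is 0.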
2337
2338// We need to handle this here because tablegen doesn't support matching
2339// instructions with multiple outputs.
2340void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2341 unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2342 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2343 N->getOperand(5), N->getOperand(0)};
2344
2345 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2346 MachineMemOperand *MMO = M->getMemOperand();
2347 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2348 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2349}
2350
2351static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2352 switch (IntrID) {
2353 case Intrinsic::amdgcn_ds_gws_init:
2354 return AMDGPU::DS_GWS_INIT;
2355 case Intrinsic::amdgcn_ds_gws_barrier:
2356 return AMDGPU::DS_GWS_BARRIER;
2357 case Intrinsic::amdgcn_ds_gws_sema_v:
2358 return AMDGPU::DS_GWS_SEMA_V;
2359 case Intrinsic::amdgcn_ds_gws_sema_br:
2360 return AMDGPU::DS_GWS_SEMA_BR;
2361 case Intrinsic::amdgcn_ds_gws_sema_p:
2362 return AMDGPU::DS_GWS_SEMA_P;
2363 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2364 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2365 default:
2366 llvm_unreachable("not a gws intrinsic");
2367 }
2368}
2369
2370void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2371 if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2372 !Subtarget->hasGWSSemaReleaseAll()) {
2373 // Let this error.
2374 SelectCode(N);
2375 return;
2376 }
2377
2378 // Chain, intrinsic ID, vsrc, offset
2379 const bool HasVSrc = N->getNumOperands() == 4;
2380 assert(HasVSrc || N->getNumOperands() == 3);
2381
2382 SDLoc SL(N);
2383 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2384 int ImmOffset = 0;
2385 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2386 MachineMemOperand *MMO = M->getMemOperand();
2387
2388 // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2389 // effect, so SIFixSGPRCopies can validly insert a readfirstlane.
2390
2391 // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2392 // offset field) % 64. Some versions of the programming guide omit the m0
2393 // part, or claim it's from offset 0.
2394 if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2395 // If we have a constant offset, try to use the 0 in m0 as the base.
2396 // TODO: Look into changing the default m0 initialization value. If the
2397 // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2398 // the immediate offset.
2399 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2400 ImmOffset = ConstOffset->getZExtValue();
2401 } else {
2402 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2403 ImmOffset = BaseOffset.getConstantOperandVal(1);
2404 BaseOffset = BaseOffset.getOperand(0);
2405 }
2406
2407 // Prefer to do the shift in an SGPR since it should be possible to use m0
2408 // as the result directly. If it's already an SGPR, it will be eliminated
2409 // later.
2410 SDNode *SGPROffset
2411 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2412 BaseOffset);
2413 // Shift to offset in m0
2414 SDNode *M0Base
2415 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2416 SDValue(SGPROffset, 0),
2417 CurDAG->getTargetConstant(16, SL, MVT::i32));
2418 glueCopyToM0(N, SDValue(M0Base, 0));
2419 }
2420
2421 SDValue Chain = N->getOperand(0);
2422 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2423
2424 const unsigned Opc = gwsIntrinToOpcode(IntrID);
2425 SmallVector<SDValue, 5> Ops;
2426 if (HasVSrc)
2427 Ops.push_back(N->getOperand(2));
2428 Ops.push_back(OffsetField);
2429 Ops.push_back(Chain);
2430
2431 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2432 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2433}
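// Example of the m0 setup above: for a constant offset 3, m0 is set to 0 and
// the instruction's offset field holds 3. For a variable offset v, v is
// copied to an SGPR via V_READFIRSTLANE_B32 and shifted left by 16 so the
// resource id lands in M0[21:16], while any constant part that was split off
// stays in the offset field.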
2434
2435void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2436 if (Subtarget->getLDSBankCount() != 16) {
2437 // This is a single instruction with a pattern.
2438 SelectCode(N);
2439 return;
2440 }
2441
2442 SDLoc DL(N);
2443
2444 // This requires 2 instructions. It is possible to write a pattern to support
2445 // this, but the generated isel emitter doesn't correctly deal with multiple
2446 // output instructions using the same physical register input. The copy to m0
2447 // is incorrectly placed before the second instruction.
2448 //
2449 // TODO: Match source modifiers.
2450 //
2451 // def : Pat <
2452 // (int_amdgcn_interp_p1_f16
2453 // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2454 // (i32 timm:$attrchan), (i32 timm:$attr),
2455 // (i1 timm:$high), M0),
2456 // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2457 // timm:$attrchan, 0,
2458 // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2459 // let Predicates = [has16BankLDS];
2460 // }
2461
2462 // 16 bank LDS
2463 SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2464 N->getOperand(5), SDValue());
2465
2466 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2467
2468 SDNode *InterpMov =
2469 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2470 CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2471 N->getOperand(3), // Attr
2472 N->getOperand(2), // Attrchan
2473 ToM0.getValue(1) // In glue
2474 });
2475
2476 SDNode *InterpP1LV =
2477 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2478 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2479 N->getOperand(1), // Src0
2480 N->getOperand(3), // Attr
2481 N->getOperand(2), // Attrchan
2482 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2483 SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2484 N->getOperand(4), // high
2485 CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2486 CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2487 SDValue(InterpMov, 1)
2488 });
2489
2490 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2491}
2492
2493void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2494 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2495 switch (IntrID) {
2496 case Intrinsic::amdgcn_ds_append:
2497 case Intrinsic::amdgcn_ds_consume: {
2498 if (N->getValueType(0) != MVT::i32)
2499 break;
2500 SelectDSAppendConsume(N, IntrID);
2501 return;
2502 }
2503 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2504 SelectDSBvhStackIntrinsic(N);
2505 return;
2506 }
2507
2508 SelectCode(N);
2509}
2510
2511void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2512 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2513 unsigned Opcode;
2514 switch (IntrID) {
2515 case Intrinsic::amdgcn_wqm:
2516 Opcode = AMDGPU::WQM;
2517 break;
2518 case Intrinsic::amdgcn_softwqm:
2519 Opcode = AMDGPU::SOFT_WQM;
2520 break;
2521 case Intrinsic::amdgcn_wwm:
2522 case Intrinsic::amdgcn_strict_wwm:
2523 Opcode = AMDGPU::STRICT_WWM;
2524 break;
2525 case Intrinsic::amdgcn_strict_wqm:
2526 Opcode = AMDGPU::STRICT_WQM;
2527 break;
2528 case Intrinsic::amdgcn_interp_p1_f16:
2529 SelectInterpP1F16(N);
2530 return;
2531 default:
2532 SelectCode(N);
2533 return;
2534 }
2535
2536 SDValue Src = N->getOperand(1);
2537 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2538}
2539
2540void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2541 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2542 switch (IntrID) {
2543 case Intrinsic::amdgcn_ds_gws_init:
2544 case Intrinsic::amdgcn_ds_gws_barrier:
2545 case Intrinsic::amdgcn_ds_gws_sema_v:
2546 case Intrinsic::amdgcn_ds_gws_sema_br:
2547 case Intrinsic::amdgcn_ds_gws_sema_p:
2548 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2549 SelectDS_GWS(N, IntrID);
2550 return;
2551 default:
2552 break;
2553 }
2554
2555 SelectCode(N);
2556}
2557
2558bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2559 unsigned &Mods,
2560 bool AllowAbs) const {
2561 Mods = 0;
2562 Src = In;
2563
2564 if (Src.getOpcode() == ISD::FNEG) {
2565 Mods |= SISrcMods::NEG;
2566 Src = Src.getOperand(0);
2567 }
2568
2569 if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2570 Mods |= SISrcMods::ABS;
2571 Src = Src.getOperand(0);
2572 }
2573
2574 return true;
2575}
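// Example of the modifier folding above: In = fneg(fabs(x)) strips both
// nodes, returning Src = x with Mods = NEG | ABS. Since fneg is matched
// first, In = fabs(fneg(x)) only folds the outer fabs (when AllowAbs is
// true), leaving Src = fneg(x).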
2576
2577bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2578 SDValue &SrcMods) const {
2579 unsigned Mods;
2580 if (SelectVOP3ModsImpl(In, Src, Mods)) {
2581 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2582 return true;
2583 }
2584
2585 return false;
2586}
2587
2588bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2589 SDValue &SrcMods) const {
2590 unsigned Mods;
2591 if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2592 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2593 return true;
2594 }
2595
2596 return false;
2597}
2598
2599bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2600 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2601 return false;
2602
2603 Src = In;
2604 return true;
2605}
2606
2607bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2608 SDValue &SrcMods,
2609 bool OpSel) const {
2610 unsigned Mods;
2611 if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2612 if (OpSel)
2613 Mods |= SISrcMods::OP_SEL_0;
2614 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2615 return true;
2616 }
2617
2618 return false;
2619}
2620
2621bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2622 SDValue &SrcMods) const {
2623 return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
2624}
2625
2626bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2627 SDValue &SrcMods) const {
2628 return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
2629}
2630
2631bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2632 SDValue &SrcMods, SDValue &Clamp,
2633 SDValue &Omod) const {
2634 SDLoc DL(In);
2635 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2636 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2637
2638 return SelectVOP3Mods(In, Src, SrcMods);
2639}
2640
2641bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2642 SDValue &SrcMods, SDValue &Clamp,
2643 SDValue &Omod) const {
2644 SDLoc DL(In);
2645 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2646 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2647
2648 return SelectVOP3BMods(In, Src, SrcMods);
2649}
2650
2651bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2652 SDValue &Clamp, SDValue &Omod) const {
2653 Src = In;
2654
2655 SDLoc DL(In);
2656 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2657 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2658
2659 return true;
2660}
2661
2662bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2663 SDValue &SrcMods, bool IsDOT) const {
2664 unsigned Mods = 0;
2665 Src = In;
2666
2667 if (Src.getOpcode() == ISD::FNEG) {
2668 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2669 Src = Src.getOperand(0);
2670 }
2671
2672 if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
2673 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2674 unsigned VecMods = Mods;
2675
2676 SDValue Lo = stripBitcast(Src.getOperand(0));
2677 SDValue Hi = stripBitcast(Src.getOperand(1));
2678
2679 if (Lo.getOpcode() == ISD::FNEG) {
2680 Lo = stripBitcast(Lo.getOperand(0));
2681 Mods ^= SISrcMods::NEG;
2682 }
2683
2684 if (Hi.getOpcode() == ISD::FNEG) {
2685 Hi = stripBitcast(Hi.getOperand(0));
2686 Mods ^= SISrcMods::NEG_HI;
2687 }
2688
2689 if (isExtractHiElt(Lo, Lo))
2690 Mods |= SISrcMods::OP_SEL_0;
2691
2692 if (isExtractHiElt(Hi, Hi))
2693 Mods |= SISrcMods::OP_SEL_1;
2694
2695 unsigned VecSize = Src.getValueSizeInBits();
2696 Lo = stripExtractLoElt(Lo);
2697 Hi = stripExtractLoElt(Hi);
2698
2699 if (Lo.getValueSizeInBits() > VecSize) {
2700 Lo = CurDAG->getTargetExtractSubreg(
2701 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2702 MVT::getIntegerVT(VecSize), Lo);
2703 }
2704
2705 if (Hi.getValueSizeInBits() > VecSize) {
2706 Hi = CurDAG->getTargetExtractSubreg(
2707 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2708 MVT::getIntegerVT(VecSize), Hi);
2709 }
2710
2711 assert(Lo.getValueSizeInBits() <= VecSize &&
2712 Hi.getValueSizeInBits() <= VecSize);
2713
2714 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2715 // Really a scalar input. Just select from the low half of the register to
2716 // avoid packing.
2717
2718 if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2719 Src = Lo;
2720 } else {
2721 assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2722
2723 SDLoc SL(In);
2724 SDValue Undef = SDValue(
2725 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2726 Lo.getValueType()), 0);
2727 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2728 : AMDGPU::SReg_64RegClassID;
2729 const SDValue Ops[] = {
2730 CurDAG->getTargetConstant(RC, SL, MVT::i32),
2731 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2732 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2733
2734 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2735 Src.getValueType(), Ops), 0);
2736 }
2737 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2738 return true;
2739 }
2740
2741 if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2742 uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2743 .bitcastToAPInt().getZExtValue();
2744 if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2745 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2746 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2747 return true;
2748 }
2749 }
2750
2751 Mods = VecMods;
2752 }
2753
2754 // Packed instructions do not have abs modifiers.
2755 Mods |= SISrcMods::OP_SEL_1;
2756
2757 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2758 return true;
2759}
2760
2761bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2762 SDValue &SrcMods) const {
2763 return SelectVOP3PMods(In, Src, SrcMods, true);
2764}
2765
2766bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
2767 const ConstantSDNode *C = cast<ConstantSDNode>(In);
2768 // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
2769 // 1 promotes packed values to signed, 0 treats them as unsigned.
2770 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2771
2772 unsigned Mods = SISrcMods::OP_SEL_1;
2773 unsigned SrcSign = C->getAPIntValue().getZExtValue();
2774 if (SrcSign == 1)
2775 Mods ^= SISrcMods::NEG;
2776
2777 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2778 return true;
2779}
2780
2781bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
2782 SDValue &Src) const {
2783 const ConstantSDNode *C = cast<ConstantSDNode>(In);
2784 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2785
2786 unsigned Mods = SISrcMods::OP_SEL_1;
2787 unsigned SrcVal = C->getAPIntValue().getZExtValue();
2788 if (SrcVal == 1)
2789 Mods |= SISrcMods::OP_SEL_0;
2790
2791 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2792 return true;
2793}
2794
2795bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2796 SDValue &SrcMods) const {
2797 Src = In;
2798 // FIXME: Handle op_sel
2799 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2800 return true;
2801}
2802
2803bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2804 SDValue &SrcMods) const {
2805 // FIXME: Handle op_sel
2806 return SelectVOP3Mods(In, Src, SrcMods);
2807}
2808
2809 // The return value is not whether the match is possible (which it always is),
2810 // but whether or not a conversion is really used.
2811bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2812 unsigned &Mods) const {
2813 Mods = 0;
2814 SelectVOP3ModsImpl(In, Src, Mods);
2815
2816 if (Src.getOpcode() == ISD::FP_EXTEND) {
2817 Src = Src.getOperand(0);
2818 assert(Src.getValueType() == MVT::f16);
2819 Src = stripBitcast(Src);
2820
2821 // Be careful about folding modifiers if we already have an abs. fneg is
2822 // applied last, so we don't want to apply an earlier fneg.
2823 if ((Mods & SISrcMods::ABS) == 0) {
2824 unsigned ModsTmp;
2825 SelectVOP3ModsImpl(Src, Src, ModsTmp);
2826
2827 if ((ModsTmp & SISrcMods::NEG) != 0)
2828 Mods ^= SISrcMods::NEG;
2829
2830 if ((ModsTmp & SISrcMods::ABS) != 0)
2831 Mods |= SISrcMods::ABS;
2832 }
2833
2834 // op_sel/op_sel_hi decide the source type and source.
2835 // If the source's op_sel_hi is set, it indicates a conversion from fp16.
2836 // If the source's op_sel is set, it picks the high half of the source
2837 // register.
2838
2839 Mods |= SISrcMods::OP_SEL_1;
2840 if (isExtractHiElt(Src, Src)) {
2841 Mods |= SISrcMods::OP_SEL_0;
2842
2843 // TODO: Should we try to look for neg/abs here?
2844 }
2845
2846 return true;
2847 }
2848
2849 return false;
2850}
2851
2852bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
2853 SDValue &SrcMods) const {
2854 unsigned Mods = 0;
2855 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
2856 return false;
2857 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2858 return true;
2859}
2860
2861bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2862 SDValue &SrcMods) const {
2863 unsigned Mods = 0;
2864 SelectVOP3PMadMixModsImpl(In, Src, Mods);
2865 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2866 return true;
2867}
2868
2869SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2870 if (In.isUndef())
2871 return CurDAG->getUNDEF(MVT::i32);
2872
2873 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2874 SDLoc SL(In);
2875 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2876 }
2877
2878 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2879 SDLoc SL(In);
2880 return CurDAG->getConstant(
2881 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2882 }
2883
2884 SDValue Src;
2885 if (isExtractHiElt(In, Src))
2886 return Src;
2887
2888 return SDValue();
2889}
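// Example for getHi16Elt above: the i16 constant 0x1234 becomes the i32
// constant 0x12340000, i.e. the value shifted into the high half; an fp16
// constant is bitcast to its integer encoding first and shifted the same way.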
2890
2891bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2892 assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2893
2894 const SIRegisterInfo *SIRI =
2895 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2896 const SIInstrInfo * SII =
2897 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2898
2899 unsigned Limit = 0;
2900 bool AllUsesAcceptSReg = true;
2901 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2902 Limit < 10 && U != E; ++U, ++Limit) {
2903 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2904
2905 // If the register class is unknown, it could be an unknown
2906 // register class that needs to be an SGPR, e.g. an inline asm
2907 // constraint
2908 if (!RC || SIRI->isSGPRClass(RC))
2909 return false;
2910
2911 if (RC != &AMDGPU::VS_32RegClass) {
2912 AllUsesAcceptSReg = false;
2913 SDNode * User = *U;
2914 if (User->isMachineOpcode()) {
2915 unsigned Opc = User->getMachineOpcode();
2916 const MCInstrDesc &Desc = SII->get(Opc);
2917 if (Desc.isCommutable()) {
2918 unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2919 unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2920 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2921 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2922 const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2923 if (CommutedRC == &AMDGPU::VS_32RegClass)
2924 AllUsesAcceptSReg = true;
2925 }
2926 }
2927 }
2928 // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2929 // commuting current user. This means have at least one use
2930 // that strictly require VGPR. Thus, we will not attempt to commute
2931 // other user instructions.
2932 if (!AllUsesAcceptSReg)
2933 break;
2934 }
2935 }
2936 return !AllUsesAcceptSReg && (Limit < 10);
2937}
2938
2939bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2940 auto Ld = cast<LoadSDNode>(N);
2941
2942 if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(Ld->getMemOperand()))
2943 return false;
2944
2945 return Ld->getAlign() >= Align(4) &&
2946 ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2947 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
2948 (Subtarget->getScalarizeGlobalBehavior() &&
2949 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2950 Ld->isSimple() &&
2951 static_cast<const SITargetLowering *>(getTargetLowering())
2952 ->isMemOpHasNoClobberedMemOperand(N)));
2953}
2954
2955 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2956 const AMDGPUTargetLowering& Lowering =
2957 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2958 bool IsModified = false;
2959 do {
2960 IsModified = false;
2961
2962 // Go over all selected nodes and try to fold them a bit more
2963 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2964 while (Position != CurDAG->allnodes_end()) {
2965 SDNode *Node = &*Position++;
2966 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2967 if (!MachineNode)
2968 continue;
2969
2970 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2971 if (ResNode != Node) {
2972 if (ResNode)
2973 ReplaceUses(Node, ResNode);
2974 IsModified = true;
2975 }
2976 }
2977 CurDAG->RemoveDeadNodes();
2978 } while (IsModified);
2979}
2980
2981char AMDGPUDAGToDAGISel::ID = 0;
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue matchZExtFromI32(SDValue Op)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static MachineInstr * isExtractHiElt(MachineInstr *Inst, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Shadow Stack GC Lowering
LLVM IR instance of the generic uniformity analysis.
Value * RHS
Value * LHS
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPUDAGToDAGISel()=delete
bool matchLoadD16FromBuildVector(SDNode *N) const
static bool isUniformMMO(const MachineMemOperand *MMO)
bool hasInv2PiInlineImm() const
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1609
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
uint64_t getZExtValue() const
int64_t getSExtValue() const
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
int getLDSBankCount() const
Definition: GCNSubtarget.h:293
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:423
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:427
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:580
bool hasDLInsts() const
Definition: GCNSubtarget.h:703
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:223
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:931
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:508
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:235
bool hasDOTOpSelHazard() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:634
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:622
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:828
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:644
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:486
Generation getGeneration() const
Definition: GCNSubtarget.h:274
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:658
bool hasAddr64() const
Definition: GCNSubtarget.h:338
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:666
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:587
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:247
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:480
const Triple & getTargetTriple() const
Machine Value Type.
static MVT getIntegerVT(unsigned BitWidth)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A description of a memory reference used in the backend.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool isDivergent() const
const SDValue & getOperand(unsigned Num) const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1134
static unsigned getMaxMUBUFImmOffset()
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
std::unique_ptr< FunctionLoweringInfo > FuncInfo
MachineFunction * MF
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:531
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:532
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:726
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:356
Legacy analysis pass which computes a CycleInfo.
LLVM Value Representation.
Definition: Value.h:74
Iterator for intrusive lists based on ilist_node.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:384
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:378
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:381
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:380
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:376
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:377
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:382
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Level
Code generation optimization level.
Definition: CodeGen.h:57
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:749
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:486
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:978
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:786
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:898
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:619
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:923
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:704
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:776
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:955
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:794
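The SHL/SRA pair mentioned in the brief, sketched for an i8 value held in a 32-bit register (hypothetical helper; two's-complement shift behavior assumed, guaranteed since C++20):
#include <cstdint>
// Sign extend the low 8 bits of X into all 32 bits: shift the value to the
// top of the register, then arithmetic-shift back down, replicating bit 7.
int32_t sext_in_reg_i8(uint32_t X) {
  return static_cast<int32_t>(X << 24) >> 24;
}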
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:883
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:679
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
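The ADDC/ADDE (and ADDCARRY) briefs describe the classic split-add carry chain; a sketch under hypothetical names (not LLVM code):
#include <cstdint>
// A 64-bit add built from two 32-bit adds: the low add produces a carry
// (ADDC-like), which the high add then consumes (ADDE-like).
uint64_t add64_via_halves(uint64_t X, uint64_t Y) {
  uint32_t SumLo = static_cast<uint32_t>(X) + static_cast<uint32_t>(Y);
  uint32_t Carry = SumLo < static_cast<uint32_t>(X); // carry-out of low half
  uint32_t SumHi = static_cast<uint32_t>(X >> 32) +
                   static_cast<uint32_t>(Y >> 32) + Carry;
  return (static_cast<uint64_t>(SumHi) << 32) | SumLo;
}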
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:865
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:782
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1013
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable elements.
Definition: ISDOpcodes.h:514
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1447
@ Undef
Value of the register doesn't matter.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:349
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:268
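For concreteness, a reimplementation sketch matching the isMask_32 brief above, with C++20 std::popcount standing in for llvm::popcount (the sketch name is hypothetical, not the MathExtras.h source):
#include <bit>
#include <cstdint>
// A value is a mask iff it is non-zero and Value+1 clears every set bit,
// i.e. the ones form one contiguous run starting at bit 0.
constexpr bool isMask32Sketch(uint32_t Value) {
  return Value != 0 && ((Value + 1) & Value) == 0;
}
static_assert(isMask32Sketch(0x0000FFFFu), "ones from the LSB upward");
static_assert(!isMask32Sketch(0x0000FFF0u), "run does not start at bit 0");
static_assert(std::popcount(0x0000FFFFu) == 16, "sixteen set bits");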
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:164
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:169
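A sketch matching the documented behavior of Hi_32 and Lo_32 (hypothetical reimplementations, not the MathExtras.h source):
#include <cstdint>
constexpr uint32_t hi32Sketch(uint64_t V) { return static_cast<uint32_t>(V >> 32); }
constexpr uint32_t lo32Sketch(uint64_t V) { return static_cast<uint32_t>(V); }
static_assert(hi32Sketch(0x123456789ABCDEF0ull) == 0x12345678u, "high half");
static_assert(lo32Sketch(0x123456789ABCDEF0ull) == 0x9ABCDEF0u, "low half");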
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:363
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:239
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
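A brief usage sketch tying the EVT accessors above together for a v4i32 vector (illustrative only; expected values shown in comments):
#include "llvm/CodeGen/ValueTypes.h"
// For EVT VT = MVT::v4i32:
//   VT.getSizeInBits()        == 128 (whole vector)
//   VT.getScalarSizeInBits()  == 32  (per element)
//   VT.getVectorNumElements() == 4
//   VT.getVectorElementType() == MVT::i32
//   VT.isScalarInteger()      == false (a vector, not a scalar integer)
void evtExample() {
  llvm::EVT VT = llvm::MVT::v4i32;
  (void)VT.getVectorNumElements();
}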
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:292
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
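A usage sketch of the KnownBits helpers listed above, adding two fully-known 32-bit constants (illustrative only):
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
// Both inputs are fully known (5 and 7), so the sum's known bits pin down
// the exact value 12, and getMaxValue() returns 12 as well.
void knownBitsExample() {
  using namespace llvm;
  KnownBits LHS = KnownBits::makeConstant(APInt(32, 5));
  KnownBits RHS = KnownBits::makeConstant(APInt(32, 7));
  KnownBits Sum =
      KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false, LHS, RHS);
  (void)Sum.getMaxValue(); // == APInt(32, 12)
}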
static unsigned getSubRegFromChannel(unsigned Channel)
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.