LLVM  16.0.0git
AMDGPUISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUISelDAGToDAG.h"
15 #include "AMDGPU.h"
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDGPUTargetMachine.h"
21 #include "R600RegisterInfo.h"
22 #include "SIMachineFunctionInfo.h"
29 #include "llvm/IR/IntrinsicsAMDGPU.h"
30 #include "llvm/InitializePasses.h"
31 
32 #ifdef EXPENSIVE_CHECKS
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/IR/Dominators.h"
35 #endif
36 
37 #define DEBUG_TYPE "amdgpu-isel"
38 
39 using namespace llvm;
40 
41 //===----------------------------------------------------------------------===//
42 // Instruction Selector Implementation
43 //===----------------------------------------------------------------------===//
44 
45 namespace {
46 static SDValue stripBitcast(SDValue Val) {
47  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
48 }
49 
50 // Figure out if this is really an extract of the high 16-bits of a dword.
51 static bool isExtractHiElt(SDValue In, SDValue &Out) {
52  In = stripBitcast(In);
53 
54  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
55  if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
56  if (!Idx->isOne())
57  return false;
58  Out = In.getOperand(0);
59  return true;
60  }
61  }
62 
63  if (In.getOpcode() != ISD::TRUNCATE)
64  return false;
65 
66  SDValue Srl = In.getOperand(0);
67  if (Srl.getOpcode() == ISD::SRL) {
68  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
69  if (ShiftAmt->getZExtValue() == 16) {
70  Out = stripBitcast(Srl.getOperand(0));
71  return true;
72  }
73  }
74  }
75 
76  return false;
77 }
78 
79 // Look through operations that obscure just looking at the low 16-bits of the
80 // same register.
81 static SDValue stripExtractLoElt(SDValue In) {
82  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
83  if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
84  if (Idx->isZero() && In.getValueSizeInBits() <= 32)
85  return In.getOperand(0);
86  }
87  }
88 
89  if (In.getOpcode() == ISD::TRUNCATE) {
90  SDValue Src = In.getOperand(0);
91  if (Src.getValueType().getSizeInBits() == 32)
92  return stripBitcast(Src);
93  }
94 
95  return In;
96 }
97 
98 } // end anonymous namespace
99 
101  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
105 #ifdef EXPENSIVE_CHECKS
108 #endif
110  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
111 
/// This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
// NOTE(review): the first line of the factory function's signature is elided
// in this capture; only the trailing parameter and body are visible.
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
118 
// Constructor: forwards the target machine and optimization level to the
// common SelectionDAGISel and caches the late-structurize-CFG setting.
// NOTE(review): the first line of the constructor's signature is elided in
// this capture.
                                     TargetMachine *TM /*= nullptr*/,
                                     CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
    : SelectionDAGISel(*TM, OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
125 
// NOTE(review): the runOnMachineFunction signature line is elided in this
// capture; the body follows.
#ifdef EXPENSIVE_CHECKS
  // With expensive checks enabled, verify every loop is in LCSSA form before
  // running instruction selection.
  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  // Cache the subtarget and the function's FP mode, then delegate to the
  // common SelectionDAG instruction selector.
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}
138 
// Return true if the 16-bit operation \p Opc is known to write zeros to the
// unused high 16 bits of its 32-bit destination register on this subtarget.
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
  // Fabs is lowered to a bit operation, but it's an and which will clear the
  // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  // NOTE(review): one case label is elided from this capture here — verify
  // against upstream.
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  // NOTE(review): one case label is elided from this capture here — verify
  // against upstream.
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}
200 
// NOTE(review): the getAnalysisUsage signature and its addRequired lines are
// elided in this capture; only the visible remainder follows.
#ifdef EXPENSIVE_CHECKS
// NOTE(review): expensive-check analysis requirements are elided from this
// capture here.
#endif
  // Chain to the base class so common ISel analyses are still declared.
  SelectionDAGISel::getAnalysisUsage(AU);
}
210 
  // Fold a 16-bit load feeding one half of a v2i16/v2f16 build_vector into a
  // d16 load that writes just that half of the destination register.
  // Returns true if the build_vector node was replaced.
  // NOTE(review): the function signature line is elided in this capture.
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    // NOTE(review): the declaration of TiedIn (the low half used as the tied
    // d16 input) is elided from this capture here.
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      // NOTE(review): the line carrying the two opcode results of this
      // conditional is elided from this capture.
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      // NOTE(review): the line carrying the two opcode results of this
      // conditional is elided from this capture.
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}
290 
  // Pre-ISel DAG cleanup: on subtargets where d16 loads preserve the unused
  // half, fold eligible loads feeding build_vectors into d16 load nodes.
  // NOTE(review): the function signature line is elided in this capture.
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  // NOTE(review): the declaration of Position (an all-nodes iterator starting
  // at the end of the DAG) is elided from this capture here.

  bool MadeChange = false;
  // Walk nodes from the end backwards so replacements are seen consistently.
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    // NOTE(review): a cleanup call is elided from this capture here — verify
    // against upstream.
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}
318 
319 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
320  bool Negated) const {
321  if (N->isUndef())
322  return true;
323 
324  const SIInstrInfo *TII = Subtarget->getInstrInfo();
325  if (Negated) {
326  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
327  return TII->isInlineConstant(-C->getAPIntValue());
328 
329  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
330  return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
331 
332  } else {
333  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
334  return TII->isInlineConstant(C->getAPIntValue());
335 
336  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
337  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
338  }
339 
340  return false;
341 }
342 
/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        // NOTE(review): the declaration of MRI (the function's
        // MachineRegisterInfo) is elided from this capture here.
        return MRI.getRegClass(Reg);
      }

      // Physical register: ask the target register info for its class.
      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: read the class from the MCInstrDesc,
    // skipping over the instruction's defs to reach operand OpNo.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE carries its super-class in operand 0; derive the operand's
    // class from the matching subregister index operand.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}
390 
// Morph node \p N so its chain operand becomes \p NewChain and \p Glue is
// appended as an extra glue operand; all other operands are preserved.
SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  // NOTE(review): the declaration of Ops (an SDValue vector) is elided from
  // this capture here.
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
401 
402 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
403  const SITargetLowering& Lowering =
404  *static_cast<const SITargetLowering*>(getTargetLowering());
405 
406  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
407 
408  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
409  return glueCopyToOp(N, M0, M0.getValue(1));
410 }
411 
// If \p N accesses local or region memory, glue the required M0 initializer
// onto it (LDS: all-ones when the subtarget needs M0 init; GDS: the GDS
// size); otherwise return \p N unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    // NOTE(review): the declaration of MF (the current MachineFunction) is
    // elided from this capture here.
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}
425 
// Materialize the 64-bit immediate \p Imm as two S_MOV_B32s (low and high
// halves) combined into a 64-bit SGPR pair with a REG_SEQUENCE.
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  // NOTE(review): the start of the Lo S_MOV_B32 node creation is elided from
  // this capture here.
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
  // NOTE(review): the high-half immediate operand line is elided from this
  // capture here.
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
441 
// Lower BUILD_VECTOR / SCALAR_TO_VECTOR into a REG_SEQUENCE over the register
// class \p RegClassID, padding missing trailing elements with IMPLICIT_DEF.
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    // A one-element vector is just a copy into the right register class.
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    // Pick the per-channel subregister index for the current target family.
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          // NOTE(review): the subreg-index constant line is elided from this
          // capture here.
    }
  }

  // NOTE(review): SelectCode does not return from this function, so control
  // reaches SelectNodeTo below even when !IsRegSeq — verify this fall-through
  // is intended against upstream.
  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
496 
  // Main instruction-selection entry point for a single node.
  // NOTE(review): the Select(SDNode *N) signature line is elided in this
  // capture; the body follows.
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       // NOTE(review): one opcode comparison line is elided from this capture
       // here.
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    // Memory nodes may need an M0 initializer glued on before selection.
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  // NOTE(review): this case's label line (the opcode handled by
  // SelectFMUL_W_CHAIN) is elided from this capture.
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  // NOTE(review): a case label line is elided from this capture here.
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      // Two-element 16-bit vectors of constants can be packed directly.
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    // NOTE(review): a line is elided from this capture here.
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into a REG_SEQUENCE of the wider class.
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit constants that are not inline immediates get materialized with
    // buildSMovImm64; everything else falls through to tablegen patterns.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
                            WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  // NOTE(review): a case label line is elided from this capture here.
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return SelectMUL_LOHI(N);
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  // NOTE(review): a case label line is elided from this capture here.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  // NOTE(review): several case labels (including AMDGPUISD::CVT_PKRTZ_F16_F32,
  // referenced below) are elided from this capture here.
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  // NOTE(review): this case's label line is elided from this capture.
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  // Anything not handled above falls back to the tablegen-generated matcher.
  SelectCode(N);
}
705 
706 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
707  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
708  const Instruction *Term = BB->getTerminator();
709  return Term->getMetadata("amdgpu.uniform") ||
710  Term->getMetadata("structurizecfg.uniform");
711 }
712 
713 bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
714  unsigned ShAmtBits) const {
715  assert(N->getOpcode() == ISD::AND);
716 
717  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
718  if (RHS.countTrailingOnes() >= ShAmtBits)
719  return true;
720 
721  const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
722  return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
723 }
724 
// Matches an i64 address that was split into 32-bit halves where the low
// half carries a constant offset folded in with an `or`. On success, returns
// the original base vector's source in N0 and the offset in N1.
// NOTE(review): the first line of this static helper's signature is elided in
// this capture.
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
755 
// Split a 64-bit base-plus-constant-offset address into LHS (base) and RHS
// (constant offset), also recognizing the split-OR pattern matched above.
bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  // NOTE(review): the guarding condition of this block is elided from this
  // capture here.
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  // NOTE(review): the condition of this block (which fills LHS/RHS) is elided
  // from this capture here.
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}
771 
  // Human-readable pass name used in -debug-pass output.
  // NOTE(review): the getPassName signature line is elided in this capture.
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
775 
776 //===----------------------------------------------------------------------===//
777 // Complex Patterns
778 //===----------------------------------------------------------------------===//
779 
// Complex pattern hook for VTX_READ addressing. This implementation never
// matches (presumably an R600-era hook that GCN does not use — TODO confirm).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
784 
// Match an indirect address: a plain constant, a DWORDADDR of a constant, or
// a base register plus constant offset. Always succeeds, falling back to the
// whole address as the base.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Absolute constant address off the indirect base register.
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // DWORDADDR wrapping a constant behaves the same way.
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // Base register plus constant offset.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    // NOTE(review): the zero-offset assignment line is elided from this
    // capture here.
  }

  return true;
}
808 
// Emit an S_MOV_B32 that materializes the 32-bit immediate \p Val and return
// its result value.
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      // NOTE(review): the immediate-operand line is elided from this capture
      // here.
  return SDValue(Mov, 0);
}
816 
// FIXME: Should only handle addcarry/subcarry
// Expand a 64-bit add/sub (optionally consuming and/or producing carry) into
// two 32-bit halves: a low op plus a carry-consuming high op, recombined with
// a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  // Indexed as OpcMap[consumes carry][is divergent][is add].
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Carry-in comes from operand 2 of the ADDE/SUBE node.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1) // Carry produced by the low half.
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  // NOTE(review): the final node-replacement call (using RegSequence) is
  // elided from this capture here.
}
886 
// Select a 32-bit ADDCARRY/SUBCARRY: VALU carry ops for divergent nodes,
// scalar carry pseudos otherwise.
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2); // Carry-in.

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    // NOTE(review): the start of the node-selection call is elided from this
    // capture here.
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}
906 
// Select UADDO/USUBO to a VALU carry-out op or a scalar pseudo, forcing VALU
// when the carry-out is consumed by anything other than the matching
// ADDCARRY/SUBCARRY node.
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  // Inspect every user of the carry-out (result 1).
  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    // NOTE(review): the start of the node-selection call is elided from this
    // capture here.
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}
939 
940 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
941  SDLoc SL(N);
942  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
943  SDValue Ops[10];
944 
945  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
946  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
947  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
948  Ops[8] = N->getOperand(0);
949  Ops[9] = N->getOperand(4);
950 
951  // If there are no source modifiers, prefer fmac over fma because it can use
952  // the smaller VOP2 encoding.
953  bool UseFMAC = Subtarget->hasDLInsts() &&
954  cast<ConstantSDNode>(Ops[0])->isZero() &&
955  cast<ConstantSDNode>(Ops[2])->isZero() &&
956  cast<ConstantSDNode>(Ops[4])->isZero();
957  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
958  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
959 }
960 
961 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
962  SDLoc SL(N);
963  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
964  SDValue Ops[8];
965 
966  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
967  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
968  Ops[6] = N->getOperand(0);
969  Ops[7] = N->getOperand(3);
970 
971  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
972 }
973 
974 // We need to handle this here because tablegen doesn't support matching
975 // instructions with multiple outputs.
976 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
977  SDLoc SL(N);
978  EVT VT = N->getValueType(0);
979 
980  assert(VT == MVT::f32 || VT == MVT::f64);
981 
982  unsigned Opc
983  = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
984 
985  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
986  // omod
987  SDValue Ops[8];
988  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
989  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
990  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
991  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
992 }
993 
994 // We need to handle this here because tablegen doesn't support matching
995 // instructions with multiple outputs.
996 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
997  SDLoc SL(N);
998  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
999  unsigned Opc;
1000  if (Subtarget->hasMADIntraFwdBug())
1001  Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1002  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1003  else
1004  Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1005 
1006  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1007  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1008  Clamp };
1009  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1010 }
1011 
1012 // We need to handle this here because tablegen doesn't support matching
1013 // instructions with multiple outputs.
1014 void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1015  SDLoc SL(N);
1016  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1017  unsigned Opc;
1018  if (Subtarget->hasMADIntraFwdBug())
1019  Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1020  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1021  else
1022  Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1023 
1025  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1026  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1027  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1028  if (!SDValue(N, 0).use_empty()) {
1029  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1030  SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1031  MVT::i32, SDValue(Mad, 0), Sub0);
1032  ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1033  }
1034  if (!SDValue(N, 1).use_empty()) {
1035  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1036  SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1037  MVT::i32, SDValue(Mad, 0), Sub1);
1038  ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1039  }
1041 }
1042 
1043 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1044  if (!isUInt<16>(Offset))
1045  return false;
1046 
1047  if (!Base || Subtarget->hasUsableDSOffset() ||
1048  Subtarget->unsafeDSOffsetFoldingEnabled())
1049  return true;
1050 
1051  // On Southern Islands instruction with a negative base value and an offset
1052  // don't seem to work.
1053  return CurDAG->SignBitIsZero(Base);
1054 }
1055 
// Match a DS address as (Base, uimm16 Offset): folds (add n0, c0), rewrites
// (sub C, x) into (sub 0, x) with C as the offset, and turns a pure constant
// address into a materialized zero base plus offset so multiple accesses can
// share the base register. Always succeeds (falls back to offset 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  // NOTE(review): a guard such as
  // `if (CurDAG->isBaseWithConstantOffset(Addr)) {` appears to have been lost
  // at this point in this copy of the file (the operand accesses below assume
  // an add-like node) -- confirm against upstream LLVM.
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        // NOTE(review): the declarations of `Zero` (0 constant) and `Sub`
        // (the ISD::SUB node started below) appear to be on lines missing
        // from this copy -- confirm against upstream.

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
                                  Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          // NOTE(review): the declaration of `Opnds` (a small SDValue vector)
          // appears to be on a line missing from this copy -- confirm.
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          // Materialize (0 - x) so the original (sub C, x) becomes
          // (add (sub 0, x), C) with C folded into the DS offset.
          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      // NOTE(review): the declaration of `Zero` used below appears to be on a
      // line missing from this copy -- confirm against upstream.
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  // NOTE(review): the line setting Offset to a 0 target constant appears to
  // be missing here -- confirm against upstream.
  return true;
}
1127 
1128 bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1129  unsigned Offset1,
1130  unsigned Size) const {
1131  if (Offset0 % Size != 0 || Offset1 % Size != 0)
1132  return false;
1133  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1134  return false;
1135 
1136  if (!Base || Subtarget->hasUsableDSOffset() ||
1137  Subtarget->unsafeDSOffsetFoldingEnabled())
1138  return true;
1139 
1140  // On Southern Islands instruction with a negative base value and an offset
1141  // don't seem to work.
1142  return CurDAG->SignBitIsZero(Base);
1143 }
1144 
1145 // TODO: If offset is too big, put low 16-bit into offset.
1146 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1147  SDValue &Offset0,
1148  SDValue &Offset1) const {
1149  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1150 }
1151 
1152 bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1153  SDValue &Offset0,
1154  SDValue &Offset1) const {
1155  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1156 }
1157 
// Match a DS read2/write2 address as (Base, two uint8 offsets in units of
// Size bytes, the second one element after the first). Mirrors
// SelectDS1Addr1Offset: folds (add n0, c0), rewrites (sub C, x), and handles
// a pure constant address via a materialized zero base. Always succeeds
// (falls back to offsets 0 and 1).
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  // NOTE(review): a guard such as
  // `if (CurDAG->isBaseWithConstantOffset(Addr)) {` appears to have been lost
  // here in this copy of the file -- confirm against upstream LLVM.
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    // The second element starts Size bytes after the first.
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        // NOTE(review): the declaration of `Zero` (a 0 target constant)
        // appears to be on a line missing from this copy -- confirm.

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          // NOTE(review): the declaration of `Opnds` (a small SDValue vector)
          // appears to be on a line missing from this copy -- confirm.
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          // Materialize (0 - x) as the new base.
          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: share a zero base and put the constant into the
    // offsets, as in SelectDS1Addr1Offset.
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      // NOTE(review): the declaration of `Zero` used below appears to be on a
      // line missing from this copy -- confirm against upstream.
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
1237 
// Decompose Addr into the MUBUF operand set: resource pointer, VGPR address,
// SGPR offset, immediate offset, and the offen/idxen/addr64 flags. Divergent
// address components go into vaddr; uniform components become the resource
// pointer. Fails only when the subtarget prefers flat instructions.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instruction
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default every flag/operand to 0; the cases below override as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  // NOTE(review): a guard such as
  // `if (CurDAG->isBaseWithConstantOffset(Addr)) {` appears to have been lost
  // here in this copy of the file -- confirm against upstream LLVM.
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    // NOTE(review): the line setting Offset to a 0 target constant appears to
    // be missing here -- confirm against upstream.
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  // NOTE(review): one or more lines appear to be missing below (zeroing
  // Offset and the opening of the `SDValue(CurDAG->getMachineNode(`
  // expression) -- confirm against upstream.
  SOffset =
              AMDGPU::S_MOV_B32, DL, MVT::i32,
              CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
          0);
  return true;
}
1322 
1323 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1324  SDValue &VAddr, SDValue &SOffset,
1325  SDValue &Offset) const {
1326  SDValue Ptr, Offen, Idxen, Addr64;
1327 
1328  // addr64 bit was removed for volcanic islands.
1329  // FIXME: This should be a pattern predicate and not reach here
1330  if (!Subtarget->hasAddr64())
1331  return false;
1332 
1333  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1334  return false;
1335 
1336  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1337  if (C->getSExtValue()) {
1338  SDLoc DL(Addr);
1339 
1340  const SITargetLowering& Lowering =
1341  *static_cast<const SITargetLowering*>(getTargetLowering());
1342 
1343  SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1344  return true;
1345  }
1346 
1347  return false;
1348 }
1349 
1350 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1351  SDLoc DL(N);
1352 
1353  auto *FI = dyn_cast<FrameIndexSDNode>(N);
1354  SDValue TFI =
1355  FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1356 
1357  // We rebase the base address into an absolute stack address and hence
1358  // use constant 0 for soffset. This value must be retained until
1359  // frame elimination and eliminateFrameIndex will choose the appropriate
1360  // frame register if need be.
1361  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1362 }
1363 
// Match a private (scratch) address for a MUBUF access with offen: produces
// the scratch rsrc, a VGPR address, an SGPR offset, and a 12-bit immediate
// offset. Always succeeds except implicitly via the fallthrough paths.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  // NOTE(review): the declaration of `Info` (presumably the current
  // function's SIMachineFunctionInfo) appears to be on lines missing from
  // this copy -- confirm against upstream.

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      // Split the constant: bits above 4095 go into a VGPR move, the low 12
      // bits into the immediate offset field.
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  // NOTE(review): a guard such as
  // `if (CurDAG->isBaseWithConstantOffset(Addr)) {` appears to have been lost
  // here -- confirm against upstream.
  // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1428 
1429 static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1430  if (Val.getOpcode() != ISD::CopyFromReg)
1431  return false;
1432  auto RC =
1433  TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1434  return RC && TRI.isSGPRClass(RC);
1435 }
1436 
// Match a uniform private (scratch) address for a MUBUF access without
// offen: either an SGPR copy, an SGPR copy plus a legal immediate offset, or
// a plain constant.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  // NOTE(review): the declaration of `Info` (presumably the current
  // function's SIMachineFunctionInfo) appears to be on lines missing from
  // this copy -- confirm against upstream.
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    // NOTE(review): the line setting Offset to a 0 target constant appears to
    // be missing here -- confirm against upstream.
    return true;
  }

  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  // NOTE(review): the line materializing Offset from CAddr's value appears to
  // be missing here -- confirm against upstream.
  return true;
}
1479 
1480 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1481  SDValue &SOffset, SDValue &Offset
1482  ) const {
1483  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1484  const SIInstrInfo *TII =
1485  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1486 
1487  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1488  return false;
1489 
1490  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1491  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1492  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1493  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1494  APInt::getAllOnes(32).getZExtValue(); // Size
1495  SDLoc DL(Addr);
1496 
1497  const SITargetLowering& Lowering =
1498  *static_cast<const SITargetLowering*>(getTargetLowering());
1499 
1500  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1501  return true;
1502  }
1503  return false;
1504 }
1505 
1506 // Find a load or store from corresponding pattern root.
1507 // Roots may be build_vector, bitconvert or their combinations.
1509  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1510  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1511  return MN;
1512  assert(isa<BuildVectorSDNode>(N));
1513  for (SDValue V : N->op_values())
1514  if (MemSDNode *MN =
1515  dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1516  return MN;
1517  llvm_unreachable("cannot find MemSDNode in the pattern!");
1518 }
1519 
// Split Addr for a FLAT-family access into (VAddr, legal immediate Offset)
// for the given FlatVariant. If the constant offset does not fit the
// encoding, the remainder is added back to the address with VALU adds so
// both pieces keep the same sign (see the comment below). Always succeeds.
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
  // NOTE(review): the final conjunct of this condition (an address-space
  // check) appears to be on a line missing from this copy -- confirm
  // against upstream.

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field and
        // add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          // 32-bit address: a single VALU add folds the remainder back in.
          // NOTE(review): the declaration of `Opnds` (a small SDValue vector)
          // appears to be on a line missing from this copy -- confirm.
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base address
          // is uniform and saddr is usable?
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          // NOTE(review): the declaration of `VTs` (an SDVTList used by the
          // add/addc below) appears to be on a line missing from this copy.

          // 64-bit add-with-carry: low add produces the carry consumed by the
          // high addc, then the halves are reassembled with REG_SEQUENCE.
          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}
1612 
1613 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1614  SDValue &VAddr,
1615  SDValue &Offset) const {
1616  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1617 }
1618 
1619 bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1620  SDValue &VAddr,
1621  SDValue &Offset) const {
1622  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1623 }
1624 
1625 bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1626  SDValue &VAddr,
1627  SDValue &Offset) const {
1628  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1630 }
1631 
1632 // If this matches zero_extend i32:x, return x
1634  if (Op.getOpcode() != ISD::ZERO_EXTEND)
1635  return SDValue();
1636 
1637  SDValue ExtSrc = Op.getOperand(0);
1638  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1639 }
1640 
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
//
// On success fills SAddr (uniform 64-bit base), VOffset (32-bit VGPR part)
// and Offset (legal immediate). When no VGPR part exists, a zero VGPR is
// materialized, which is cheaper than copying a 64-bit SGPR to VGPRs.
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
                                           SDValue Addr,
                                           SDValue &SAddr,
                                           SDValue &VOffset,
                                           SDValue &Offset) const {
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.

  SDValue LHS, RHS;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    const SIInstrInfo *TII = Subtarget->getInstrInfo();

    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
    // NOTE(review): the closing of this condition (presumably the FlatGlobal
    // variant argument) appears to be on a line missing from this copy --
    // confirm against upstream.
      Addr = LHS;
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        SDLoc SL(N);
        // saddr + large_offset -> saddr +
        //                         (voffset = large_offset & ~MaxOffset) +
        //                         (large_offset & MaxOffset);
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        // NOTE(review): the argument list of splitFlatOffset appears to be on
        // a line missing from this copy -- confirm against upstream.

        if (isUInt<32>(RemainderOffset)) {
          // Put the out-of-range remainder into a VGPR move.
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          VOffset = SDValue(VMov, 0);
          SAddr = LHS;
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
          return true;
        }
      }

      // We are adding a 64 bit SGPR and a constant. If constant bus limit
      // is 1 we would need to perform 1 or 2 extra moves for each half of
      // the constant and it is better to do a scalar add and then issue a
      // single VALU instruction to materialize zero. Otherwise it is less
      // instructions to perform VALU adds with immediates or inline literals.
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return false;
    }
  }

  // Match the variable offset.
  if (Addr.getOpcode() == ISD::ADD) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);

    if (!LHS->isDivergent()) {
      // add (i64 sgpr), (zero_extend (i32 vgpr))
      if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
        SAddr = LHS;
        VOffset = ZextRHS;
      }
    }

    if (!SAddr && !RHS->isDivergent()) {
      // add (zero_extend (i32 vgpr)), (i64 sgpr)
      if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
        SAddr = RHS;
        VOffset = ZextLHS;
      }
    }

    if (SAddr) {
      Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
      return true;
    }
  }

  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
      isa<ConstantSDNode>(Addr))
    return false;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  SAddr = Addr;
  SDNode *VMov =
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
  // NOTE(review): the zero-constant argument closing this call appears to be
  // on a line missing from this copy -- confirm against upstream.
  VOffset = SDValue(VMov, 0);
  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
  return true;
}
1736 
1737 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1738  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1739  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1740  } else if (SAddr.getOpcode() == ISD::ADD &&
1741  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1742  // Materialize this into a scalar move for scalar address to avoid
1743  // readfirstlane.
1744  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1745  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1746  FI->getValueType(0));
1747  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1748  MVT::i32, TFI, SAddr.getOperand(1)),
1749  0);
1750  }
1751 
1752  return SAddr;
1753 }
1754 
// Match (32-bit SGPR base) + sext(imm offset)
//
// Fails for divergent addresses; otherwise always succeeds, splitting an
// unencodable constant offset with a scalar add.
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                                            SDValue &SAddr,
                                            SDValue &Offset) const {
  // SADDR scratch addressing needs a uniform address.
  if (Addr->isDivergent())
    return false;

  SDLoc DL(Addr);

  int64_t COffsetVal = 0;

  // NOTE(review): a guard such as
  // `if (CurDAG->isBaseWithConstantOffset(Addr)) {` appears to have been lost
  // here in this copy of the file -- confirm against upstream LLVM.
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
  } else {
    SAddr = Addr;
  }

  // Turn any frame index in the base into a target frame index / scalar add.
  SAddr = SelectSAddrFI(CurDAG, SAddr);

  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
  // NOTE(review): the closing of this condition (presumably the FlatScratch
  // variant argument) appears to be on a line missing from this copy.
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
    // NOTE(review): the splitFlatOffset argument list appears to be on a line
    // missing from this copy -- confirm against upstream.

    COffsetVal = SplitImmOffset;

    // Add the out-of-range remainder back into the scalar base. A frame
    // index cannot be used directly as an S_ADD_I32 operand, so materialize
    // the immediate first in that case.
    SDValue AddOffset =
        SAddr.getOpcode() == ISD::TargetFrameIndex
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
                                           SAddr, AddOffset),
                    0);
  }

  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);

  return true;
}
1798 
1799 // Check whether the flat scratch SVS swizzle bug affects this access.
1800 bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1801  SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
1802  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1803  return false;
1804 
1805  // The bug affects the swizzling of SVS accesses if there is any carry out
1806  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
1807  // voffset to (soffset + inst_offset).
1808  KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1809  KnownBits SKnown = KnownBits::computeForAddSub(
1810  true, false, CurDAG->computeKnownBits(SAddr),
1811  KnownBits::makeConstant(APInt(32, ImmOffset)));
1812  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
1813  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
1814  return (VMax & 3) + (SMax & 3) >= 4;
1815 }
1816 
1817 bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1818  SDValue &VAddr, SDValue &SAddr,
1819  SDValue &Offset) const {
1820  int64_t ImmOffset = 0;
1821 
1822  SDValue LHS, RHS;
1823  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1824  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1825  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1826 
1827  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1828  Addr = LHS;
1829  ImmOffset = COffsetVal;
1830  } else if (!LHS->isDivergent() && COffsetVal > 0) {
1831  SDLoc SL(N);
1832  // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1833  // (large_offset & MaxOffset);
1834  int64_t SplitImmOffset, RemainderOffset;
1835  std::tie(SplitImmOffset, RemainderOffset)
1836  = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1837 
1838  if (isUInt<32>(RemainderOffset)) {
1839  SDNode *VMov = CurDAG->getMachineNode(
1840  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1841  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1842  VAddr = SDValue(VMov, 0);
1843  SAddr = LHS;
1844  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1845  return false;
1846  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1847  return true;
1848  }
1849  }
1850  }
1851 
1852  if (Addr.getOpcode() != ISD::ADD)
1853  return false;
1854 
1855  LHS = Addr.getOperand(0);
1856  RHS = Addr.getOperand(1);
1857 
1858  if (!LHS->isDivergent() && RHS->isDivergent()) {
1859  SAddr = LHS;
1860  VAddr = RHS;
1861  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1862  SAddr = RHS;
1863  VAddr = LHS;
1864  } else {
1865  return false;
1866  }
1867 
1868  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1869  return false;
1870  SAddr = SelectSAddrFI(CurDAG, SAddr);
1871  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1872  return true;
1873 }
1874 
// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
//
// Exactly one of SOffset/Offset may be non-null. Returns true and writes
// the matched operand through the non-null pointer on success.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue *SOffset, SDValue *Offset,
                                          bool Imm32Only, bool IsBuffer) const {
  assert((!SOffset || !Offset) &&
         "Cannot match both soffset and offset at the same time!");

  // A non-constant offset can only be matched as an SGPR offset, and only
  // when it is (or zero-extends) a 32-bit scalar value.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C) {
    if (!SOffset)
      return false;
    if (ByteOffsetNode.getValueType().isScalarInteger() &&
        ByteOffsetNode.getValueType().getSizeInBits() == 32) {
      *SOffset = ByteOffsetNode;
      return true;
    }
    // Look through a zero-extension of a 32-bit value.
    if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
      if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
        *SOffset = ByteOffsetNode.getOperand(0);
        return true;
      }
    }
    return false;
  }

  SDLoc SL(ByteOffsetNode);

  // GFX9 and GFX10 have signed byte immediate offsets. The immediate
  // offset for S_BUFFER instructions is unsigned.
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
  // First try the subtarget's native immediate-offset encoding.
  Optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
  if (EncodedOffset && Offset && !Imm32Only) {
    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
    return true;
  }

  // SGPR and literal offsets are unsigned.
  if (ByteOffset < 0)
    return false;

  // Next try the 32-bit literal offset encoding (CI form).
  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
  if (EncodedOffset && Offset && Imm32Only) {
    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
    return true;
  }

  // Anything wider than 32 bits cannot be materialized in a single SGPR.
  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
    return false;

  // Fall back to materializing the constant in an SGPR with s_mov_b32.
  if (SOffset) {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    *SOffset = SDValue(
        CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
    return true;
  }

  return false;
}
1936 
1937 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1938  if (Addr.getValueType() != MVT::i32)
1939  return Addr;
1940 
1941  // Zero-extend a 32-bit address.
1942  SDLoc SL(Addr);
1943 
1946  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1947  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1948 
1949  const SDValue Ops[] = {
1950  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1951  Addr,
1952  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1953  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1954  0),
1955  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1956  };
1957 
1958  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1959  Ops), 0);
1960 }
1961 
// Match a base and an immediate (if Offset is not null) or an SGPR (if
// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
// true, match only 32-bit immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
                                              SDValue *SOffset, SDValue *Offset,
                                              bool Imm32Only,
                                              bool IsBuffer) const {
  // Matching both offsets: peel the immediate first, then recurse to match
  // an SGPR offset out of the remaining base expression.
  if (SOffset && Offset) {
    assert(!Imm32Only && !IsBuffer);
    SDValue B;
    return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
           SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
  }

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
      !Addr->getFlags().hasNoUnsignedWrap())
    return false;

  SDValue N0, N1;
  // Extract the base and offset if possible.
  if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);
  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
    assert(N0 && N1 && isa<ConstantSDNode>(N1));
  }
  if (!N0 || !N1)
    return false;
  // Addition is commutative: try either operand as the offset.
  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
    SBase = N0;
    return true;
  }
  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
    SBase = N1;
    return true;
  }
  return false;
}
2002 
2003 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2004  SDValue *SOffset, SDValue *Offset,
2005  bool Imm32Only) const {
2006  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2007  SBase = Expand32BitAddress(SBase);
2008  return true;
2009  }
2010 
2011  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2012  SBase = Expand32BitAddress(Addr);
2014  return true;
2015  }
2016 
2017  return false;
2018 }
2019 
2020 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2021  SDValue &Offset) const {
2022  return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
2023 }
2024 
2025 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2026  SDValue &Offset) const {
2027  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2028  return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2029  /* Imm32Only */ true);
2030 }
2031 
2032 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2033  SDValue &SOffset) const {
2034  return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2035 }
2036 
2037 bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2038  SDValue &SOffset,
2039  SDValue &Offset) const {
2040  return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2041 }
2042 
2043 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2044  return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2045  /* Imm32Only */ false, /* IsBuffer */ true);
2046 }
2047 
2048 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
2049  SDValue &Offset) const {
2050  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2051  return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2052  /* Imm32Only */ true, /* IsBuffer */ true);
2053 }
2054 
2055 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2056  SDValue &Offset) const {
2057  // Match the (soffset + offset) pair as a 32-bit register base and
2058  // an immediate offset.
2059  return N.getValueType() == MVT::i32 &&
2060  SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2061  &Offset, /* Imm32Only */ false,
2062  /* IsBuffer */ true);
2063 }
2064 
2065 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2066  SDValue &Base,
2067  SDValue &Offset) const {
2068  SDLoc DL(Index);
2069 
2071  SDValue N0 = Index.getOperand(0);
2072  SDValue N1 = Index.getOperand(1);
2073  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2074 
2075  // (add n0, c0)
2076  // Don't peel off the offset (c0) if doing so could possibly lead
2077  // the base (n0) to be negative.
2078  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2079  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2080  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2081  Base = N0;
2082  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2083  return true;
2084  }
2085  }
2086 
2087  if (isa<ConstantSDNode>(Index))
2088  return false;
2089 
2090  Base = Index;
2092  return true;
2093 }
2094 
2095 SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2096  SDValue Val, uint32_t Offset,
2097  uint32_t Width) {
2098  if (Val->isDivergent()) {
2099  unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2102 
2103  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2104  }
2105  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2106  // Transformation function, pack the offset and width of a BFE into
2107  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
2108  // source, bits [5:0] contain the offset and bits [22:16] the width.
2109  uint32_t PackedVal = Offset | (Width << 16);
2110  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2111 
2112  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2113 }
2114 
2115 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2116  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2117  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2118  // Predicate: 0 < b <= c < 32
2119 
2120  const SDValue &Shl = N->getOperand(0);
2121  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2122  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2123 
2124  if (B && C) {
2125  uint32_t BVal = B->getZExtValue();
2126  uint32_t CVal = C->getZExtValue();
2127 
2128  if (0 < BVal && BVal <= CVal && CVal < 32) {
2129  bool Signed = N->getOpcode() == ISD::SRA;
2130  ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2131  32 - CVal));
2132  return;
2133  }
2134  }
2135  SelectCode(N);
2136 }
2137 
// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG as a bitfield extract
// (via getBFE32); falls back to the generated matcher otherwise.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Extract width is the number of set bits in the mask.
          uint32_t WidthVal = countPopulation(MaskVal);
          ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
                                  WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Remaining mask after the shift must still be contiguous.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);
          ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
                                  WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // Width comes from the type being sign-extended from.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched.
  SelectCode(N);
}
2211 
2212 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2213  assert(N->getOpcode() == ISD::BRCOND);
2214  if (!N->hasOneUse())
2215  return false;
2216 
2217  SDValue Cond = N->getOperand(1);
2218  if (Cond.getOpcode() == ISD::CopyToReg)
2219  Cond = Cond.getOperand(2);
2220 
2221  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2222  return false;
2223 
2224  MVT VT = Cond.getOperand(0).getSimpleValueType();
2225  if (VT == MVT::i32)
2226  return true;
2227 
2228  if (VT == MVT::i64) {
2229  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2230 
2231  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2232  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2233  }
2234 
2235  return false;
2236 }
2237 
// Select a conditional branch as either S_CBRANCH_SCC1 (uniform condition)
// or S_CBRANCH_VCCNZ (divergent condition masked with exec).
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  // Undefined condition: branch direction is arbitrary.
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  // Use SCC only when the condition is a uniform scalar compare AND the
  // branch itself was marked uniform.
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND).
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when is unnecessary. But it would be better to add a separate
    // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    // catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
                                                         : AMDGPU::S_AND_B64,
                     SL, MVT::i1,
                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
                                                        : AMDGPU::EXEC,
                                         MVT::i1),
                    Cond),
                   0);
  }

  // Copy the (possibly masked) condition into SCC/VCC, then branch on it.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}
2284 
2285 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2286  MVT VT = N->getSimpleValueType(0);
2287  bool IsFMA = N->getOpcode() == ISD::FMA;
2288  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2289  !Subtarget->hasFmaMixInsts()) ||
2290  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2291  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2292  SelectCode(N);
2293  return;
2294  }
2295 
2296  SDValue Src0 = N->getOperand(0);
2297  SDValue Src1 = N->getOperand(1);
2298  SDValue Src2 = N->getOperand(2);
2299  unsigned Src0Mods, Src1Mods, Src2Mods;
2300 
2301  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2302  // using the conversion from f16.
2303  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2304  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2305  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2306 
2307  assert((IsFMA || !Mode.allFP32Denormals()) &&
2308  "fmad selected with denormals enabled");
2309  // TODO: We can select this with f32 denormals enabled if all the sources are
2310  // converted from f16 (in which case fmad isn't legal).
2311 
2312  if (Sel0 || Sel1 || Sel2) {
2313  // For dummy operands.
2315  SDValue Ops[] = {
2316  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2317  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2318  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2320  Zero, Zero
2321  };
2322 
2324  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2325  MVT::f32, Ops);
2326  } else {
2327  SelectCode(N);
2328  }
2329 }
2330 
2331 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2332  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2333  // be copied to an SGPR with readfirstlane.
2334  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2335  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2336 
2337  SDValue Chain = N->getOperand(0);
2338  SDValue Ptr = N->getOperand(2);
2339  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2340  MachineMemOperand *MMO = M->getMemOperand();
2341  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2342 
2343  SDValue Offset;
2345  SDValue PtrBase = Ptr.getOperand(0);
2346  SDValue PtrOffset = Ptr.getOperand(1);
2347 
2348  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2349  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2350  N = glueCopyToM0(N, PtrBase);
2351  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2352  }
2353  }
2354 
2355  if (!Offset) {
2356  N = glueCopyToM0(N, Ptr);
2358  }
2359 
2360  SDValue Ops[] = {
2361  Offset,
2362  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2363  Chain,
2364  N->getOperand(N->getNumOperands() - 1) // New glue
2365  };
2366 
2367  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2368  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2369 }
2370 
2371 // We need to handle this here because tablegen doesn't support matching
2372 // instructions with multiple outputs.
2373 void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2374  unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2375  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2376  N->getOperand(5), N->getOperand(0)};
2377 
2378  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2379  MachineMemOperand *MMO = M->getMemOperand();
2380  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2381  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2382 }
2383 
2384 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2385  switch (IntrID) {
2386  case Intrinsic::amdgcn_ds_gws_init:
2387  return AMDGPU::DS_GWS_INIT;
2388  case Intrinsic::amdgcn_ds_gws_barrier:
2389  return AMDGPU::DS_GWS_BARRIER;
2390  case Intrinsic::amdgcn_ds_gws_sema_v:
2391  return AMDGPU::DS_GWS_SEMA_V;
2392  case Intrinsic::amdgcn_ds_gws_sema_br:
2393  return AMDGPU::DS_GWS_SEMA_BR;
2394  case Intrinsic::amdgcn_ds_gws_sema_p:
2395  return AMDGPU::DS_GWS_SEMA_P;
2396  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2397  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2398  default:
2399  llvm_unreachable("not a gws intrinsic");
2400  }
2401 }
2402 
2403 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2404  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2405  !Subtarget->hasGWSSemaReleaseAll()) {
2406  // Let this error.
2407  SelectCode(N);
2408  return;
2409  }
2410 
2411  // Chain, intrinsic ID, vsrc, offset
2412  const bool HasVSrc = N->getNumOperands() == 4;
2413  assert(HasVSrc || N->getNumOperands() == 3);
2414 
2415  SDLoc SL(N);
2416  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2417  int ImmOffset = 0;
2418  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2419  MachineMemOperand *MMO = M->getMemOperand();
2420 
2421  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2422  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2423 
2424  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2425  // offset field) % 64. Some versions of the programming guide omit the m0
2426  // part, or claim it's from offset 0.
2427  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2428  // If we have a constant offset, try to use the 0 in m0 as the base.
2429  // TODO: Look into changing the default m0 initialization value. If the
2430  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2431  // the immediate offset.
2432  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2433  ImmOffset = ConstOffset->getZExtValue();
2434  } else {
2435  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2436  ImmOffset = BaseOffset.getConstantOperandVal(1);
2437  BaseOffset = BaseOffset.getOperand(0);
2438  }
2439 
2440  // Prefer to do the shift in an SGPR since it should be possible to use m0
2441  // as the result directly. If it's already an SGPR, it will be eliminated
2442  // later.
2443  SDNode *SGPROffset
2444  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2445  BaseOffset);
2446  // Shift to offset in m0
2447  SDNode *M0Base
2448  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2449  SDValue(SGPROffset, 0),
2450  CurDAG->getTargetConstant(16, SL, MVT::i32));
2451  glueCopyToM0(N, SDValue(M0Base, 0));
2452  }
2453 
2454  SDValue Chain = N->getOperand(0);
2455  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2456 
2457  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2459  if (HasVSrc)
2460  Ops.push_back(N->getOperand(2));
2461  Ops.push_back(OffsetField);
2462  Ops.push_back(Chain);
2463 
2464  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2465  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2466 }
2467 
2468 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2469  if (Subtarget->getLDSBankCount() != 16) {
2470  // This is a single instruction with a pattern.
2471  SelectCode(N);
2472  return;
2473  }
2474 
2475  SDLoc DL(N);
2476 
2477  // This requires 2 instructions. It is possible to write a pattern to support
2478  // this, but the generated isel emitter doesn't correctly deal with multiple
2479  // output instructions using the same physical register input. The copy to m0
2480  // is incorrectly placed before the second instruction.
2481  //
2482  // TODO: Match source modifiers.
2483  //
2484  // def : Pat <
2485  // (int_amdgcn_interp_p1_f16
2486  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2487  // (i32 timm:$attrchan), (i32 timm:$attr),
2488  // (i1 timm:$high), M0),
2489  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2490  // timm:$attrchan, 0,
2491  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2492  // let Predicates = [has16BankLDS];
2493  // }
2494 
2495  // 16 bank LDS
2497  N->getOperand(5), SDValue());
2498 
2500 
2501  SDNode *InterpMov =
2502  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2503  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2504  N->getOperand(3), // Attr
2505  N->getOperand(2), // Attrchan
2506  ToM0.getValue(1) // In glue
2507  });
2508 
2509  SDNode *InterpP1LV =
2510  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2511  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2512  N->getOperand(1), // Src0
2513  N->getOperand(3), // Attr
2514  N->getOperand(2), // Attrchan
2515  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2516  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2517  N->getOperand(4), // high
2518  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2519  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2520  SDValue(InterpMov, 1)
2521  });
2522 
2523  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2524 }
2525 
2526 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2527  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2528  switch (IntrID) {
2529  case Intrinsic::amdgcn_ds_append:
2530  case Intrinsic::amdgcn_ds_consume: {
2531  if (N->getValueType(0) != MVT::i32)
2532  break;
2533  SelectDSAppendConsume(N, IntrID);
2534  return;
2535  }
2536  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2537  SelectDSBvhStackIntrinsic(N);
2538  return;
2539  }
2540 
2541  SelectCode(N);
2542 }
2543 
2544 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2545  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2546  unsigned Opcode;
2547  switch (IntrID) {
2548  case Intrinsic::amdgcn_wqm:
2549  Opcode = AMDGPU::WQM;
2550  break;
2551  case Intrinsic::amdgcn_softwqm:
2552  Opcode = AMDGPU::SOFT_WQM;
2553  break;
2554  case Intrinsic::amdgcn_wwm:
2555  case Intrinsic::amdgcn_strict_wwm:
2556  Opcode = AMDGPU::STRICT_WWM;
2557  break;
2558  case Intrinsic::amdgcn_strict_wqm:
2559  Opcode = AMDGPU::STRICT_WQM;
2560  break;
2561  case Intrinsic::amdgcn_interp_p1_f16:
2562  SelectInterpP1F16(N);
2563  return;
2564  default:
2565  SelectCode(N);
2566  return;
2567  }
2568 
2569  SDValue Src = N->getOperand(1);
2570  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2571 }
2572 
2573 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2574  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2575  switch (IntrID) {
2576  case Intrinsic::amdgcn_ds_gws_init:
2577  case Intrinsic::amdgcn_ds_gws_barrier:
2578  case Intrinsic::amdgcn_ds_gws_sema_v:
2579  case Intrinsic::amdgcn_ds_gws_sema_br:
2580  case Intrinsic::amdgcn_ds_gws_sema_p:
2581  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2582  SelectDS_GWS(N, IntrID);
2583  return;
2584  default:
2585  break;
2586  }
2587 
2588  SelectCode(N);
2589 }
2590 
2591 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2592  unsigned &Mods,
2593  bool AllowAbs) const {
2594  Mods = 0;
2595  Src = In;
2596 
2597  if (Src.getOpcode() == ISD::FNEG) {
2598  Mods |= SISrcMods::NEG;
2599  Src = Src.getOperand(0);
2600  }
2601 
2602  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2603  Mods |= SISrcMods::ABS;
2604  Src = Src.getOperand(0);
2605  }
2606 
2607  return true;
2608 }
2609 
2610 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2611  SDValue &SrcMods) const {
2612  unsigned Mods;
2613  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2614  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2615  return true;
2616  }
2617 
2618  return false;
2619 }
2620 
2621 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2622  SDValue &SrcMods) const {
2623  unsigned Mods;
2624  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2625  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2626  return true;
2627  }
2628 
2629  return false;
2630 }
2631 
2632 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2633  SDValue &SrcMods) const {
2634  SelectVOP3Mods(In, Src, SrcMods);
2635  return CurDAG->isKnownNeverNaN(Src);
2636 }
2637 
2638 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2639  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2640  return false;
2641 
2642  Src = In;
2643  return true;
2644 }
2645 
2646 bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2647  SDValue &SrcMods,
2648  bool OpSel) const {
2649  unsigned Mods;
2650  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2651  if (OpSel)
2652  Mods |= SISrcMods::OP_SEL_0;
2653  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2654  return true;
2655  }
2656 
2657  return false;
2658 }
2659 
2660 bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2661  SDValue &SrcMods) const {
2662  return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
2663 }
2664 
2665 bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2666  SDValue &SrcMods) const {
2667  return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
2668 }
2669 
2670 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2671  SDValue &SrcMods, SDValue &Clamp,
2672  SDValue &Omod) const {
2673  SDLoc DL(In);
2674  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2675  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2676 
2677  return SelectVOP3Mods(In, Src, SrcMods);
2678 }
2679 
2680 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2681  SDValue &SrcMods, SDValue &Clamp,
2682  SDValue &Omod) const {
2683  SDLoc DL(In);
2684  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2685  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2686 
2687  return SelectVOP3BMods(In, Src, SrcMods);
2688 }
2689 
2690 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2691  SDValue &Clamp, SDValue &Omod) const {
2692  Src = In;
2693 
2694  SDLoc DL(In);
2695  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2696  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2697 
2698  return true;
2699 }
2700 
2701 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2702  SDValue &SrcMods, bool IsDOT) const {
2703  unsigned Mods = 0;
2704  Src = In;
2705 
2706  if (Src.getOpcode() == ISD::FNEG) {
2707  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2708  Src = Src.getOperand(0);
2709  }
2710 
2711  if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
2712  (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2713  unsigned VecMods = Mods;
2714 
2715  SDValue Lo = stripBitcast(Src.getOperand(0));
2716  SDValue Hi = stripBitcast(Src.getOperand(1));
2717 
2718  if (Lo.getOpcode() == ISD::FNEG) {
2719  Lo = stripBitcast(Lo.getOperand(0));
2720  Mods ^= SISrcMods::NEG;
2721  }
2722 
2723  if (Hi.getOpcode() == ISD::FNEG) {
2724  Hi = stripBitcast(Hi.getOperand(0));
2725  Mods ^= SISrcMods::NEG_HI;
2726  }
2727 
2728  if (isExtractHiElt(Lo, Lo))
2729  Mods |= SISrcMods::OP_SEL_0;
2730 
2731  if (isExtractHiElt(Hi, Hi))
2732  Mods |= SISrcMods::OP_SEL_1;
2733 
2734  unsigned VecSize = Src.getValueSizeInBits();
2735  Lo = stripExtractLoElt(Lo);
2736  Hi = stripExtractLoElt(Hi);
2737 
2738  if (Lo.getValueSizeInBits() > VecSize) {
2740  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2741  MVT::getIntegerVT(VecSize), Lo);
2742  }
2743 
2744  if (Hi.getValueSizeInBits() > VecSize) {
2746  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2747  MVT::getIntegerVT(VecSize), Hi);
2748  }
2749 
2750  assert(Lo.getValueSizeInBits() <= VecSize &&
2751  Hi.getValueSizeInBits() <= VecSize);
2752 
2753  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2754  // Really a scalar input. Just select from the low half of the register to
2755  // avoid packing.
2756 
2757  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2758  Src = Lo;
2759  } else {
2760  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2761 
2762  SDLoc SL(In);
2763  SDValue Undef = SDValue(
2764  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2765  Lo.getValueType()), 0);
2766  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2767  : AMDGPU::SReg_64RegClassID;
2768  const SDValue Ops[] = {
2769  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2770  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2771  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2772 
2773  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2774  Src.getValueType(), Ops), 0);
2775  }
2776  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2777  return true;
2778  }
2779 
2780  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2781  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2782  .bitcastToAPInt().getZExtValue();
2783  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2784  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
2785  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2786  return true;
2787  }
2788  }
2789 
2790  Mods = VecMods;
2791  }
2792 
2793  // Packed instructions do not have abs modifiers.
2794  Mods |= SISrcMods::OP_SEL_1;
2795 
2796  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2797  return true;
2798 }
2799 
2800 bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2801  SDValue &SrcMods) const {
2802  return SelectVOP3PMods(In, Src, SrcMods, true);
2803 }
2804 
2805 bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
2806  const ConstantSDNode *C = cast<ConstantSDNode>(In);
2807  // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
2808  // 1 promotes packed values to signed, 0 treats them as unsigned.
2809  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2810 
2811  unsigned Mods = SISrcMods::OP_SEL_1;
2812  unsigned SrcSign = C->getAPIntValue().getZExtValue();
2813  if (SrcSign == 1)
2814  Mods ^= SISrcMods::NEG;
2815 
2816  Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2817  return true;
2818 }
2819 
2820 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
2821  SDValue &Src) const {
2822  const ConstantSDNode *C = cast<ConstantSDNode>(In);
2823  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2824 
2825  unsigned Mods = SISrcMods::OP_SEL_1;
2826  unsigned SrcVal = C->getAPIntValue().getZExtValue();
2827  if (SrcVal == 1)
2828  Mods |= SISrcMods::OP_SEL_0;
2829 
2830  Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2831  return true;
2832 }
2833 
2834 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2835  SDValue &SrcMods) const {
2836  Src = In;
2837  // FIXME: Handle op_sel
2838  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2839  return true;
2840 }
2841 
2842 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2843  SDValue &SrcMods) const {
2844  // FIXME: Handle op_sel
2845  return SelectVOP3Mods(In, Src, SrcMods);
2846 }
2847 
// The return value is not whether the match is possible (which it always is),
// but whether or not it a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // First strip any outer fneg/fabs into modifier bits.
  SelectVOP3ModsImpl(In, Src, Mods);

  // Only an f16->f32 extension makes the mix form worthwhile.
  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // An inner fneg toggles (XOR) the accumulated neg bit.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    // If the sources's op_sel is set, it picks the high half of the source
    // register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}
2890 
2891 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2892  SDValue &SrcMods) const {
2893  unsigned Mods = 0;
2894  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2895  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2896  return true;
2897 }
2898 
2899 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2900  if (In.isUndef())
2901  return CurDAG->getUNDEF(MVT::i32);
2902 
2903  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2904  SDLoc SL(In);
2905  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2906  }
2907 
2908  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2909  SDLoc SL(In);
2910  return CurDAG->getConstant(
2911  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2912  }
2913 
2914  SDValue Src;
2915  if (isExtractHiElt(In, Src))
2916  return Src;
2917 
2918  return SDValue();
2919 }
2920 
2921 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2922  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2923 
2924  const SIRegisterInfo *SIRI =
2925  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2926  const SIInstrInfo * SII =
2927  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2928 
2929  unsigned Limit = 0;
2930  bool AllUsesAcceptSReg = true;
2931  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2932  Limit < 10 && U != E; ++U, ++Limit) {
2933  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2934 
2935  // If the register class is unknown, it could be an unknown
2936  // register class that needs to be an SGPR, e.g. an inline asm
2937  // constraint
2938  if (!RC || SIRI->isSGPRClass(RC))
2939  return false;
2940 
2941  if (RC != &AMDGPU::VS_32RegClass) {
2942  AllUsesAcceptSReg = false;
2943  SDNode * User = *U;
2944  if (User->isMachineOpcode()) {
2945  unsigned Opc = User->getMachineOpcode();
2946  MCInstrDesc Desc = SII->get(Opc);
2947  if (Desc.isCommutable()) {
2948  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2949  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2950  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2951  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2952  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2953  if (CommutedRC == &AMDGPU::VS_32RegClass)
2954  AllUsesAcceptSReg = true;
2955  }
2956  }
2957  }
2958  // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2959  // commuting current user. This means have at least one use
2960  // that strictly require VGPR. Thus, we will not attempt to commute
2961  // other user instructions.
2962  if (!AllUsesAcceptSReg)
2963  break;
2964  }
2965  }
2966  return !AllUsesAcceptSReg && (Limit < 10);
2967 }
2968 
2969 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2970  auto Ld = cast<LoadSDNode>(N);
2971 
2972  if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(Ld->getMemOperand()))
2973  return false;
2974 
2975  return Ld->getAlign() >= Align(4) &&
2976  ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2977  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
2978  (Subtarget->getScalarizeGlobalBehavior() &&
2979  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2980  Ld->isSimple() &&
2981  static_cast<const SITargetLowering *>(getTargetLowering())
2982  ->isMemOpHasNoClobberedMemOperand(N)));
2983 }
2984 
2987  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2988  bool IsModified = false;
2989  do {
2990  IsModified = false;
2991 
2992  // Go over all selected nodes and try to fold them a bit more
2994  while (Position != CurDAG->allnodes_end()) {
2995  SDNode *Node = &*Position++;
2996  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2997  if (!MachineNode)
2998  continue;
2999 
3000  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
3001  if (ResNode != Node) {
3002  if (ResNode)
3003  ReplaceUses(Node, ResNode);
3004  IsModified = true;
3005  }
3006  }
3008  } while (IsModified);
3009 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::AMDGPUISD::CLAMP
@ CLAMP
CLAMP value between 0.0 and 1.0.
Definition: AMDGPUISelLowering.h:379
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:917
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1582
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:99
llvm::SelectionDAGISel::getTargetLowering
const TargetLowering * getTargetLowering() const
Definition: SelectionDAGISel.h:71
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4715
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:184
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SIInstrFlags::IsDOT
@ IsDOT
Definition: SIDefines.h:120
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition: AMDGPUISelDAGToDAG.cpp:211
llvm::ISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: ISDOpcodes.h:1199
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
AMDGPUISelDAGToDAG.h
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:410
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1449
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2524
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:3051
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
SIMachineFunctionInfo.h
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:767
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::SelectionDAG::allnodes_end
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:526
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:2439
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:152
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:373
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1444
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9378
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:465
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2897
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:390
llvm::SelectionDAG::allnodes_begin
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:525
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::AMDGPUISD::DIV_FIXUP
@ DIV_FIXUP
Definition: AMDGPUISelLowering.h:412
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:494
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:59
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:613
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:920
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1378
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:2456
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2344
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:745
llvm::AMDGPU::VOP3PEncoding::OpSel
OpSel
Definition: SIDefines.h:882
Shift
bool Shift
Definition: README.txt:468
AMDGPUDAGToDAGISel
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
Definition: AMDGPUISelDAGToDAG.h:79
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4709
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:694
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:830
f32
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:114
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1275
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1633
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:943
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:712
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:335
AMDGPUDAGToDAGISel::PreprocessISelDAG
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition: AMDGPUISelDAGToDAG.cpp:291
SelectionDAG.h
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:471
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:372
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:228
AMDGPUDAGToDAGISel::SelectBuildVector
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition: AMDGPUISelDAGToDAG.cpp:442
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:912
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:402
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
AMDGPUDAGToDAGISel::PostprocessISelDAG
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition: AMDGPUISelDAGToDAG.cpp:2985
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1001
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:371
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:216
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:418
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2121
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:649
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1737
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
i64
Clang compiles this i64
Definition: README.txt:504
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel
AMDGPUDAGToDAGISel(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
Definition: AMDGPUISelDAGToDAG.cpp:119
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
AMDGPUDAGToDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: AMDGPUISelDAGToDAG.cpp:201
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:720
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1498
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:329
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:477
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:379
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:466
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:1023
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:405
GFX9
@ GFX9
Definition: SIInstrInfo.cpp:7971
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1514
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:109
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:496
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
AMDGPUSubtarget.h
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:231
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:478
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:465
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2423
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:77
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7763
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SelectionDAG::dump
void dump() const
Definition: SelectionDAGDumper.cpp:925
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:468
llvm::SIRegisterInfo::isSGPRClass
static bool isSGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:190
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:90
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
Definition: SelectionDAG.cpp:4721
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:354
llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition: GCNSubtarget.h:1086
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1153
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:181
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:389
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:809
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1508
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:330
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1196
i32
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32
Definition: README.txt:122
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:918
llvm::sys::unicode::SBase
constexpr const char32_t SBase
Definition: UnicodeNameToCodepoint.cpp:256
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:997
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:501
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:222
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:425
AMDGPUMCTargetDesc.h
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:921
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:486
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1631
llvm::SelectionDAGISel::TII
const TargetInstrInfo * TII
Definition: SelectionDAGISel.h:55
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
isExtractHiElt
static MachineInstr * isExtractHiElt(MachineInstr *Inst, MachineRegisterInfo &MRI)
Definition: AMDGPUInstructionSelector.cpp:4893
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:492
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:725
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::SelectionDAGISel::FuncInfo
std::unique_ptr< FunctionLoweringInfo > FuncInfo
Definition: SelectionDAGISel.h:45
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:657
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:499
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:415
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:508
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9072
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:467
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:431
AMDGPUDAGToDAGISel::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUISelDAGToDAG.cpp:772
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2359
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:160
llvm::KnownBits::getMaxValue
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
llvm::SelectionDAG::MorphNodeTo
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
Definition: SelectionDAG.cpp:9715
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:179
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:395
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1597
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:420
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
i16
< i32 > ret i32 conv5 And the following x86 eax movsbl ecx cmpl ecx sete al movzbl eax ret It should be possible to eliminate the sign extensions LLVM misses a load store narrowing opportunity in this i16
Definition: README.txt:1493
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:208
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:49
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9816
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:304
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9584
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:286
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:1035
AMDGPUDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: AMDGPUISelDAGToDAG.cpp:497
v4i32
Vector Rotate Left Mask Mask v4i32
Definition: README_P9.txt:112
llvm::SelectionDAG::SelectNodeTo
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
Definition: SelectionDAG.cpp:9608
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
R600RegisterInfo.h
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:434
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:224
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:307
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1108
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:571
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
AMDGPU.h
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:497
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10411
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:223
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:922
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::ISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: ISDOpcodes.h:1198
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:923
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1404
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2363
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:587
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:47
AMDGPUInstrInfo.h
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:419
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2910
llvm::KnownBits
Definition: KnownBits.h:23
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:495
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:443
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1366
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:916
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:548
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:915
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9934
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:210
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:635
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2384
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:464
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:433
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:267
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:2293
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::GCNSubtarget::hasMADIntraFwdBug
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:916
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:375
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:41
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:399
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:184
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:444
Dominators.h
N
#define N
llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1579
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:325
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:125
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:625
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:123
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:394
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:108
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:669
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:911
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:466
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:219
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
InlinePriorityMode::Size
@ Size
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:374
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
f64
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP xscmp *dp write to VSX register Use int_ppc_vsx_xscmpeqdp f64
Definition: README_P9.txt:314
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:422
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:507
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:919
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:414
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:470
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:228
AMDGPUTargetMachine.h
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:430
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
AMDGPUDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: AMDGPUISelDAGToDAG.cpp:126
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1251
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:493
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1429
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:379
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393