//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE "isel"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
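// For example (illustrative):
//   (i16 (extract_vector_elt (v2i16 v), 1)) -> Out = v
//   (i16 (trunc (srl (i32 v), (i32 16))))   -> Out = v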
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);

  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (!Idx->isOne())
        return false;
      Out = In.getOperand(0);
      return true;
    }
  }

  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
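// e.g. (i16 (extract_vector_elt (v2i16 v), 0)) and (i16 (trunc (i32 v)))
// both reduce to v (illustrative).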
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (Idx->isZero() && In.getValueSizeInBits() <= 32)
        return In.getOperand(0);
    }
  }

  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
    TargetMachine *TM /*= nullptr*/,
    CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
    : SelectionDAGISel(*TM, OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

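// Returns true when the instruction selected for \p Opc on an f16 source is
// known to write zeros to the high 16 bits of the 32-bit destination register
// (a descriptive note; the answer is generation-dependent, see below).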
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  case AMDGPUISD::DIV_FIXUP:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}

void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AMDGPUArgumentUsageInfo>();
  AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
#endif
  SelectionDAGISel::getAnalysisUsage(AU);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().hasNoNaNs())
    return true;

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
  return glueCopyToOp(N, M0, M0.getValue(1));
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

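// Materialize a 64-bit immediate into an SGPR pair: two S_MOV_B32 for the low
// and high halves combined with a REG_SEQUENCE (descriptive note).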
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
                            WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return SelectMUL_LOHI(N);
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  assert(N->getOpcode() == ISD::AND);

  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countTrailingOnes() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
  return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
}

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's a complicated pattern to match,
    // i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
    AMDGPU::S_MOV_B32, DL, MVT::i32,
    CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

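  // Opcode table indexed as [has carry-in][is divergent][is add]; e.g.
  // OpcMap[0][1][1] is V_ADD_CO_U32_e32, the divergent add that produces a
  // carry-out but consumes none (illustrative note).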
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  // If there are no source modifiers, prefer fmac over fma because it can use
  // the smaller VOP2 encoding.
  bool UseFMAC = Subtarget->hasDLInsts() &&
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
  if (!SDValue(N, 0).use_empty()) {
    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
    SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub0);
    ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
  }
  if (!SDValue(N, 1).use_empty()) {
    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
    SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub1);
    ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
  }
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.
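    // (Illustrative: two adjacent dword slots addressed from the shared zero
    // base can then be merged into one ds_read2_b32 / ds_write2_b32 using the
    // offset0/offset1 fields.)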

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}

bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instructions
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset. This value must be retained until
  // frame elimination and eliminateFrameIndex will choose the appropriate
  // frame register if need be.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
        AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
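    // (Illustrative: vaddr = -4 with an immediate offset of 8 computes the
    // valid address 4, but the pre-gfx9 range check on vaddr alone treats the
    // access as out-of-bounds and returns 0.)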
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
  if (Val.getOpcode() != ISD::CopyFromReg)
    return false;
  auto RC =
      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset
                                           ) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnes(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field
        // and add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.
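        // (Illustrative, assuming a 12-bit signed immediate field: an offset
        // of 4100 splits into 4096 added to vaddr plus 4 in the field, while
        // -4100 splits into -4096 and -4, keeping both pieces the same sign.)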

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base
          // address is uniform and saddr is usable?
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
}

bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
}

bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              SIInstrFlags::FlatScratch);
}

// If this matches zero_extend i32:x, return x
static SDValue matchZExtFromI32(SDValue Op) {
  if (Op.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue ExtSrc = Op.getOperand(0);
  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
}

1640 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1641 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1642  SDValue Addr,
1643  SDValue &SAddr,
1644  SDValue &VOffset,
1645  SDValue &Offset) const {
1646  int64_t ImmOffset = 0;
1647 
1648  // Match the immediate offset first, which canonically is moved as low as
1649  // possible.
1650 
1651  SDValue LHS, RHS;
1652  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1653  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1654  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1655 
1656  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1658  Addr = LHS;
1659  ImmOffset = COffsetVal;
1660  } else if (!LHS->isDivergent()) {
1661  if (COffsetVal > 0) {
1662  SDLoc SL(N);
1663  // saddr + large_offset -> saddr +
1664  // (voffset = large_offset & ~MaxOffset) +
1665  // (large_offset & MaxOffset);
1666  int64_t SplitImmOffset, RemainderOffset;
1667  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1669 
1670  if (isUInt<32>(RemainderOffset)) {
1671  SDNode *VMov = CurDAG->getMachineNode(
1672  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1673  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1674  VOffset = SDValue(VMov, 0);
1675  SAddr = LHS;
1676  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1677  return true;
1678  }
1679  }
1680 
1681  // We are adding a 64 bit SGPR and a constant. If constant bus limit
1682  // is 1 we would need to perform 1 or 2 extra moves for each half of
1683  // the constant and it is better to do a scalar add and then issue a
1684  // single VALU instruction to materialize zero. Otherwise it is less
1685  // instructions to perform VALU adds with immediates or inline literals.
1686  unsigned NumLiterals =
1687  !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1688  !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1689  if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1690  return false;
1691  }
1692  }
1693 
1694  // Match the variable offset.
1695  if (Addr.getOpcode() == ISD::ADD) {
1696  LHS = Addr.getOperand(0);
1697  RHS = Addr.getOperand(1);
1698 
1699  if (!LHS->isDivergent()) {
1700  // add (i64 sgpr), (zero_extend (i32 vgpr))
1701  if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1702  SAddr = LHS;
1703  VOffset = ZextRHS;
1704  }
1705  }
1706 
1707  if (!SAddr && !RHS->isDivergent()) {
1708  // add (zero_extend (i32 vgpr)), (i64 sgpr)
1709  if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1710  SAddr = RHS;
1711  VOffset = ZextLHS;
1712  }
1713  }
1714 
1715  if (SAddr) {
1716  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1717  return true;
1718  }
1719  }
1720 
1721  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1722  isa<ConstantSDNode>(Addr))
1723  return false;
1724 
1725  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1726  // moves required to copy a 64-bit SGPR to VGPR.
1727  SAddr = Addr;
1728  SDNode *VMov =
1729  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1730  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1731  VOffset = SDValue(VMov, 0);
1732  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1733  return true;
1734 }
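// A worked example of the split above, using a hypothetical 12-bit legal
// immediate range (MaxOffset = 0xfff): COffsetVal = 0x11234 becomes
//   VOffset = V_MOV_B32 (0x11234 & ~0xfff) = 0x11000
//   Offset  =            0x11234 &  0xfff  = 0x234
// so the address computes as SAddr + 0x11000 + 0x234. The real range is
// subtarget-dependent and decided by SIInstrInfo::isLegalFLATOffset.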
1735 
1736 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1737  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1738  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1739  } else if (SAddr.getOpcode() == ISD::ADD &&
1740  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1741  // Materialize this into a scalar move for the scalar address to avoid
1742  // a readfirstlane.
1743  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1744  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1745  FI->getValueType(0));
1746  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1747  MVT::i32, TFI, SAddr.getOperand(1)),
1748  0);
1749  }
1750 
1751  return SAddr;
1752 }
1753 
1754 // Match (32-bit SGPR base) + sext(imm offset)
1755 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1756  SDValue &SAddr,
1757  SDValue &Offset) const {
1758  if (Addr->isDivergent())
1759  return false;
1760 
1761  SDLoc DL(Addr);
1762 
1763  int64_t COffsetVal = 0;
1764 
1765  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1766  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1767  SAddr = Addr.getOperand(0);
1768  } else {
1769  SAddr = Addr;
1770  }
1771 
1772  SAddr = SelectSAddrFI(CurDAG, SAddr);
1773 
1774  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1775 
1776  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1777  SIInstrFlags::FlatScratch)) {
1778  int64_t SplitImmOffset, RemainderOffset;
1779  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1780  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1781 
1782  COffsetVal = SplitImmOffset;
1783 
1784  SDValue AddOffset =
1785  SAddr.getOpcode() == ISD::TargetFrameIndex
1786  ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1787  : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1788  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1789  SAddr, AddOffset),
1790  0);
1791  }
1792 
1793  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1794 
1795  return true;
1796 }
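// For example (values hypothetical), a frame index plus 0x11234 with a
// 12-bit legal range would select
//   SAddr  = S_ADD_I32 <tfi>, 0x11000
//   Offset = 0x234
// keeping only the residual immediate in the instruction encoding.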
1797 
1798 bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1799  SDValue &VAddr, SDValue &SAddr,
1800  SDValue &Offset) const {
1801  int64_t ImmOffset = 0;
1802 
1803  SDValue LHS, RHS;
1804  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1805  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1806  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1807 
1808  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1809  Addr = LHS;
1810  ImmOffset = COffsetVal;
1811  } else if (!LHS->isDivergent() && COffsetVal > 0) {
1812  SDLoc SL(N);
1813  // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1814  // (large_offset & MaxOffset);
1815  int64_t SplitImmOffset, RemainderOffset;
1816  std::tie(SplitImmOffset, RemainderOffset)
1817  = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1818 
1819  if (isUInt<32>(RemainderOffset)) {
1820  SDNode *VMov = CurDAG->getMachineNode(
1821  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1822  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1823  VAddr = SDValue(VMov, 0);
1824  SAddr = LHS;
1825  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1826  return true;
1827  }
1828  }
1829  }
1830 
1831  if (Addr.getOpcode() != ISD::ADD)
1832  return false;
1833 
1834  LHS = Addr.getOperand(0);
1835  RHS = Addr.getOperand(1);
1836 
1837  if (!LHS->isDivergent() && RHS->isDivergent()) {
1838  SAddr = LHS;
1839  VAddr = RHS;
1840  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1841  SAddr = RHS;
1842  VAddr = LHS;
1843  } else {
1844  return false;
1845  }
1846 
1847  SAddr = SelectSAddrFI(CurDAG, SAddr);
1848  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1849  return true;
1850 }
1851 
1852 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1853  SDValue &Offset, bool &Imm) const {
1854  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1855  if (!C) {
1856  if (ByteOffsetNode.getValueType().isScalarInteger() &&
1857  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1858  Offset = ByteOffsetNode;
1859  Imm = false;
1860  return true;
1861  }
1862  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1863  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1864  Offset = ByteOffsetNode.getOperand(0);
1865  Imm = false;
1866  return true;
1867  }
1868  }
1869  return false;
1870  }
1871 
1872  SDLoc SL(ByteOffsetNode);
1873  // GFX9 and GFX10 have signed byte immediate offsets.
1874  int64_t ByteOffset = C->getSExtValue();
1875  Optional<int64_t> EncodedOffset =
1876  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1877  if (EncodedOffset) {
1878  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1879  Imm = true;
1880  return true;
1881  }
1882 
1883  // SGPR and literal offsets are unsigned.
1884  if (ByteOffset < 0)
1885  return false;
1886 
1887  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1888  if (EncodedOffset) {
1889  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1890  return true;
1891  }
1892 
1893  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1894  return false;
1895 
1896  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1897  Offset = SDValue(
1898  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1899 
1900  return true;
1901 }
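// In summary, the byte offset is returned one of three ways:
//  1) an encodable immediate           -> Imm = true,  target constant
//  2) a 32-bit literal (older targets) -> Imm = false, target constant
//  3) otherwise, if it fits in 32 bits -> Imm = false, S_MOV_B32 of the value
// The encodable ranges are generation-specific and are decided entirely by
// AMDGPU::getSMRDEncodedOffset and AMDGPU::getSMRDEncodedLiteralOffset32.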
1902 
1903 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1904  if (Addr.getValueType() != MVT::i32)
1905  return Addr;
1906 
1907  // Zero-extend a 32-bit address.
1908  SDLoc SL(Addr);
1909 
1910  const MachineFunction &MF = CurDAG->getMachineFunction();
1911  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1912  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1913  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1914 
1915  const SDValue Ops[] = {
1916  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1917  Addr,
1918  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1919  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1920  0),
1921  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1922  };
1923 
1924  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1925  Ops), 0);
1926 }
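// The emitted node sequence is roughly (pseudo-MIR, for illustration):
//   %hi:sgpr_32        = S_MOV_B32 <high aperture bits>
//   %ptr:sreg_64_xexec = REG_SEQUENCE %addr, %subreg.sub0, %hi, %subreg.sub1
// i.e. the 32-bit address becomes the low half of a 64-bit SGPR pair.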
1927 
1928 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1929  SDValue &Offset, bool &Imm) const {
1930  SDLoc SL(Addr);
1931 
1932  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1933  // wraparound, because s_load instructions perform the addition in 64 bits.
1934  if ((Addr.getValueType() != MVT::i32 ||
1935  Addr->getFlags().hasNoUnsignedWrap())) {
1936  SDValue N0, N1;
1937  // Extract the base and offset if possible.
1938  if (CurDAG->isBaseWithConstantOffset(Addr) ||
1939  Addr.getOpcode() == ISD::ADD) {
1940  N0 = Addr.getOperand(0);
1941  N1 = Addr.getOperand(1);
1942  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1943  assert(N0 && N1 && isa<ConstantSDNode>(N1));
1944  }
1945  if (N0 && N1) {
1946  if (SelectSMRDOffset(N1, Offset, Imm)) {
1947  SBase = Expand32BitAddress(N0);
1948  return true;
1949  }
1950  }
1951  }
1952  SBase = Expand32BitAddress(Addr);
1953  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
1954  Imm = true;
1955  return true;
1956 }
1957 
1958 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1959  SDValue &Offset) const {
1960  bool Imm = false;
1961  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1962 }
1963 
1964 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1965  SDValue &Offset) const {
1966 
1967  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1968 
1969  bool Imm = false;
1970  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1971  return false;
1972 
1973  return !Imm && isa<ConstantSDNode>(Offset);
1974 }
1975 
1976 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1977  SDValue &Offset) const {
1978  bool Imm = false;
1979  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1980  !isa<ConstantSDNode>(Offset);
1981 }
1982 
1983 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1984  SDValue &Offset) const {
1985  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1986  // The immediate offset for S_BUFFER instructions is unsigned.
1987  if (auto Imm =
1988  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
1989  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1990  return true;
1991  }
1992  }
1993 
1994  return false;
1995 }
1996 
1997 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1998  SDValue &Offset) const {
1999  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2000 
2001  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2002  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2003  C->getZExtValue())) {
2004  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2005  return true;
2006  }
2007  }
2008 
2009  return false;
2010 }
2011 
2012 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2013  SDValue &Base,
2014  SDValue &Offset) const {
2015  SDLoc DL(Index);
2016 
2017  if (CurDAG->isBaseWithConstantOffset(Index)) {
2018  SDValue N0 = Index.getOperand(0);
2019  SDValue N1 = Index.getOperand(1);
2020  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2021 
2022  // (add n0, c0)
2023  // Don't peel off the offset (c0) if doing so could possibly lead
2024  // the base (n0) to be negative.
2025  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2026  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2027  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2028  Base = N0;
2029  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2030  return true;
2031  }
2032  }
2033 
2034  if (isa<ConstantSDNode>(Index))
2035  return false;
2036 
2037  Base = Index;
2038  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2039  return true;
2040 }
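// For example, (add %n0, 4) splits into Base = %n0, Offset = 4 only when the
// sign bit of %n0 is known to be zero; if %n0 might be negative, peeling off
// a positive offset could change which element the MOVREL indexes.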
2041 
2042 SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2043  SDValue Val, uint32_t Offset,
2044  uint32_t Width) {
2045  if (Val->isDivergent()) {
2046  unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2047  SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2048  SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2049 
2050  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2051  }
2052  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2053  // Pack the offset and width of the BFE into the format expected by
2054  // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
2055  // contain the offset and bits [22:16] the width.
2056  uint32_t PackedVal = Offset | (Width << 16);
2057  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2058 
2059  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2060 }
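// For example, Offset = 16 and Width = 8 pack as 16 | (8 << 16) = 0x80010,
// so "S_BFE_U32 dst, src, 0x80010" extracts bits [23:16] of src.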
2061 
2062 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2063  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2064  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2065  // Predicate: 0 < b <= c < 32
2066 
2067  const SDValue &Shl = N->getOperand(0);
2068  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2069  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2070 
2071  if (B && C) {
2072  uint32_t BVal = B->getZExtValue();
2073  uint32_t CVal = C->getZExtValue();
2074 
2075  if (0 < BVal && BVal <= CVal && CVal < 32) {
2076  bool Signed = N->getOpcode() == ISD::SRA;
2077  ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2078  32 - CVal));
2079  return;
2080  }
2081  }
2082  SelectCode(N);
2083 }
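// For example, with b = 8 and c = 24:
//   (srl (shl a, 8), 24) ---> BFE_U32 a, 16, 8
// i.e. offset = c - b = 16 and width = 32 - c = 8, bits [23:16] of a.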
2084 
2085 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2086  switch (N->getOpcode()) {
2087  case ISD::AND:
2088  if (N->getOperand(0).getOpcode() == ISD::SRL) {
2089  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2090  // Predicate: isMask(mask)
2091  const SDValue &Srl = N->getOperand(0);
2092  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2093  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2094 
2095  if (Shift && Mask) {
2096  uint32_t ShiftVal = Shift->getZExtValue();
2097  uint32_t MaskVal = Mask->getZExtValue();
2098 
2099  if (isMask_32(MaskVal)) {
2100  uint32_t WidthVal = countPopulation(MaskVal);
2101  ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2102  WidthVal));
2103  return;
2104  }
2105  }
2106  }
2107  break;
2108  case ISD::SRL:
2109  if (N->getOperand(0).getOpcode() == ISD::AND) {
2110  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2111  // Predicate: isMask(mask >> b)
2112  const SDValue &And = N->getOperand(0);
2113  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2114  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2115 
2116  if (Shift && Mask) {
2117  uint32_t ShiftVal = Shift->getZExtValue();
2118  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2119 
2120  if (isMask_32(MaskVal)) {
2121  uint32_t WidthVal = countPopulation(MaskVal);
2122  ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2123  WidthVal));
2124  return;
2125  }
2126  }
2127  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2128  SelectS_BFEFromShifts(N);
2129  return;
2130  }
2131  break;
2132  case ISD::SRA:
2133  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2134  SelectS_BFEFromShifts(N);
2135  return;
2136  }
2137  break;
2138 
2139  case ISD::SIGN_EXTEND_INREG: {
2140  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2141  SDValue Src = N->getOperand(0);
2142  if (Src.getOpcode() != ISD::SRL)
2143  break;
2144 
2145  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2146  if (!Amt)
2147  break;
2148 
2149  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2150  ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
2151  Amt->getZExtValue(), Width));
2152  return;
2153  }
2154  }
2155 
2156  SelectCode(N);
2157 }
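// Concrete instances of the patterns above:
//   (and (srl a, 4), 0xff)       ---> BFE_U32 a, 4, 8   (popcount(0xff) = 8)
//   (srl (and a, 0xff0), 4)      ---> BFE_U32 a, 4, 8   (0xff0 >> 4 = 0xff)
//   (sext_inreg (srl a, 16), i8) ---> BFE_I32 a, 16, 8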
2158 
2159 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2160  assert(N->getOpcode() == ISD::BRCOND);
2161  if (!N->hasOneUse())
2162  return false;
2163 
2164  SDValue Cond = N->getOperand(1);
2165  if (Cond.getOpcode() == ISD::CopyToReg)
2166  Cond = Cond.getOperand(2);
2167 
2168  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2169  return false;
2170 
2171  MVT VT = Cond.getOperand(0).getSimpleValueType();
2172  if (VT == MVT::i32)
2173  return true;
2174 
2175  if (VT == MVT::i64) {
2176  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2177 
2178  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2179  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2180  }
2181 
2182  return false;
2183 }
2184 
2185 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2186  SDValue Cond = N->getOperand(1);
2187 
2188  if (Cond.isUndef()) {
2189  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2190  N->getOperand(2), N->getOperand(0));
2191  return;
2192  }
2193 
2194  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2195  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2196 
2197  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2198  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2199  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2200  SDLoc SL(N);
2201 
2202  if (!UseSCCBr) {
2203  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2204  // analyzed what generates the vcc value, so we do not know whether vcc
2205  // bits for disabled lanes are 0. Thus we need to mask out bits for
2206  // disabled lanes.
2207  //
2208  // For the case that we select S_CBRANCH_SCC1 and it gets
2209  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, the S_AND is instead
2210  // inserted by SIInstrInfo::moveToVALU, which SIFixSGPRCopies calls.
2211  //
2212  // We could add an analysis of what generates the vcc value here and omit
2213  // the S_AND when it is unnecessary. But it would be better to add a
2214  // separate pass after SIFixSGPRCopies to do the unnecessary S_AND
2215  // removal, so it catches both cases.
2216  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2217  : AMDGPU::S_AND_B64,
2218  SL, MVT::i1,
2219  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2220  : AMDGPU::EXEC,
2221  MVT::i1),
2222  Cond),
2223  0);
2224  }
2225 
2226  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2227  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2228  N->getOperand(2), // Basic Block
2229  VCC.getValue(0));
2230 }
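// For a divergent condition on wave64 the emitted sequence is, roughly:
//   %masked = S_AND_B64 $exec, %cond
//   $vcc    = COPY %masked
//   S_CBRANCH_VCCNZ %target_bb
// while a uniform compare branches on SCC via S_CBRANCH_SCC1 instead.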
2231 
2232 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2233  MVT VT = N->getSimpleValueType(0);
2234  bool IsFMA = N->getOpcode() == ISD::FMA;
2235  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2236  !Subtarget->hasFmaMixInsts()) ||
2237  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2238  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2239  SelectCode(N);
2240  return;
2241  }
2242 
2243  SDValue Src0 = N->getOperand(0);
2244  SDValue Src1 = N->getOperand(1);
2245  SDValue Src2 = N->getOperand(2);
2246  unsigned Src0Mods, Src1Mods, Src2Mods;
2247 
2248  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2249  // using the conversion from f16.
2250  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2251  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2252  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2253 
2254  assert((IsFMA || !Mode.allFP32Denormals()) &&
2255  "fmad selected with denormals enabled");
2256  // TODO: We can select this with f32 denormals enabled if all the sources are
2257  // converted from f16 (in which case fmad isn't legal).
2258 
2259  if (Sel0 || Sel1 || Sel2) {
2260  // For dummy operands.
2261  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2262  SDValue Ops[] = {
2263  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2264  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2265  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2266  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32),
2267  Zero, Zero
2268  };
2269 
2270  CurDAG->SelectNodeTo(N,
2271  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2272  MVT::f32, Ops);
2273  } else {
2274  SelectCode(N);
2275  }
2276 }
2277 
2278 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2279  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2280  // be copied to an SGPR with readfirstlane.
2281  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2282  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2283 
2284  SDValue Chain = N->getOperand(0);
2285  SDValue Ptr = N->getOperand(2);
2286  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2287  MachineMemOperand *MMO = M->getMemOperand();
2288  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2289 
2290  SDValue Offset;
2291  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2292  SDValue PtrBase = Ptr.getOperand(0);
2293  SDValue PtrOffset = Ptr.getOperand(1);
2294 
2295  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2296  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2297  N = glueCopyToM0(N, PtrBase);
2298  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2299  }
2300  }
2301 
2302  if (!Offset) {
2303  N = glueCopyToM0(N, Ptr);
2304  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2305  }
2306 
2307  SDValue Ops[] = {
2308  Offset,
2309  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2310  Chain,
2311  N->getOperand(N->getNumOperands() - 1) // New glue
2312  };
2313 
2314  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2315  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2316 }
2317 
2318 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2319  switch (IntrID) {
2320  case Intrinsic::amdgcn_ds_gws_init:
2321  return AMDGPU::DS_GWS_INIT;
2322  case Intrinsic::amdgcn_ds_gws_barrier:
2323  return AMDGPU::DS_GWS_BARRIER;
2324  case Intrinsic::amdgcn_ds_gws_sema_v:
2325  return AMDGPU::DS_GWS_SEMA_V;
2326  case Intrinsic::amdgcn_ds_gws_sema_br:
2327  return AMDGPU::DS_GWS_SEMA_BR;
2328  case Intrinsic::amdgcn_ds_gws_sema_p:
2329  return AMDGPU::DS_GWS_SEMA_P;
2330  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2331  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2332  default:
2333  llvm_unreachable("not a gws intrinsic");
2334  }
2335 }
2336 
2337 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2338  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2339  !Subtarget->hasGWSSemaReleaseAll()) {
2340  // Let this error.
2341  SelectCode(N);
2342  return;
2343  }
2344 
2345  // Chain, intrinsic ID, vsrc, offset
2346  const bool HasVSrc = N->getNumOperands() == 4;
2347  assert(HasVSrc || N->getNumOperands() == 3);
2348 
2349  SDLoc SL(N);
2350  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2351  int ImmOffset = 0;
2352  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2353  MachineMemOperand *MMO = M->getMemOperand();
2354 
2355  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2356  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2357 
2358  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2359  // offset field) % 64. Some versions of the programming guide omit the m0
2360  // part, or claim it's from offset 0.
2361  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2362  // If we have a constant offset, try to use the 0 in m0 as the base.
2363  // TODO: Look into changing the default m0 initialization value. If the
2364  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2365  // the immediate offset.
2366  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2367  ImmOffset = ConstOffset->getZExtValue();
2368  } else {
2369  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2370  ImmOffset = BaseOffset.getConstantOperandVal(1);
2371  BaseOffset = BaseOffset.getOperand(0);
2372  }
2373 
2374  // Prefer to do the shift in an SGPR since it should be possible to use m0
2375  // as the result directly. If it's already an SGPR, it will be eliminated
2376  // later.
2377  SDNode *SGPROffset
2378  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2379  BaseOffset);
2380  // Shift to offset in m0
2381  SDNode *M0Base
2382  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2383  SDValue(SGPROffset, 0),
2384  CurDAG->getTargetConstant(16, SL, MVT::i32));
2385  glueCopyToM0(N, SDValue(M0Base, 0));
2386  }
2387 
2388  SDValue Chain = N->getOperand(0);
2389  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2390 
2391  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2392  SmallVector<SDValue, 5> Ops;
2393  if (HasVSrc)
2394  Ops.push_back(N->getOperand(2));
2395  Ops.push_back(OffsetField);
2396  Ops.push_back(Chain);
2397 
2398  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2399  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2400 }
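// For a variable offset the selected sequence is, roughly:
//   %s  = V_READFIRSTLANE_B32 %baseoffset
//   %m0 = S_LSHL_B32 %s, 16    ; offset lives in M0[21:16]
//   (copy %m0 to M0, glued)    ; the DS_GWS_* instruction then carries only
//   DS_GWS_* [vsrc,] imm       ; the residual immediate offset field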
2401 
2402 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2403  if (Subtarget->getLDSBankCount() != 16) {
2404  // This is a single instruction with a pattern.
2405  SelectCode(N);
2406  return;
2407  }
2408 
2409  SDLoc DL(N);
2410 
2411  // This requires 2 instructions. It is possible to write a pattern to support
2412  // this, but the generated isel emitter doesn't correctly deal with multiple
2413  // output instructions using the same physical register input. The copy to m0
2414  // is incorrectly placed before the second instruction.
2415  //
2416  // TODO: Match source modifiers.
2417  //
2418  // def : Pat <
2419  // (int_amdgcn_interp_p1_f16
2420  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2421  // (i32 timm:$attrchan), (i32 timm:$attr),
2422  // (i1 timm:$high), M0),
2423  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2424  // timm:$attrchan, 0,
2425  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2426  // let Predicates = [has16BankLDS];
2427  // }
2428 
2429  // 16 bank LDS
2430  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2431  N->getOperand(5), SDValue());
2432 
2433  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2434 
2435  SDNode *InterpMov =
2436  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2437  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2438  N->getOperand(3), // Attr
2439  N->getOperand(2), // Attrchan
2440  ToM0.getValue(1) // In glue
2441  });
2442 
2443  SDNode *InterpP1LV =
2444  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2445  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2446  N->getOperand(1), // Src0
2447  N->getOperand(3), // Attr
2448  N->getOperand(2), // Attrchan
2449  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2450  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2451  N->getOperand(4), // high
2452  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2453  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2454  SDValue(InterpMov, 1)
2455  });
2456 
2457  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2458 }
2459 
2460 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2461  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2462  switch (IntrID) {
2463  case Intrinsic::amdgcn_ds_append:
2464  case Intrinsic::amdgcn_ds_consume: {
2465  if (N->getValueType(0) != MVT::i32)
2466  break;
2467  SelectDSAppendConsume(N, IntrID);
2468  return;
2469  }
2470  }
2471 
2472  SelectCode(N);
2473 }
2474 
2475 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2476  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2477  unsigned Opcode;
2478  switch (IntrID) {
2479  case Intrinsic::amdgcn_wqm:
2480  Opcode = AMDGPU::WQM;
2481  break;
2482  case Intrinsic::amdgcn_softwqm:
2483  Opcode = AMDGPU::SOFT_WQM;
2484  break;
2485  case Intrinsic::amdgcn_wwm:
2486  case Intrinsic::amdgcn_strict_wwm:
2487  Opcode = AMDGPU::STRICT_WWM;
2488  break;
2489  case Intrinsic::amdgcn_strict_wqm:
2490  Opcode = AMDGPU::STRICT_WQM;
2491  break;
2492  case Intrinsic::amdgcn_interp_p1_f16:
2493  SelectInterpP1F16(N);
2494  return;
2495  default:
2496  SelectCode(N);
2497  return;
2498  }
2499 
2500  SDValue Src = N->getOperand(1);
2501  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2502 }
2503 
2504 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2505  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2506  switch (IntrID) {
2507  case Intrinsic::amdgcn_ds_gws_init:
2508  case Intrinsic::amdgcn_ds_gws_barrier:
2509  case Intrinsic::amdgcn_ds_gws_sema_v:
2510  case Intrinsic::amdgcn_ds_gws_sema_br:
2511  case Intrinsic::amdgcn_ds_gws_sema_p:
2512  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2513  SelectDS_GWS(N, IntrID);
2514  return;
2515  default:
2516  break;
2517  }
2518 
2519  SelectCode(N);
2520 }
2521 
2522 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2523  unsigned &Mods,
2524  bool AllowAbs) const {
2525  Mods = 0;
2526  Src = In;
2527 
2528  if (Src.getOpcode() == ISD::FNEG) {
2529  Mods |= SISrcMods::NEG;
2530  Src = Src.getOperand(0);
2531  }
2532 
2533  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2534  Mods |= SISrcMods::ABS;
2535  Src = Src.getOperand(0);
2536  }
2537 
2538  return true;
2539 }
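// For example, (fneg (fabs %x)) selects Src = %x with Mods = NEG | ABS
// (the hardware applies abs before neg), whereas (fabs (fneg %x)) only
// strips the outer fabs: Src = (fneg %x), Mods = ABS.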
2540 
2541 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2542  SDValue &SrcMods) const {
2543  unsigned Mods;
2544  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2545  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2546  return true;
2547  }
2548 
2549  return false;
2550 }
2551 
2552 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2553  SDValue &SrcMods) const {
2554  unsigned Mods;
2555  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2556  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2557  return true;
2558  }
2559 
2560  return false;
2561 }
2562 
2563 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2564  SDValue &SrcMods) const {
2565  SelectVOP3Mods(In, Src, SrcMods);
2566  return isNoNanSrc(Src);
2567 }
2568 
2569 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2570  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2571  return false;
2572 
2573  Src = In;
2574  return true;
2575 }
2576 
2577 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2578  SDValue &SrcMods, SDValue &Clamp,
2579  SDValue &Omod) const {
2580  SDLoc DL(In);
2581  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2582  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2583 
2584  return SelectVOP3Mods(In, Src, SrcMods);
2585 }
2586 
2587 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2588  SDValue &SrcMods, SDValue &Clamp,
2589  SDValue &Omod) const {
2590  SDLoc DL(In);
2591  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2592  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2593 
2594  return SelectVOP3BMods(In, Src, SrcMods);
2595 }
2596 
2597 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2598  SDValue &Clamp, SDValue &Omod) const {
2599  Src = In;
2600 
2601  SDLoc DL(In);
2602  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2603  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2604 
2605  return true;
2606 }
2607 
2608 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2609  SDValue &SrcMods, bool IsDOT) const {
2610  unsigned Mods = 0;
2611  Src = In;
2612 
2613  if (Src.getOpcode() == ISD::FNEG) {
2614  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2615  Src = Src.getOperand(0);
2616  }
2617 
2618  if (Src.getOpcode() == ISD::BUILD_VECTOR &&
2619  (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2620  unsigned VecMods = Mods;
2621 
2622  SDValue Lo = stripBitcast(Src.getOperand(0));
2623  SDValue Hi = stripBitcast(Src.getOperand(1));
2624 
2625  if (Lo.getOpcode() == ISD::FNEG) {
2626  Lo = stripBitcast(Lo.getOperand(0));
2627  Mods ^= SISrcMods::NEG;
2628  }
2629 
2630  if (Hi.getOpcode() == ISD::FNEG) {
2631  Hi = stripBitcast(Hi.getOperand(0));
2632  Mods ^= SISrcMods::NEG_HI;
2633  }
2634 
2635  if (isExtractHiElt(Lo, Lo))
2636  Mods |= SISrcMods::OP_SEL_0;
2637 
2638  if (isExtractHiElt(Hi, Hi))
2639  Mods |= SISrcMods::OP_SEL_1;
2640 
2641  unsigned VecSize = Src.getValueSizeInBits();
2642  Lo = stripExtractLoElt(Lo);
2643  Hi = stripExtractLoElt(Hi);
2644 
2645  if (Lo.getValueSizeInBits() > VecSize) {
2646  Lo = CurDAG->getTargetExtractSubreg(
2647  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2648  MVT::getIntegerVT(VecSize), Lo);
2649  }
2650 
2651  if (Hi.getValueSizeInBits() > VecSize) {
2652  Hi = CurDAG->getTargetExtractSubreg(
2653  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2654  MVT::getIntegerVT(VecSize), Hi);
2655  }
2656 
2657  assert(Lo.getValueSizeInBits() <= VecSize &&
2658  Hi.getValueSizeInBits() <= VecSize);
2659 
2660  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2661  // Really a scalar input. Just select from the low half of the register to
2662  // avoid packing.
2663 
2664  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2665  Src = Lo;
2666  } else {
2667  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2668 
2669  SDLoc SL(In);
2670  SDValue Undef = SDValue(
2671  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2672  Lo.getValueType()), 0);
2673  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2674  : AMDGPU::SReg_64RegClassID;
2675  const SDValue Ops[] = {
2676  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2677  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2678  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2679 
2680  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2681  Src.getValueType(), Ops), 0);
2682  }
2683  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2684  return true;
2685  }
2686 
2687  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2688  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2689  .bitcastToAPInt().getZExtValue();
2690  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2691  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2692  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2693  return true;
2694  }
2695  }
2696 
2697  Mods = VecMods;
2698  }
2699 
2700  // Packed instructions do not have abs modifiers.
2701  Mods |= SISrcMods::OP_SEL_1;
2702 
2703  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2704  return true;
2705 }
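// op_sel illustration for a v2f16 operand built as
//   (build_vector (extract_hi %r), (extract_hi %r)):
// both extracts are stripped, OP_SEL_0 and OP_SEL_1 are set, and the
// instruction reads the high half of %r for both lanes, so no explicit
// pack of a new register is needed.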
2706 
2707 bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2708  SDValue &SrcMods) const {
2709  return SelectVOP3PMods(In, Src, SrcMods, true);
2710 }
2711 
2712 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2713  SDValue &SrcMods) const {
2714  Src = In;
2715  // FIXME: Handle op_sel
2716  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2717  return true;
2718 }
2719 
2720 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2721  SDValue &SrcMods) const {
2722  // FIXME: Handle op_sel
2723  return SelectVOP3Mods(In, Src, SrcMods);
2724 }
2725 
2726 // The return value is not whether the match is possible (which it always is),
2727 // but whether or not a conversion is really used.
2728 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2729  unsigned &Mods) const {
2730  Mods = 0;
2731  SelectVOP3ModsImpl(In, Src, Mods);
2732 
2733  if (Src.getOpcode() == ISD::FP_EXTEND) {
2734  Src = Src.getOperand(0);
2735  assert(Src.getValueType() == MVT::f16);
2736  Src = stripBitcast(Src);
2737 
2738  // Be careful about folding modifiers if we already have an abs. fneg is
2739  // applied last, so we don't want to apply an earlier fneg.
2740  if ((Mods & SISrcMods::ABS) == 0) {
2741  unsigned ModsTmp;
2742  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2743 
2744  if ((ModsTmp & SISrcMods::NEG) != 0)
2745  Mods ^= SISrcMods::NEG;
2746 
2747  if ((ModsTmp & SISrcMods::ABS) != 0)
2748  Mods |= SISrcMods::ABS;
2749  }
2750 
2751  // op_sel/op_sel_hi decide the source type and source.
2752  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2753  // If the source's op_sel is set, it picks the high half of the source
2754  // register.
2755 
2756  Mods |= SISrcMods::OP_SEL_1;
2757  if (isExtractHiElt(Src, Src)) {
2758  Mods |= SISrcMods::OP_SEL_0;
2759 
2760  // TODO: Should we try to look for neg/abs here?
2761  }
2762 
2763  return true;
2764  }
2765 
2766  return false;
2767 }
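// For example, (f32 (fp_extend (f16 %x))) matches with OP_SEL_1 set,
// requesting the f16-to-f32 conversion; if %x is additionally the high half
// of a 32-bit register, OP_SEL_0 is set as well to pick the high 16 bits.
// A plain f32 source returns false and uses the default (no conversion)
// encoding.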
2768 
2769 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2770  SDValue &SrcMods) const {
2771  unsigned Mods = 0;
2772  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2773  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2774  return true;
2775 }
2776 
2777 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2778  if (In.isUndef())
2779  return CurDAG->getUNDEF(MVT::i32);
2780 
2781  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2782  SDLoc SL(In);
2783  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2784  }
2785 
2786  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2787  SDLoc SL(In);
2788  return CurDAG->getConstant(
2789  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2790  }
2791 
2792  SDValue Src;
2793  if (isExtractHiElt(In, Src))
2794  return Src;
2795 
2796  return SDValue();
2797 }
2798 
2799 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2800  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2801 
2802  const SIRegisterInfo *SIRI =
2803  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2804  const SIInstrInfo * SII =
2805  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2806 
2807  unsigned Limit = 0;
2808  bool AllUsesAcceptSReg = true;
2809  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2810  Limit < 10 && U != E; ++U, ++Limit) {
2811  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2812 
2813  // If the register class is unknown, it could be an unknown register
2814  // class that needs to be an SGPR, e.g. to satisfy an inline asm
2815  // constraint.
2816  if (!RC || SIRI->isSGPRClass(RC))
2817  return false;
2818 
2819  if (RC != &AMDGPU::VS_32RegClass) {
2820  AllUsesAcceptSReg = false;
2821  SDNode * User = *U;
2822  if (User->isMachineOpcode()) {
2823  unsigned Opc = User->getMachineOpcode();
2824  MCInstrDesc Desc = SII->get(Opc);
2825  if (Desc.isCommutable()) {
2826  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2827  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2828  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2829  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2830  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2831  if (CommutedRC == &AMDGPU::VS_32RegClass)
2832  AllUsesAcceptSReg = true;
2833  }
2834  }
2835  }
2836  // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2837  // commuting current user. This means have at least one use
2838  // that strictly require VGPR. Thus, we will not attempt to commute
2839  // other user instructions.
2840  if (!AllUsesAcceptSReg)
2841  break;
2842  }
2843  }
2844  return !AllUsesAcceptSReg && (Limit < 10);
2845 }
2846 
2847 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2848  auto Ld = cast<LoadSDNode>(N);
2849 
2850  return Ld->getAlignment() >= 4 &&
2851  (
2852  (
2853  (
2854  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2855  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2856  )
2857  &&
2858  !N->isDivergent()
2859  )
2860  ||
2861  (
2862  Subtarget->getScalarizeGlobalBehavior() &&
2863  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2864  Ld->isSimple() &&
2865  !N->isDivergent() &&
2866  static_cast<const SITargetLowering *>(
2867  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2868  )
2869  );
2870 }
2871 
2872 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2873  const AMDGPUTargetLowering& Lowering =
2874  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2875  bool IsModified = false;
2876  do {
2877  IsModified = false;
2878 
2879  // Go over all selected nodes and try to fold them a bit more
2880  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2881  while (Position != CurDAG->allnodes_end()) {
2882  SDNode *Node = &*Position++;
2883  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2884  if (!MachineNode)
2885  continue;
2886 
2887  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2888  if (ResNode != Node) {
2889  if (ResNode)
2890  ReplaceUses(Node, ResNode);
2891  IsModified = true;
2892  }
2893  }
2894  CurDAG->RemoveDeadNodes();
2895  } while (IsModified);
2896 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::AMDGPUISD::CLAMP
@ CLAMP
CLAMP value between 0.0 and 1.0.
Definition: AMDGPUISelLowering.h:376
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:910
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1564
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:98
llvm::SelectionDAGISel::getTargetLowering
const TargetLowering * getTargetLowering() const
Definition: SelectionDAGISel.h:69
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4637
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:184
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:77
llvm::SelectionDAGISel::TM
TargetMachine & TM
Definition: SelectionDAGISel.h:42
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition: AMDGPUISelDAGToDAG.cpp:209
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
AMDGPUISelDAGToDAG.h
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:407
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1090
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1429
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2520
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:2945
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:936
SIMachineFunctionInfo.h
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:750
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::SelectionDAG::allnodes_end
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:509
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:2174
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:152
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1424
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:123
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9055
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:462
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2867
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:387
llvm::SelectionDAG::allnodes_begin
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:508
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::AMDGPUISD::DIV_FIXUP
@ DIV_FIXUP
Definition: AMDGPUISelLowering.h:409
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:491
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:595
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1341
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:913
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1364
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:2191
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2314
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:220
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:733
Shift
bool Shift
Definition: README.txt:468
AMDGPUDAGToDAGISel
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
Definition: AMDGPUISelDAGToDAG.h:79
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4511
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:676
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:817
f32
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:112
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:79
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1259
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1632
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:900
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:221
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:700
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
AMDGPUDAGToDAGISel::PreprocessISelDAG
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition: AMDGPUISelDAGToDAG.cpp:289
SelectionDAG.h
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:454
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1620
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:216
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:226
AMDGPUDAGToDAGISel::SelectBuildVector
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition: AMDGPUISelDAGToDAG.cpp:451
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:905
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:399
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
AMDGPUDAGToDAGISel::PostprocessISelDAG
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition: AMDGPUISelDAGToDAG.cpp:2872
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:994
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:366
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:214
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:406
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2061
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:631
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:923
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1736
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
i64
Clang compiles this i64
Definition: README.txt:504
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel
AMDGPUDAGToDAGISel(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
Definition: AMDGPUISelDAGToDAG.cpp:117
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
AMDGPUDAGToDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: AMDGPUISelDAGToDAG.cpp:199
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:703
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1125
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1480
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:971
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::SPIRV::ImageOperand::ConstOffset
@ ConstOffset
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:317
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:465
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:367
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:738
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:463
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:971
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:402
GFX9
@ GFX9
Definition: SIInstrInfo.cpp:7846
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1449
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:108
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:493
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:921
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:507
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:229
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:478
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:370
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1478
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:920
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:919
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2335
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7481
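A hedged sketch of calling this for a chained target intrinsic; every operand below (opcode, value types, memory type, pointer info, alignment) is a placeholder standing in for whatever the real lowering supplies:
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::Other); // result + out-chain
SDValue Ops[] = {Chain, Ptr};
SDValue Mem = CurDAG->getMemIntrinsicNode(
    Opc, DL, VTs, Ops, MVT::i32, MachinePointerInfo(), Align(4));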
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:108
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SelectionDAG::dump
void dump() const
Definition: SelectionDAGDumper.cpp:919
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:465
llvm::SIRegisterInfo::isSGPRClass
static bool isSGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:176
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:90
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
Definition: SelectionDAG.cpp:4523
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:345
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1137
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:181
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:386
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:783
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1507
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
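For instance, a self-contained check (the value is arbitrary):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::Hi_32(0x1122334455667788ULL) == 0x11223344u,
              "Hi_32 returns the upper dword");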
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1189
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:911
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:954
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
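For example, with an arbitrary value:
unsigned Ones = llvm::countPopulation(0xF0F0u); // 8 set bits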
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:422
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:914
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:486
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
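These range predicates (isInt<32>, isUInt<16>, and friends) are what a selector uses to decide whether an immediate fits an encoding field. A self-contained illustration with arbitrary values:
static_assert(llvm::isUInt<16>(65535) && !llvm::isUInt<16>(65536),
              "16-bit unsigned range check");
static_assert(llvm::isInt<32>(-2147483648LL) && !llvm::isInt<32>(1LL << 31),
              "32-bit signed range check");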
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1613
llvm::SelectionDAGISel::TII
const TargetInstrInfo * TII
Definition: SelectionDAGISel.h:53
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1345
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:959
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:489
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:734
llvm::SelectionDAGISel::FuncInfo
std::unique_ptr< FunctionLoweringInfo > FuncInfo
Definition: SelectionDAGISel.h:44
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:639
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:487
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:412
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:908
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:505
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8790
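A hedged sketch of building an ordinary DAG node with this API, assuming CurDAG, DL, and two i32 SDValues LHS and RHS are in scope; the opcode is illustrative:
// Create (or CSE to an existing) i32 ADD node.
SDValue Sum = CurDAG->getNode(ISD::ADD, DL, MVT::i32, LHS, RHS);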
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:464
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:428
AMDGPUDAGToDAGISel::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUISelDAGToDAG.cpp:781
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2329
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
llvm::SelectionDAG::MorphNodeTo
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
Definition: SelectionDAG.cpp:9392
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:171
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:118
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:392
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1579
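The classic pattern-matching idiom around this accessor, as a sketch; the operand index is arbitrary:
if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
  uint64_t Imm = C->getZExtValue(); // immediate available for encoding checks
  (void)Imm;
}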
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:417
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1404
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:208
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:48
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9493
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:304
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9261
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:274
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:997
AMDGPUDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: AMDGPUISelDAGToDAG.cpp:506
llvm::SelectionDAG::SelectNodeTo
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
Definition: SelectionDAG.cpp:9285
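A hedged one-liner showing the in-place mutation this performs; the machine opcode and operand are placeholders:
// Turn N into a machine instruction without allocating a fresh node.
CurDAG->SelectNodeTo(N, AMDGPU::V_MOV_B32_e32, MVT::i32, N->getOperand(0));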
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
R600RegisterInfo.h
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:303
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:219
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1086
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::SPIRV::SamplerAddressingMode::Clamp
@ Clamp
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:553
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:494
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10081
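Typical use when a replacement node produces the same result list, e.g. a value plus a chain; a sketch in which Old and New are placeholder SDNode pointers:
CurDAG->ReplaceAllUsesOfValueWith(SDValue(Old, 0), SDValue(New, 0)); // value
CurDAG->ReplaceAllUsesOfValueWith(SDValue(Old, 1), SDValue(New, 1)); // chain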
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1133
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:59
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:915
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:916
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:78
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1384
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2333
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:577
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:906
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:46
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:416
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:937
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2897
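A sketch of how a selector can exploit the result, assuming Addr is an SDValue in scope; the alignment test is illustrative:
KnownBits Known = CurDAG->computeKnownBits(Addr);
if (Known.countMinTrailingZeros() >= 2) {
  // Addr is provably 4-byte aligned; a narrower addressing mode may apply.
}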
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:492
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:440
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:344
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1352
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:506
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:909
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:531
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:917
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:908
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9611
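For example, extracting the low dword of a 64-bit value; a sketch in which Vec64 is a placeholder i64 SDValue:
SDValue Lo = CurDAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Vec64);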
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:208
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:617
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2318
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:960
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:461
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:435
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:265
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:43
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:2028
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:175
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:372
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:40
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:396
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:180
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:441
Dominators.h
llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1571
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:348
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:607
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:391
llvm::SIInstrFlags::IsDOT
@ IsDOT
Definition: SIDefines.h:120
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1121
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:652
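Unlike getConstant, a target constant is meant to be consumed directly as a machine-instruction operand rather than selected again. A hedged example in which Offset is a placeholder integer and the type is illustrative:
// Encode a 16-bit offset field as an operand for a machine node.
SDValue OffOp = CurDAG->getTargetConstant(Offset & 0xffff, DL, MVT::i16);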
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:904
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:449
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:371
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:918
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:419
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:504
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:912
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:402
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:453
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:228
AMDGPUTargetMachine.h
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:427
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
AMDGPUDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: AMDGPUISelDAGToDAG.cpp:124
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1236
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:490
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1428
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52