//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  const ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}

// TODO: Handle undef as zero
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
                                 bool Negate = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = Negate ?
      (-LHSVal & 0xffff) | (-RHSVal << 16) :
      (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}

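// Worked example for the fold above (values chosen purely for illustration):
// BUILD_VECTOR (i16 1, i16 -2) satisfies both getConstantValue() calls, so
// with Negate == false the packed immediate is
//   K = (1 & 0xffff) | (0xfffffffe << 16) = 0xfffe0001
// and the whole vector becomes one "s_mov_b32 dst, 0xfffe0001" instead of
// two moves plus a pack.
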
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, true);
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC,
                         SDValue &SWZ) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
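
// For example, (i16 (trunc (srl (i32 %x), 16))), possibly with bitcasts in
// between, is recognized as "the high half of %x", so callers can use %x
// directly instead of materializing the shifted value.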

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Register::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SGPR_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  case 32:
    return AMDGPU::SReg_1024RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
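  // To illustrate with example values: extracting Width = 4 bits at
  // Offset = 8 becomes S_BFE_U32 (or S_BFE_I32 if signed) whose second
  // source packs both fields as (Width << 16) | Offset = 0x00040008,
  // with bits [5:0] holding the offset and bits [22:16] the width.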
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

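// For example, a uniform i64 add selected here decomposes into
//   s_add_u32  lo, lhs.sub0, rhs.sub0  // carry-out written to SCC
//   s_addc_u32 hi, lhs.sub1, rhs.sub1  // carry-in consumed from SCC
// with the two 32-bit halves recombined into an i64 by the REG_SEQUENCE
// built above.
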
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     TRI->getVCC(), CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
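
// For example, a DS access at (%ptr + 100) selects as Base = %ptr with 100
// carried in the instruction's 16-bit offset field, so no address add is
// emitted. An address of the form (sub 65, %x) instead becomes
// Base = (v_sub 0, %x) with offset 65, per the rewrite above.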

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
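
// For example, a 64-bit DS access at (%ptr + 40) gives Base = %ptr,
// Offset0 = 10, Offset1 = 11: ds_read2/ds_write2 offsets count dwords
// (40 / 4 = 10), which is why this path requires 4-byte alignment.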

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC,
                                     SDValue &SWZ) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC, SDValue &SWZ) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC, SWZ))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC, SWZ;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC,
                                           SDValue &SWZ) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC, SWZ))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC, SWZ;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC, SWZ;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N, 0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      (!Subtarget->hasFlatSegmentOffsetBug() ||
       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
                               IsSigned)) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
1664 
1665 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1666  SDValue &Offset, bool &Imm) const {
1667 
1668  // FIXME: Handle non-constant offsets.
1669  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1670  if (!C)
1671  return false;
1672 
1673  SDLoc SL(ByteOffsetNode);
1674  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1675  int64_t ByteOffset = C->getSExtValue();
1676  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1677 
1678  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1679  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1680  Imm = true;
1681  return true;
1682  }
1683 
1684  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1685  return false;
1686 
1687  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1688  // 32-bit Immediates are supported on Sea Islands.
1689  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1690  } else {
1691  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1692  Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1693  C32Bit), 0);
1694  }
1695  Imm = false;
1696  return true;
1697 }
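
// Encoding note (behavior assumed from getSMRDEncodedOffset, not spelled
// out here): pre-GCN3 encodings take a dword count while newer ones take
// a byte offset, so a byte offset of 16 typically encodes as 4 on SI/CI
// and as 16 on VI and later.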
1698 
1699 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1700  if (Addr.getValueType() != MVT::i32)
1701  return Addr;
1702 
1703  // Zero-extend a 32-bit address.
1704  SDLoc SL(Addr);
1705 
1706  const MachineFunction &MF = CurDAG->getMachineFunction();
1707  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1708  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1709  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1710 
1711  const SDValue Ops[] = {
1712  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1713  Addr,
1714  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1715  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1716  0),
1717  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1718  };
1719 
1720  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1721  Ops), 0);
1722 }
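
// Illustrative result: for a 32-bit address %a with high bits H from the
// function info, this builds roughly
//   %hi = S_MOV_B32 H
//   %p:sreg_64_xexec = REG_SEQUENCE %a, sub0, %hi, sub1
// yielding a 64-bit base whose low half is the original address.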
1723 
1724 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1725  SDValue &Offset, bool &Imm) const {
1726  SDLoc SL(Addr);
1727 
1728  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1729  // wraparound, because s_load instructions perform the addition in 64 bits.
1730  if ((Addr.getValueType() != MVT::i32 ||
1731  Addr->getFlags().hasNoUnsignedWrap()) &&
1732  CurDAG->isBaseWithConstantOffset(Addr)) {
1733  SDValue N0 = Addr.getOperand(0);
1734  SDValue N1 = Addr.getOperand(1);
1735 
1736  if (SelectSMRDOffset(N1, Offset, Imm)) {
1737  SBase = Expand32BitAddress(N0);
1738  return true;
1739  }
1740  }
1741  SBase = Expand32BitAddress(Addr);
1742  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1743  Imm = true;
1744  return true;
1745 }
1746 
1747 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1748  SDValue &Offset) const {
1749  bool Imm;
1750  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1751 }
1752 
1753 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1754  SDValue &Offset) const {
1755 
1756  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1757  return false;
1758 
1759  bool Imm;
1760  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1761  return false;
1762 
1763  return !Imm && isa<ConstantSDNode>(Offset);
1764 }
1765 
1766 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1767  SDValue &Offset) const {
1768  bool Imm;
1769  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1770  !isa<ConstantSDNode>(Offset);
1771 }
1772 
1773 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1774  SDValue &Offset) const {
1775  bool Imm;
1776  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1777 }
1778 
1779 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1780  SDValue &Offset) const {
1781  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1782  return false;
1783 
1784  bool Imm;
1785  if (!SelectSMRDOffset(Addr, Offset, Imm))
1786  return false;
1787 
1788  return !Imm && isa<ConstantSDNode>(Offset);
1789 }
1790 
1791 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1792  SDValue &Base,
1793  SDValue &Offset) const {
1794  SDLoc DL(Index);
1795 
1796  if (CurDAG->isBaseWithConstantOffset(Index)) {
1797  SDValue N0 = Index.getOperand(0);
1798  SDValue N1 = Index.getOperand(1);
1799  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1800 
1801  // (add n0, c0)
1802  // Don't peel off the offset (c0) if doing so could possibly lead
1803  // the base (n0) to be negative.
1804  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1805  Base = N0;
1806  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1807  return true;
1808  }
1809  }
1810 
1811  if (isa<ConstantSDNode>(Index))
1812  return false;
1813 
1814  Base = Index;
1815  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1816  return true;
1817 }
1818 
1819 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1820  SDValue Val, uint32_t Offset,
1821  uint32_t Width) {
1822  // Transformation function, pack the offset and width of a BFE into
1823  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1824  // source, bits [5:0] contain the offset and bits [22:16] the width.
1825  uint32_t PackedVal = Offset | (Width << 16);
1826  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1827 
1828  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1829 }
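
// Worked example: offset 16 and width 8 pack to 16 | (8 << 16) == 0x80010,
// so "s_bfe_u32 dst, src, 0x80010" extracts bits [23:16] of src.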
1830 
1831 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1832  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1833  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1834  // Predicate: 0 < b <= c < 32
1835 
1836  const SDValue &Shl = N->getOperand(0);
1837  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1838  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1839 
1840  if (B && C) {
1841  uint32_t BVal = B->getZExtValue();
1842  uint32_t CVal = C->getZExtValue();
1843 
1844  if (0 < BVal && BVal <= CVal && CVal < 32) {
1845  bool Signed = N->getOpcode() == ISD::SRA;
1846  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1847 
1848  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1849  32 - CVal));
1850  return;
1851  }
1852  }
1853  SelectCode(N);
1854 }
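
// Worked example: "(x << 8) srl 24" satisfies 0 < 8 <= 24 < 32 and becomes
// BFE_U32 x, (24 - 8), (32 - 24), i.e. an 8-bit extract starting at bit 16.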
1855 
1856 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1857  switch (N->getOpcode()) {
1858  case ISD::AND:
1859  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1860  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1861  // Predicate: isMask(mask)
1862  const SDValue &Srl = N->getOperand(0);
1863  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1864  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1865 
1866  if (Shift && Mask) {
1867  uint32_t ShiftVal = Shift->getZExtValue();
1868  uint32_t MaskVal = Mask->getZExtValue();
1869 
1870  if (isMask_32(MaskVal)) {
1871  uint32_t WidthVal = countPopulation(MaskVal);
1872 
1873  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1874  Srl.getOperand(0), ShiftVal, WidthVal));
1875  return;
1876  }
1877  }
1878  }
1879  break;
1880  case ISD::SRL:
1881  if (N->getOperand(0).getOpcode() == ISD::AND) {
1882  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1883  // Predicate: isMask(mask >> b)
1884  const SDValue &And = N->getOperand(0);
1885  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1886  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1887 
1888  if (Shift && Mask) {
1889  uint32_t ShiftVal = Shift->getZExtValue();
1890  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1891 
1892  if (isMask_32(MaskVal)) {
1893  uint32_t WidthVal = countPopulation(MaskVal);
1894 
1895  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1896  And.getOperand(0), ShiftVal, WidthVal));
1897  return;
1898  }
1899  }
1900  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1901  SelectS_BFEFromShifts(N);
1902  return;
1903  }
1904  break;
1905  case ISD::SRA:
1906  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1907  SelectS_BFEFromShifts(N);
1908  return;
1909  }
1910  break;
1911 
1912  case ISD::SIGN_EXTEND_INREG: {
1913  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1914  SDValue Src = N->getOperand(0);
1915  if (Src.getOpcode() != ISD::SRL)
1916  break;
1917 
1918  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1919  if (!Amt)
1920  break;
1921 
1922  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1923  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1924  Amt->getZExtValue(), Width));
1925  return;
1926  }
1927  }
1928 
1929  SelectCode(N);
1930 }
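
// Worked example for the AND case: "(x srl 4) & 0xff" has a mask accepted
// by isMask_32 with popcount 8, so it becomes BFE_U32 x, 4, 8.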
1931 
1932 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1933  assert(N->getOpcode() == ISD::BRCOND);
1934  if (!N->hasOneUse())
1935  return false;
1936 
1937  SDValue Cond = N->getOperand(1);
1938  if (Cond.getOpcode() == ISD::CopyToReg)
1939  Cond = Cond.getOperand(2);
1940 
1941  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1942  return false;
1943 
1944  MVT VT = Cond.getOperand(0).getSimpleValueType();
1945  if (VT == MVT::i32)
1946  return true;
1947 
1948  if (VT == MVT::i64) {
1949  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1950 
1951  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1952  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1953  }
1954 
1955  return false;
1956 }
1957 
1958 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1959  SDValue Cond = N->getOperand(1);
1960 
1961  if (Cond.isUndef()) {
1962  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1963  N->getOperand(2), N->getOperand(0));
1964  return;
1965  }
1966 
1967  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1968  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1969 
1970  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1971  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1972  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
1973  SDLoc SL(N);
1974 
1975  if (!UseSCCBr) {
1976  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1977  // analyzed what generates the vcc value, so we do not know whether vcc
1978  // bits for disabled lanes are 0. Thus we need to mask out bits for
1979  // disabled lanes.
1980  //
1981  // For the case that we select S_CBRANCH_SCC1 and it gets
1982  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1983  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1984  //
1985  // We could add an analysis of what generates the vcc value here and omit
1986  // the S_AND when it is unnecessary. But it would be better to add a separate
1987  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1988  // catches both cases.
1989  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
1990  : AMDGPU::S_AND_B64,
1991  SL, MVT::i1,
1992  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
1993  : AMDGPU::EXEC,
1994  MVT::i1),
1995  Cond),
1996  0);
1997  }
1998 
1999  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2000  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2001  N->getOperand(2), // Basic Block
2002  VCC.getValue(0));
2003 }
2004 
2005 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2006  MVT VT = N->getSimpleValueType(0);
2007  bool IsFMA = N->getOpcode() == ISD::FMA;
2008  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2009  !Subtarget->hasFmaMixInsts()) ||
2010  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2011  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2012  SelectCode(N);
2013  return;
2014  }
2015 
2016  SDValue Src0 = N->getOperand(0);
2017  SDValue Src1 = N->getOperand(1);
2018  SDValue Src2 = N->getOperand(2);
2019  unsigned Src0Mods, Src1Mods, Src2Mods;
2020 
2021  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2022  // using the conversion from f16.
2023  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2024  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2025  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2026 
2027  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
2028  "fmad selected with denormals enabled");
2029  // TODO: We can select this with f32 denormals enabled if all the sources are
2030  // converted from f16 (in which case fmad isn't legal).
2031 
2032  if (Sel0 || Sel1 || Sel2) {
2033  // For dummy operands.
2034  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2035  SDValue Ops[] = {
2036  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2037  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2038  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2039  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2040  Zero, Zero
2041  };
2042 
2043  CurDAG->SelectNodeTo(N,
2044  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2045  MVT::f32, Ops);
2046  } else {
2047  SelectCode(N);
2048  }
2049 }
2050 
2051 // This is here because there isn't a way to use the generated sub0_sub1 as the
2052 // subreg index to EXTRACT_SUBREG in tablegen.
2053 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2054  MemSDNode *Mem = cast<MemSDNode>(N);
2055  unsigned AS = Mem->getAddressSpace();
2056  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2057  SelectCode(N);
2058  return;
2059  }
2060 
2061  MVT VT = N->getSimpleValueType(0);
2062  bool Is32 = (VT == MVT::i32);
2063  SDLoc SL(N);
2064 
2065  MachineSDNode *CmpSwap = nullptr;
2066  if (Subtarget->hasAddr64()) {
2067  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2068 
2069  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2070  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2071  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2072  SDValue CmpVal = Mem->getOperand(2);
2073 
2074  // XXX - Do we care about glue operands?
2075 
2076  SDValue Ops[] = {
2077  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2078  };
2079 
2080  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2081  }
2082  }
2083 
2084  if (!CmpSwap) {
2085  SDValue SRsrc, SOffset, Offset, SLC;
2086  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2087  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2088  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2089 
2090  SDValue CmpVal = Mem->getOperand(2);
2091  SDValue Ops[] = {
2092  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2093  };
2094 
2095  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2096  }
2097  }
2098 
2099  if (!CmpSwap) {
2100  SelectCode(N);
2101  return;
2102  }
2103 
2104  MachineMemOperand *MMO = Mem->getMemOperand();
2105  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2106 
2107  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2108  SDValue Extract
2109  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2110 
2111  ReplaceUses(SDValue(N, 0), Extract);
2112  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2113  CurDAG->RemoveDeadNode(N);
2114 }
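
// Illustrative shape of the replacement for a 32-bit compare-swap (node
// names are schematic):
//   %pair = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN %data, %rsrc, %soffset, ...
//   %old  = EXTRACT_SUBREG %pair, sub0
// The _RTN instruction writes back the full data pair, so the old memory
// value is read from the low subregister.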
2115 
2116 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2117  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2118  // be copied to an SGPR with readfirstlane.
2119  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2120  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2121 
2122  SDValue Chain = N->getOperand(0);
2123  SDValue Ptr = N->getOperand(2);
2124  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2125  MachineMemOperand *MMO = M->getMemOperand();
2126  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2127 
2128  SDValue Offset;
2129  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2130  SDValue PtrBase = Ptr.getOperand(0);
2131  SDValue PtrOffset = Ptr.getOperand(1);
2132 
2133  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2134  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2135  N = glueCopyToM0(N, PtrBase);
2136  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2137  }
2138  }
2139 
2140  if (!Offset) {
2141  N = glueCopyToM0(N, Ptr);
2142  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2143  }
2144 
2145  SDValue Ops[] = {
2146  Offset,
2147  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2148  Chain,
2149  N->getOperand(N->getNumOperands() - 1) // New glue
2150  };
2151 
2152  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2153  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2154 }
2155 
2156 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2157  switch (IntrID) {
2158  case Intrinsic::amdgcn_ds_gws_init:
2159  return AMDGPU::DS_GWS_INIT;
2160  case Intrinsic::amdgcn_ds_gws_barrier:
2161  return AMDGPU::DS_GWS_BARRIER;
2162  case Intrinsic::amdgcn_ds_gws_sema_v:
2163  return AMDGPU::DS_GWS_SEMA_V;
2164  case Intrinsic::amdgcn_ds_gws_sema_br:
2165  return AMDGPU::DS_GWS_SEMA_BR;
2166  case Intrinsic::amdgcn_ds_gws_sema_p:
2167  return AMDGPU::DS_GWS_SEMA_P;
2168  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2169  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2170  default:
2171  llvm_unreachable("not a gws intrinsic");
2172  }
2173 }
2174 
2175 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2176  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2177  !Subtarget->hasGWSSemaReleaseAll()) {
2178  // Let this error.
2179  SelectCode(N);
2180  return;
2181  }
2182 
2183  // Chain, intrinsic ID, vsrc, offset
2184  const bool HasVSrc = N->getNumOperands() == 4;
2185  assert(HasVSrc || N->getNumOperands() == 3);
2186 
2187  SDLoc SL(N);
2188  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2189  int ImmOffset = 0;
2190  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2191  MachineMemOperand *MMO = M->getMemOperand();
2192 
2193  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2194  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2195 
2196  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2197  // offset field) % 64. Some versions of the programming guide omit the m0
2198  // part, or claim it's from offset 0.
2199  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2200  // If we have a constant offset, try to use the 0 in m0 as the base.
2201  // TODO: Look into changing the default m0 initialization value. If the
2202  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2203  // the immediate offset.
2204  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2205  ImmOffset = ConstOffset->getZExtValue();
2206  } else {
2207  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2208  ImmOffset = BaseOffset.getConstantOperandVal(1);
2209  BaseOffset = BaseOffset.getOperand(0);
2210  }
2211 
2212  // Prefer to do the shift in an SGPR since it should be possible to use m0
2213  // as the result directly. If it's already an SGPR, it will be eliminated
2214  // later.
2215  SDNode *SGPROffset
2216  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2217  BaseOffset);
2218  // Shift to offset in m0
2219  SDNode *M0Base
2220  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2221  SDValue(SGPROffset, 0),
2222  CurDAG->getTargetConstant(16, SL, MVT::i32));
2223  glueCopyToM0(N, SDValue(M0Base, 0));
2224  }
2225 
2226  SDValue Chain = N->getOperand(0);
2227  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2228 
2229  // TODO: Can this just be removed from the instruction?
2230  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2231 
2232  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2233  SmallVector<SDValue, 5> Ops;
2234  if (HasVSrc)
2235  Ops.push_back(N->getOperand(2));
2236  Ops.push_back(OffsetField);
2237  Ops.push_back(GDS);
2238  Ops.push_back(Chain);
2239 
2240  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2241  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2242 }
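
// Sketch of the nodes emitted for a variable offset %v (mirrors the code
// above):
//   %s = V_READFIRSTLANE_B32 %v
//   %m = S_LSHL_B32 %s, 16     ; resource id lives in m0[21:16]
//   m0 = COPY %m               ; via glueCopyToM0
// with the 6-bit immediate offset field left at 0.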
2243 
2244 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2245  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2246  switch (IntrID) {
2247  case Intrinsic::amdgcn_ds_append:
2248  case Intrinsic::amdgcn_ds_consume: {
2249  if (N->getValueType(0) != MVT::i32)
2250  break;
2251  SelectDSAppendConsume(N, IntrID);
2252  return;
2253  }
2254  }
2255 
2256  SelectCode(N);
2257 }
2258 
2259 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2260  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2261  unsigned Opcode;
2262  switch (IntrID) {
2263  case Intrinsic::amdgcn_wqm:
2264  Opcode = AMDGPU::WQM;
2265  break;
2266  case Intrinsic::amdgcn_softwqm:
2267  Opcode = AMDGPU::SOFT_WQM;
2268  break;
2269  case Intrinsic::amdgcn_wwm:
2270  Opcode = AMDGPU::WWM;
2271  break;
2272  default:
2273  SelectCode(N);
2274  return;
2275  }
2276 
2277  SDValue Src = N->getOperand(1);
2278  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2279 }
2280 
2281 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2282  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2283  switch (IntrID) {
2284  case Intrinsic::amdgcn_ds_gws_init:
2285  case Intrinsic::amdgcn_ds_gws_barrier:
2286  case Intrinsic::amdgcn_ds_gws_sema_v:
2287  case Intrinsic::amdgcn_ds_gws_sema_br:
2288  case Intrinsic::amdgcn_ds_gws_sema_p:
2289  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2290  SelectDS_GWS(N, IntrID);
2291  return;
2292  default:
2293  break;
2294  }
2295 
2296  SelectCode(N);
2297 }
2298 
2299 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2300  unsigned &Mods) const {
2301  Mods = 0;
2302  Src = In;
2303 
2304  if (Src.getOpcode() == ISD::FNEG) {
2305  Mods |= SISrcMods::NEG;
2306  Src = Src.getOperand(0);
2307  }
2308 
2309  if (Src.getOpcode() == ISD::FABS) {
2310  Mods |= SISrcMods::ABS;
2311  Src = Src.getOperand(0);
2312  }
2313 
2314  return true;
2315 }
2316 
2317 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2318  SDValue &SrcMods) const {
2319  unsigned Mods;
2320  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2321  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2322  return true;
2323  }
2324 
2325  return false;
2326 }
2327 
2328 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2329  SDValue &SrcMods) const {
2330  SelectVOP3Mods(In, Src, SrcMods);
2331  return isNoNanSrc(Src);
2332 }
2333 
2334 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2335  SDValue &SrcMods) const {
2336  if (In.getValueType() == MVT::f32)
2337  return SelectVOP3Mods(In, Src, SrcMods);
2338  Src = In;
2339  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2340  return true;
2341 }
2342 
2343 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2344  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2345  return false;
2346 
2347  Src = In;
2348  return true;
2349 }
2350 
2351 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2352  SDValue &SrcMods, SDValue &Clamp,
2353  SDValue &Omod) const {
2354  SDLoc DL(In);
2355  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2356  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2357 
2358  return SelectVOP3Mods(In, Src, SrcMods);
2359 }
2360 
2361 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2362  SDValue &SrcMods,
2363  SDValue &Clamp,
2364  SDValue &Omod) const {
2365  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2366  return SelectVOP3Mods(In, Src, SrcMods);
2367 }
2368 
2369 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2370  SDValue &Clamp, SDValue &Omod) const {
2371  Src = In;
2372 
2373  SDLoc DL(In);
2374  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2375  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2376 
2377  return true;
2378 }
2379 
2380 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2381  SDValue &SrcMods) const {
2382  unsigned Mods = 0;
2383  Src = In;
2384 
2385  if (Src.getOpcode() == ISD::FNEG) {
2386  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2387  Src = Src.getOperand(0);
2388  }
2389 
2390  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2391  unsigned VecMods = Mods;
2392 
2393  SDValue Lo = stripBitcast(Src.getOperand(0));
2394  SDValue Hi = stripBitcast(Src.getOperand(1));
2395 
2396  if (Lo.getOpcode() == ISD::FNEG) {
2397  Lo = stripBitcast(Lo.getOperand(0));
2398  Mods ^= SISrcMods::NEG;
2399  }
2400 
2401  if (Hi.getOpcode() == ISD::FNEG) {
2402  Hi = stripBitcast(Hi.getOperand(0));
2403  Mods ^= SISrcMods::NEG_HI;
2404  }
2405 
2406  if (isExtractHiElt(Lo, Lo))
2407  Mods |= SISrcMods::OP_SEL_0;
2408 
2409  if (isExtractHiElt(Hi, Hi))
2410  Mods |= SISrcMods::OP_SEL_1;
2411 
2412  Lo = stripExtractLoElt(Lo);
2413  Hi = stripExtractLoElt(Hi);
2414 
2415  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2416  // Really a scalar input. Just select from the low half of the register to
2417  // avoid packing.
2418 
2419  Src = Lo;
2420  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2421  return true;
2422  }
2423 
2424  Mods = VecMods;
2425  }
2426 
2427  // Packed instructions do not have abs modifiers.
2428  Mods |= SISrcMods::OP_SEL_1;
2429 
2430  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2431  return true;
2432 }
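
// Worked example: "(fneg v2f16:%v)" folds to Src = %v with NEG and NEG_HI
// set (plus the default OP_SEL_1); and when both halves of a build_vector
// are the same non-inline scalar, the low half of that register is used
// directly to avoid emitting a pack.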
2433 
2434 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2435  SDValue &SrcMods,
2436  SDValue &Clamp) const {
2437  SDLoc SL(In);
2438 
2439  // FIXME: Handle clamp and op_sel
2440  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2441 
2442  return SelectVOP3PMods(In, Src, SrcMods);
2443 }
2444 
2445 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2446  SDValue &SrcMods) const {
2447  Src = In;
2448  // FIXME: Handle op_sel
2449  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2450  return true;
2451 }
2452 
2453 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2454  SDValue &SrcMods,
2455  SDValue &Clamp) const {
2456  SDLoc SL(In);
2457 
2458  // FIXME: Handle clamp
2459  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2460 
2461  return SelectVOP3OpSel(In, Src, SrcMods);
2462 }
2463 
2464 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2465  SDValue &SrcMods) const {
2466  // FIXME: Handle op_sel
2467  return SelectVOP3Mods(In, Src, SrcMods);
2468 }
2469 
2470 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2471  SDValue &SrcMods,
2472  SDValue &Clamp) const {
2473  SDLoc SL(In);
2474 
2475  // FIXME: Handle clamp
2476  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2477 
2478  return SelectVOP3OpSelMods(In, Src, SrcMods);
2479 }
2480 
2481 // The return value is not whether the match is possible (which it always is),
2482 // but whether or not a conversion is really used.
2483 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2484  unsigned &Mods) const {
2485  Mods = 0;
2486  SelectVOP3ModsImpl(In, Src, Mods);
2487 
2488  if (Src.getOpcode() == ISD::FP_EXTEND) {
2489  Src = Src.getOperand(0);
2490  assert(Src.getValueType() == MVT::f16);
2491  Src = stripBitcast(Src);
2492 
2493  // Be careful about folding modifiers if we already have an abs. fneg is
2494  // applied last, so we don't want to apply an earlier fneg.
2495  if ((Mods & SISrcMods::ABS) == 0) {
2496  unsigned ModsTmp;
2497  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2498 
2499  if ((ModsTmp & SISrcMods::NEG) != 0)
2500  Mods ^= SISrcMods::NEG;
2501 
2502  if ((ModsTmp & SISrcMods::ABS) != 0)
2503  Mods |= SISrcMods::ABS;
2504  }
2505 
2506  // op_sel/op_sel_hi decide the source type and source.
2507  // If the source's op_sel_hi is set, it indicates that a conversion from fp16 is performed.
2508  // If the source's op_sel is set, it picks the high half of the source
2509  // register.
2510 
2511  Mods |= SISrcMods::OP_SEL_1;
2512  if (isExtractHiElt(Src, Src)) {
2513  Mods |= SISrcMods::OP_SEL_0;
2514 
2515  // TODO: Should we try to look for neg/abs here?
2516  }
2517 
2518  return true;
2519  }
2520 
2521  return false;
2522 }
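
// Worked example: an f32 operand "(fp_extend f16:%x)" matches with
// OP_SEL_1 set, marking the source as f16; if %x is the high half of a
// 32-bit register, OP_SEL_0 is set as well to select that half.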
2523 
2524 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2525  SDValue &SrcMods) const {
2526  unsigned Mods = 0;
2527  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2528  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2529  return true;
2530 }
2531 
2532 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2533  if (In.isUndef())
2534  return CurDAG->getUNDEF(MVT::i32);
2535 
2536  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2537  SDLoc SL(In);
2538  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2539  }
2540 
2541  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2542  SDLoc SL(In);
2543  return CurDAG->getConstant(
2544  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2545  }
2546 
2547  SDValue Src;
2548  if (isExtractHiElt(In, Src))
2549  return Src;
2550 
2551  return SDValue();
2552 }
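
// Worked example: the f16 constant 1.0 has bits 0x3C00, so it is returned
// as the i32 constant 0x3C000000, already positioned in the high 16 bits.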
2553 
2554 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2555  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2556 
2557  const SIRegisterInfo *SIRI =
2558  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2559  const SIInstrInfo * SII =
2560  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2561 
2562  unsigned Limit = 0;
2563  bool AllUsesAcceptSReg = true;
2564  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2565  Limit < 10 && U != E; ++U, ++Limit) {
2566  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2567 
2568  // If the register class is unknown, it could be an unknown
2569  // register class that needs to be an SGPR, e.g. an inline asm
2570  // constraint
2571  if (!RC || SIRI->isSGPRClass(RC))
2572  return false;
2573 
2574  if (RC != &AMDGPU::VS_32RegClass) {
2575  AllUsesAcceptSReg = false;
2576  SDNode * User = *U;
2577  if (User->isMachineOpcode()) {
2578  unsigned Opc = User->getMachineOpcode();
2579  MCInstrDesc Desc = SII->get(Opc);
2580  if (Desc.isCommutable()) {
2581  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2582  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2583  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2584  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2585  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2586  if (CommutedRC == &AMDGPU::VS_32RegClass)
2587  AllUsesAcceptSReg = true;
2588  }
2589  }
2590  }
2591  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
2592  // commuting the current user. This means we have at least one use
2593  // that strictly requires a VGPR. Thus, we will not attempt to commute
2594  // other user instructions.
2595  if (!AllUsesAcceptSReg)
2596  break;
2597  }
2598  }
2599  return !AllUsesAcceptSReg && (Limit < 10);
2600 }
2601 
2602 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2603  auto Ld = cast<LoadSDNode>(N);
2604 
2605  return Ld->getAlignment() >= 4 &&
2606  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2607  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2608  !N->isDivergent()) ||
2609  (Subtarget->getScalarizeGlobalBehavior() &&
2610  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2611  !Ld->isVolatile() && !N->isDivergent() &&
2612  static_cast<const SITargetLowering *>(
2613  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2625 }
2626 
2627 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2628  const AMDGPUTargetLowering& Lowering =
2629  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2630  bool IsModified = false;
2631  do {
2632  IsModified = false;
2633 
2634  // Go over all selected nodes and try to fold them a bit more
2635  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2636  while (Position != CurDAG->allnodes_end()) {
2637  SDNode *Node = &*Position++;
2638  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2639  if (!MachineNode)
2640  continue;
2641 
2642  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2643  if (ResNode != Node) {
2644  if (ResNode)
2645  ReplaceUses(Node, ResNode);
2646  IsModified = true;
2647  }
2648  }
2649  CurDAG->RemoveDeadNodes();
2650  } while (IsModified);
2651 }
2652 
2653 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2654  Subtarget = &MF.getSubtarget<R600Subtarget>();
2655  return SelectionDAGISel::runOnMachineFunction(MF);
2656 }
2657 
2658 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2659  if (!N->readMem())
2660  return false;
2661  if (CbId == -1)
2662  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2663  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2664 
2665  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2666 }
2667 
2668 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2669  SDValue& IntPtr) {
2670  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2671  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2672  true);
2673  return true;
2674  }
2675  return false;
2676 }
2677 
2678 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2679  SDValue& BaseReg, SDValue &Offset) {
2680  if (!isa<ConstantSDNode>(Addr)) {
2681  BaseReg = Addr;
2682  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2683  return true;
2684  }
2685  return false;
2686 }
2687 
2688 void R600DAGToDAGISel::Select(SDNode *N) {
2689  unsigned int Opc = N->getOpcode();
2690  if (N->isMachineOpcode()) {
2691  N->setNodeId(-1);
2692  return; // Already selected.
2693  }
2694 
2695  switch (Opc) {
2696  default: break;
2697  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2698  case ISD::SCALAR_TO_VECTOR:
2699  case ISD::BUILD_VECTOR: {
2700  EVT VT = N->getValueType(0);
2701  unsigned NumVectorElts = VT.getVectorNumElements();
2702  unsigned RegClassID;
2703  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2704  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2705  // pass. We want to avoid 128 bits copies as much as possible because they
2706  // can't be bundled by our scheduler.
2707  switch(NumVectorElts) {
2708  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2709  case 4:
2710  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2711  RegClassID = R600::R600_Reg128VerticalRegClassID;
2712  else
2713  RegClassID = R600::R600_Reg128RegClassID;
2714  break;
2715  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2716  }
2717  SelectBuildVector(N, RegClassID);
2718  return;
2719  }
2720  }
2721 
2722  SelectCode(N);
2723 }
2724 
2725 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2726  SDValue &Offset) {
2727  ConstantSDNode *C;
2728  SDLoc DL(Addr);
2729 
2730  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2731  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2732  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2733  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2734  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2735  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2736  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2737  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2738  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2739  Base = Addr.getOperand(0);
2740  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2741  } else {
2742  Base = Addr;
2743  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2744  }
2745 
2746  return true;
2747 }
2748 
2749 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2750  SDValue &Offset) {
2751  ConstantSDNode *IMMOffset;
2752 
2753  if (Addr.getOpcode() == ISD::ADD
2754  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2755  && isInt<16>(IMMOffset->getZExtValue())) {
2756 
2757  Base = Addr.getOperand(0);
2758  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2759  MVT::i32);
2760  return true;
2761  // If the pointer address is constant, we can move it to the offset field.
2762  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2763  && isInt<16>(IMMOffset->getZExtValue())) {
2764  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2765  SDLoc(CurDAG->getEntryNode()),
2766  R600::ZERO, MVT::i32);
2767  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2768  MVT::i32);
2769  return true;
2770  }
2771 
2772  // Default case, no offset
2773  Base = Addr;
2774  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2775  return true;
2776 }