//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  const ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}

// TODO: Handle undef as zero
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
                                 bool Negate = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = Negate ?
      (-LHSVal & 0xffff) | (-RHSVal << 16) :
      (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}

static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, true);
}
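
// For example (illustrative): packing build_vector (i16 -1), (i16 2) gives
//   K = (0xffff & 0xffff) | (2 << 16) = 0x0002ffff,
// which becomes a single "s_mov_b32 ..., 0x0002ffff". With Negate set, the
// same inputs pack as K = (1 & 0xffff) | (-2 << 16) = 0xfffe0001.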

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
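
// For example (illustrative): for a 32-bit value %x, the DAG shape
//   (i16 (trunc (srl i32 %x, 16)))
// is an extract of the high half, so Out is set to %x (with surrounding
// bitcasts stripped).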

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}
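
// For example (illustrative): (i16 (trunc i32 %x)) only depends on the low
// 16 bits of %x, so it is safe to look through the truncate and match packed
// operations directly on %x.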

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}
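
// As a rough guide (illustrative; SIInstrInfo::isInlineConstant is the
// authoritative check): GCN inline constants cover the integers -16..64 and a
// small set of FP values (+/-0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, plus
// 1/(2*pi) on newer subtargets). Testing the negated value lets a constant be
// folded into a subtract-style instruction instead of being materialized.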

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Register::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
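
// For example (illustrative): buildSMovImm64 with Imm = 0x0000000100000002
// emits the equivalent of
//   %lo = S_MOV_B32 2
//   %hi = S_MOV_B32 1
//   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1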

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  case 32:
    return AMDGPU::SReg_1024RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
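
// For example (illustrative): a uniform i64 add is split here into
//   %lo = S_ADD_U32  %lhs.sub0, %rhs.sub0   ; defines SCC
//   %hi = S_ADDC_U32 %lhs.sub1, %rhs.sub1   ; consumes SCC
// with the two halves glued together so the carry in SCC cannot be clobbered
// between them.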

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     TRI->getVCC(), CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
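
// For example (illustrative): ds_read_b32 encodes a 16-bit byte offset, so a
// base pointer plus a constant up to 65535 can fold the constant into the
// instruction, while the read2/write2 forms use two 8-bit dword-scaled
// offsets; that is why callers pass OffsetBits of 16 or 8.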

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
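
// For example (illustrative): two dword loads at byte offsets 40 and 44 from
// the same base can merge into one ds_read2_b32 with offset0 = 10 and
// offset1 = 11 (dword-scaled), sharing a single base VGPR.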

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
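
// For example (illustrative): an address (add (add %sbase, %voffset), 16)
// with uniform %sbase and divergent %voffset is split so that %sbase becomes
// Ptr (wrapped into the resource descriptor by the MUBUF callers), %voffset
// becomes the addr64 VAddr, and 16 lands in the immediate offset field.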

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return static_cast<const SITargetLowering*>(getTargetLowering())->
      SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
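
// As a rough guide (illustrative; AMDGPU::getSMRDEncodedOffset is the
// authoritative mapping): SI encodes SMRD immediate offsets as 8-bit
// dword-scaled values, Sea Islands adds a 32-bit literal form, and VI+ use a
// 20-bit byte offset, so the byte offset must be re-encoded per subtarget
// before its legality is checked.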
1664 
1665 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1666  if (Addr.getValueType() != MVT::i32)
1667  return Addr;
1668 
1669  // Zero-extend a 32-bit address.
1670  SDLoc SL(Addr);
1671 
1672  const MachineFunction &MF = CurDAG->getMachineFunction();
1673  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1674  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1675  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1676 
1677  const SDValue Ops[] = {
1678  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1679  Addr,
1680  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1681  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1682  0),
1683  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1684  };
1685 
1686  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1687  Ops), 0);
1688 }
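 // Illustrative example (0x1234 is a made-up aperture value): for a 32-bit
 // address %a with get32BitAddressHighBits() == 0x1234, the REG_SEQUENCE
 // yields the 64-bit pair { sub0 = %a, sub1 = 0x1234 }, i.e.
 // (0x1234ULL << 32) | zext(%a).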
1689 
1690 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1691  SDValue &Offset, bool &Imm) const {
1692  SDLoc SL(Addr);
1693 
1694  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1695  // wraparound, because s_load instructions perform the addition in 64 bits.
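 // For example, with a 32-bit base 0xfffffff0 and offset 0x20, a 32-bit add
 // wraps to 0x10 while s_load computes 0x100000010, so the base/offset split
 // below is only taken for i32 addresses when the add is known nuw.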
1696  if ((Addr.getValueType() != MVT::i32 ||
1697  Addr->getFlags().hasNoUnsignedWrap()) &&
1698  CurDAG->isBaseWithConstantOffset(Addr)) {
1699  SDValue N0 = Addr.getOperand(0);
1700  SDValue N1 = Addr.getOperand(1);
1701 
1702  if (SelectSMRDOffset(N1, Offset, Imm)) {
1703  SBase = Expand32BitAddress(N0);
1704  return true;
1705  }
1706  }
1707  SBase = Expand32BitAddress(Addr);
1708  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1709  Imm = true;
1710  return true;
1711 }
1712 
1713 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1714  SDValue &Offset) const {
1715  bool Imm;
1716  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1717 }
1718 
1719 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1720  SDValue &Offset) const {
1721 
1722  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1723  return false;
1724 
1725  bool Imm;
1726  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1727  return false;
1728 
1729  return !Imm && isa<ConstantSDNode>(Offset);
1730 }
1731 
1732 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1733  SDValue &Offset) const {
1734  bool Imm;
1735  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1736  !isa<ConstantSDNode>(Offset);
1737 }
1738 
1739 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1740  SDValue &Offset) const {
1741  bool Imm;
1742  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1743 }
1744 
1745 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1746  SDValue &Offset) const {
1747  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1748  return false;
1749 
1750  bool Imm;
1751  if (!SelectSMRDOffset(Addr, Offset, Imm))
1752  return false;
1753 
1754  return !Imm && isa<ConstantSDNode>(Offset);
1755 }
1756 
1757 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1758  SDValue &Base,
1759  SDValue &Offset) const {
1760  SDLoc DL(Index);
1761 
1762  if (CurDAG->isBaseWithConstantOffset(Index)) {
1763  SDValue N0 = Index.getOperand(0);
1764  SDValue N1 = Index.getOperand(1);
1765  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1766 
1767  // (add n0, c0)
1768  // Don't peel off the offset (c0) if doing so could possibly lead
1769  // the base (n0) to be negative.
1770  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1771  Base = N0;
1772  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1773  return true;
1774  }
1775  }
1776 
1777  if (isa<ConstantSDNode>(Index))
1778  return false;
1779 
1780  Base = Index;
1781  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1782  return true;
1783 }
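 // Illustrative example: Index = (add %v, 7) with %v provably non-negative
 // (SignBitIsZero) gives Base = %v, Offset = 7; otherwise the whole
 // expression stays in Base with Offset = 0.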
1784 
1785 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1786  SDValue Val, uint32_t Offset,
1787  uint32_t Width) {
1788  // Transformation function: pack the offset and width of a BFE into
1789  // the format expected by S_BFE_I32 / S_BFE_U32. In the second source
1790  // operand, bits [5:0] contain the offset and bits [22:16] the width.
1791  uint32_t PackedVal = Offset | (Width << 16);
1792  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1793 
1794  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1795 }
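 // Worked example: Offset = 16, Width = 8 packs to 16 | (8 << 16)
 // = 0x00080010, so S_BFE_U32 with this operand extracts bits [23:16].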
1796 
1797 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1798  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1799  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1800  // Predicate: 0 < b <= c < 32
1801 
1802  const SDValue &Shl = N->getOperand(0);
1803  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1804  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1805 
1806  if (B && C) {
1807  uint32_t BVal = B->getZExtValue();
1808  uint32_t CVal = C->getZExtValue();
1809 
1810  if (0 < BVal && BVal <= CVal && CVal < 32) {
1811  bool Signed = N->getOpcode() == ISD::SRA;
1812  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1813 
1814  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1815  32 - CVal));
1816  return;
1817  }
1818  }
1819  SelectCode(N);
1820 }
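 // Illustrative example: (srl (shl %a, 8), 24) has b = 8, c = 24 and becomes
 // S_BFE_U32 %a, offset = c - b = 16, width = 32 - c = 8, an unsigned
 // extract of bits [23:16].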
1821 
1822 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1823  switch (N->getOpcode()) {
1824  case ISD::AND:
1825  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1826  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1827  // Predicate: isMask(mask)
1828  const SDValue &Srl = N->getOperand(0);
1829  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1830  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1831 
1832  if (Shift && Mask) {
1833  uint32_t ShiftVal = Shift->getZExtValue();
1834  uint32_t MaskVal = Mask->getZExtValue();
1835 
1836  if (isMask_32(MaskVal)) {
1837  uint32_t WidthVal = countPopulation(MaskVal);
1838 
1839  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1840  Srl.getOperand(0), ShiftVal, WidthVal));
1841  return;
1842  }
1843  }
1844  }
1845  break;
1846  case ISD::SRL:
1847  if (N->getOperand(0).getOpcode() == ISD::AND) {
1848  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1849  // Predicate: isMask(mask >> b)
1850  const SDValue &And = N->getOperand(0);
1851  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1852  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1853 
1854  if (Shift && Mask) {
1855  uint32_t ShiftVal = Shift->getZExtValue();
1856  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1857 
1858  if (isMask_32(MaskVal)) {
1859  uint32_t WidthVal = countPopulation(MaskVal);
1860 
1861  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1862  And.getOperand(0), ShiftVal, WidthVal));
1863  return;
1864  }
1865  }
1866  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1867  SelectS_BFEFromShifts(N);
1868  return;
1869  }
1870  break;
1871  case ISD::SRA:
1872  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1873  SelectS_BFEFromShifts(N);
1874  return;
1875  }
1876  break;
1877 
1878  case ISD::SIGN_EXTEND_INREG: {
1879  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1880  SDValue Src = N->getOperand(0);
1881  if (Src.getOpcode() != ISD::SRL)
1882  break;
1883 
1884  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1885  if (!Amt)
1886  break;
1887 
1888  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1889  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1890  Amt->getZExtValue(), Width));
1891  return;
1892  }
1893  }
1894 
1895  SelectCode(N);
1896 }
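 // Illustrative examples for the patterns above:
 //   (and (srl %a, 3), 0xff)       --> S_BFE_U32 %a, 3, 8  (popcount(0xff) = 8)
 //   (srl (and %a, 0xff0), 4)      --> S_BFE_U32 %a, 4, 8  (0xff0 >> 4 = 0xff)
 //   (sext_inreg (srl %x, 16), i8) --> S_BFE_I32 %x, 16, 8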
1897 
1898 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1899  assert(N->getOpcode() == ISD::BRCOND);
1900  if (!N->hasOneUse())
1901  return false;
1902 
1903  SDValue Cond = N->getOperand(1);
1904  if (Cond.getOpcode() == ISD::CopyToReg)
1905  Cond = Cond.getOperand(2);
1906 
1907  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1908  return false;
1909 
1910  MVT VT = Cond.getOperand(0).getSimpleValueType();
1911  if (VT == MVT::i32)
1912  return true;
1913 
1914  if (VT == MVT::i64) {
1915  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1916 
1917  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1918  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1919  }
1920 
1921  return false;
1922 }
1923 
1924 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1925  SDValue Cond = N->getOperand(1);
1926 
1927  if (Cond.isUndef()) {
1928  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1929  N->getOperand(2), N->getOperand(0));
1930  return;
1931  }
1932 
1933  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1934  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1935 
1936  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1937  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1938  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
1939  SDLoc SL(N);
1940 
1941  if (!UseSCCBr) {
1942  // This is the case in which we select S_CBRANCH_VCCNZ. We have not
1943  // analyzed what generates the vcc value, so we do not know whether vcc
1944  // bits for disabled lanes are 0. Thus we need to mask out bits for
1945  // disabled lanes.
1946  //
1947  // (For the case that we select S_CBRANCH_SCC1 and it gets changed to
1948  // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1949  // SIInstrInfo::moveToVALU, which inserts the S_AND.)
1950  //
1951  // We could add an analysis here of what generates the vcc value and omit
1952  // the S_AND when it is unnecessary. But it would be better to add a
1953  // separate pass after SIFixSGPRCopies that removes the unnecessary S_AND,
1954  // so it catches both cases.
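 // A sketch of the resulting selection for the wave64 VCC path
 // (illustrative):
 //   %masked = S_AND_B64 $exec, %cond
 //   $vcc = COPY %masked
 //   S_CBRANCH_VCCNZ %target-bb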
1955  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
1956  : AMDGPU::S_AND_B64,
1957  SL, MVT::i1,
1958  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
1959  : AMDGPU::EXEC,
1960  MVT::i1),
1961  Cond),
1962  0);
1963  }
1964 
1965  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1966  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1967  N->getOperand(2), // Basic Block
1968  VCC.getValue(0));
1969 }
1970 
1971 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1972  MVT VT = N->getSimpleValueType(0);
1973  bool IsFMA = N->getOpcode() == ISD::FMA;
1974  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1975  !Subtarget->hasFmaMixInsts()) ||
1976  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1977  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1978  SelectCode(N);
1979  return;
1980  }
1981 
1982  SDValue Src0 = N->getOperand(0);
1983  SDValue Src1 = N->getOperand(1);
1984  SDValue Src2 = N->getOperand(2);
1985  unsigned Src0Mods, Src1Mods, Src2Mods;
1986 
1987  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1988  // using the conversion from f16.
1989  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1990  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1991  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1992 
1993  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1994  "fmad selected with denormals enabled");
1995  // TODO: We can select this with f32 denormals enabled if all the sources are
1996  // converted from f16 (in which case fmad isn't legal).
1997 
1998  if (Sel0 || Sel1 || Sel2) {
1999  // For dummy operands.
2000  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2001  SDValue Ops[] = {
2002  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2003  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2004  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2005  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2006  Zero, Zero
2007  };
2008 
2009  CurDAG->SelectNodeTo(N,
2010  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2011  MVT::f32, Ops);
2012  } else {
2013  SelectCode(N);
2014  }
2015 }
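 // Illustrative example: on a subtarget with v_fma_mix instructions,
 // (fma (fpext f16:%a), %b, %c) selects to V_FMA_MIX_F32 with op_sel_hi set
 // on src0, letting the hardware convert %a from f16 inline rather than
 // emitting a separate v_cvt_f32_f16.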
2016 
2017 // This is here because there isn't a way to use the generated sub0_sub1 as the
2018 // subreg index to EXTRACT_SUBREG in tablegen.
2019 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2020  MemSDNode *Mem = cast<MemSDNode>(N);
2021  unsigned AS = Mem->getAddressSpace();
2022  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2023  SelectCode(N);
2024  return;
2025  }
2026 
2027  MVT VT = N->getSimpleValueType(0);
2028  bool Is32 = (VT == MVT::i32);
2029  SDLoc SL(N);
2030 
2031  MachineSDNode *CmpSwap = nullptr;
2032  if (Subtarget->hasAddr64()) {
2033  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2034 
2035  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2036  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2037  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2038  SDValue CmpVal = Mem->getOperand(2);
2039 
2040  // XXX - Do we care about glue operands?
2041 
2042  SDValue Ops[] = {
2043  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2044  };
2045 
2046  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2047  }
2048  }
2049 
2050  if (!CmpSwap) {
2051  SDValue SRsrc, SOffset, Offset, SLC;
2052  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2053  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2054  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2055 
2056  SDValue CmpVal = Mem->getOperand(2);
2057  SDValue Ops[] = {
2058  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2059  };
2060 
2061  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2062  }
2063  }
2064 
2065  if (!CmpSwap) {
2066  SelectCode(N);
2067  return;
2068  }
2069 
2070  MachineMemOperand *MMO = Mem->getMemOperand();
2071  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2072 
2073  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2074  SDValue Extract
2075  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2076 
2077  ReplaceUses(SDValue(N, 0), Extract);
2078  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2079  CurDAG->RemoveDeadNode(N);
2080 }
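 // Illustrative note: the cmpswap instruction takes {data, cmp} packed into
 // one double-width vdata operand and returns the original memory value in
 // its low half, which is why only sub0 (or sub0_sub1 for i64) is extracted.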
2081 
2082 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2083  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2084  // be copied to an SGPR with readfirstlane.
2085  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2086  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2087 
2088  SDValue Chain = N->getOperand(0);
2089  SDValue Ptr = N->getOperand(2);
2090  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2091  MachineMemOperand *MMO = M->getMemOperand();
2092  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2093 
2094  SDValue Offset;
2095  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2096  SDValue PtrBase = Ptr.getOperand(0);
2097  SDValue PtrOffset = Ptr.getOperand(1);
2098 
2099  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2100  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2101  N = glueCopyToM0(N, PtrBase);
2102  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2103  }
2104  }
2105 
2106  if (!Offset) {
2107  N = glueCopyToM0(N, Ptr);
2108  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2109  }
2110 
2111  SDValue Ops[] = {
2112  Offset,
2113  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2114  Chain,
2115  N->getOperand(N->getNumOperands() - 1) // New glue
2116  };
2117 
2118  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2119  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2120 }
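 // Illustrative example: Ptr = (add %base, 12) with a legal 16-bit offset
 // copies %base to m0 via glueCopyToM0 and puts 12 in the offset field;
 // otherwise the whole pointer goes to m0 with an offset field of 0.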
2121 
2122 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2123  switch (IntrID) {
2124  case Intrinsic::amdgcn_ds_gws_init:
2125  return AMDGPU::DS_GWS_INIT;
2126  case Intrinsic::amdgcn_ds_gws_barrier:
2127  return AMDGPU::DS_GWS_BARRIER;
2128  case Intrinsic::amdgcn_ds_gws_sema_v:
2129  return AMDGPU::DS_GWS_SEMA_V;
2130  case Intrinsic::amdgcn_ds_gws_sema_br:
2131  return AMDGPU::DS_GWS_SEMA_BR;
2132  case Intrinsic::amdgcn_ds_gws_sema_p:
2133  return AMDGPU::DS_GWS_SEMA_P;
2134  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2135  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2136  default:
2137  llvm_unreachable("not a gws intrinsic");
2138  }
2139 }
2140 
2141 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2142  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2143  !Subtarget->hasGWSSemaReleaseAll()) {
2144  // Let this error.
2145  SelectCode(N);
2146  return;
2147  }
2148 
2149  // Chain, intrinsic ID, vsrc, offset
2150  const bool HasVSrc = N->getNumOperands() == 4;
2151  assert(HasVSrc || N->getNumOperands() == 3);
2152 
2153  SDLoc SL(N);
2154  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2155  int ImmOffset = 0;
2156  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2157  MachineMemOperand *MMO = M->getMemOperand();
2158 
2159  // Don't worry if the offset ends up in a VGPR. Only one lane's value
2160  // takes effect, so SIFixSGPRCopies will validly insert a readfirstlane.
2161 
2162  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2163  // offset field) % 64. Some versions of the programming guide omit the m0
2164  // part, or claim it's from offset 0.
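 // (Illustrative: a constant offset of 5 is placed directly in the offset
 // field with m0 = 0, while a variable offset %v becomes
 // m0 = readfirstlane(%v) << 16, matching the M0[21:16] encoding above.)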
2165  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2166  // If we have a constant offset, try to use the 0 in m0 as the base.
2167  // TODO: Look into changing the default m0 initialization value. If the
2168  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2169  // the immediate offset.
2170  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2171  ImmOffset = ConstOffset->getZExtValue();
2172  } else {
2173  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2174  ImmOffset = BaseOffset.getConstantOperandVal(1);
2175  BaseOffset = BaseOffset.getOperand(0);
2176  }
2177 
2178  // Prefer to do the shift in an SGPR since it should be possible to use m0
2179  // as the result directly. If it's already an SGPR, it will be eliminated
2180  // later.
2181  SDNode *SGPROffset
2182  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2183  BaseOffset);
2184  // Shift to offset in m0
2185  SDNode *M0Base
2186  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2187  SDValue(SGPROffset, 0),
2188  CurDAG->getTargetConstant(16, SL, MVT::i32));
2189  glueCopyToM0(N, SDValue(M0Base, 0));
2190  }
2191 
2192  SDValue Chain = N->getOperand(0);
2193  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2194 
2195  // TODO: Can this just be removed from the instruction?
2196  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2197 
2198  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2199  SmallVector<SDValue, 5> Ops;
2200  if (HasVSrc)
2201  Ops.push_back(N->getOperand(2));
2202  Ops.push_back(OffsetField);
2203  Ops.push_back(GDS);
2204  Ops.push_back(Chain);
2205 
2206  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2207  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2208 }
2209 
2210 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2211  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2212  switch (IntrID) {
2213  case Intrinsic::amdgcn_ds_append:
2214  case Intrinsic::amdgcn_ds_consume: {
2215  if (N->getValueType(0) != MVT::i32)
2216  break;
2217  SelectDSAppendConsume(N, IntrID);
2218  return;
2219  }
2220  }
2221 
2222  SelectCode(N);
2223 }
2224 
2225 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2226  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2227  unsigned Opcode;
2228  switch (IntrID) {
2229  case Intrinsic::amdgcn_wqm:
2230  Opcode = AMDGPU::WQM;
2231  break;
2232  case Intrinsic::amdgcn_softwqm:
2233  Opcode = AMDGPU::SOFT_WQM;
2234  break;
2235  case Intrinsic::amdgcn_wwm:
2236  Opcode = AMDGPU::WWM;
2237  break;
2238  default:
2239  SelectCode(N);
2240  return;
2241  }
2242 
2243  SDValue Src = N->getOperand(1);
2244  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2245 }
2246 
2247 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2248  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2249  switch (IntrID) {
2250  case Intrinsic::amdgcn_ds_gws_init:
2251  case Intrinsic::amdgcn_ds_gws_barrier:
2252  case Intrinsic::amdgcn_ds_gws_sema_v:
2253  case Intrinsic::amdgcn_ds_gws_sema_br:
2254  case Intrinsic::amdgcn_ds_gws_sema_p:
2255  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2256  SelectDS_GWS(N, IntrID);
2257  return;
2258  default:
2259  break;
2260  }
2261 
2262  SelectCode(N);
2263 }
2264 
2265 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2266  unsigned &Mods) const {
2267  Mods = 0;
2268  Src = In;
2269 
2270  if (Src.getOpcode() == ISD::FNEG) {
2271  Mods |= SISrcMods::NEG;
2272  Src = Src.getOperand(0);
2273  }
2274 
2275  if (Src.getOpcode() == ISD::FABS) {
2276  Mods |= SISrcMods::ABS;
2277  Src = Src.getOperand(0);
2278  }
2279 
2280  return true;
2281 }
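 // Illustrative example: In = (fneg (fabs %x)) strips to Src = %x with
 // Mods = NEG | ABS. Note the check order: an fabs wrapped inside an fneg is
 // folded, but an fneg inside an fabs is left in place.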
2282 
2283 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2284  SDValue &SrcMods) const {
2285  unsigned Mods;
2286  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2287  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2288  return true;
2289  }
2290 
2291  return false;
2292 }
2293 
2294 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2295  SDValue &SrcMods) const {
2296  SelectVOP3Mods(In, Src, SrcMods);
2297  return isNoNanSrc(Src);
2298 }
2299 
2300 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2301  SDValue &SrcMods) const {
2302  if (In.getValueType() == MVT::f32)
2303  return SelectVOP3Mods(In, Src, SrcMods);
2304  Src = In;
2305  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2306  return true;
2307 }
2308 
2309 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2310  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2311  return false;
2312 
2313  Src = In;
2314  return true;
2315 }
2316 
2317 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2318  SDValue &SrcMods, SDValue &Clamp,
2319  SDValue &Omod) const {
2320  SDLoc DL(In);
2321  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2322  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2323 
2324  return SelectVOP3Mods(In, Src, SrcMods);
2325 }
2326 
2327 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2328  SDValue &SrcMods,
2329  SDValue &Clamp,
2330  SDValue &Omod) const {
2331  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2332  return SelectVOP3Mods(In, Src, SrcMods);
2333 }
2334 
2335 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2336  SDValue &Clamp, SDValue &Omod) const {
2337  Src = In;
2338 
2339  SDLoc DL(In);
2340  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2341  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2342 
2343  return true;
2344 }
2345 
2346 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2347  SDValue &SrcMods) const {
2348  unsigned Mods = 0;
2349  Src = In;
2350 
2351  if (Src.getOpcode() == ISD::FNEG) {
2352  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2353  Src = Src.getOperand(0);
2354  }
2355 
2356  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2357  unsigned VecMods = Mods;
2358 
2359  SDValue Lo = stripBitcast(Src.getOperand(0));
2360  SDValue Hi = stripBitcast(Src.getOperand(1));
2361 
2362  if (Lo.getOpcode() == ISD::FNEG) {
2363  Lo = stripBitcast(Lo.getOperand(0));
2364  Mods ^= SISrcMods::NEG;
2365  }
2366 
2367  if (Hi.getOpcode() == ISD::FNEG) {
2368  Hi = stripBitcast(Hi.getOperand(0));
2369  Mods ^= SISrcMods::NEG_HI;
2370  }
2371 
2372  if (isExtractHiElt(Lo, Lo))
2373  Mods |= SISrcMods::OP_SEL_0;
2374 
2375  if (isExtractHiElt(Hi, Hi))
2376  Mods |= SISrcMods::OP_SEL_1;
2377 
2378  Lo = stripExtractLoElt(Lo);
2379  Hi = stripExtractLoElt(Hi);
2380 
2381  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2382  // Really a scalar input. Just select from the low half of the register to
2383  // avoid packing.
2384 
2385  Src = Lo;
2386  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2387  return true;
2388  }
2389 
2390  Mods = VecMods;
2391  }
2392 
2393  // Packed instructions do not have abs modifiers.
2394  Mods |= SISrcMods::OP_SEL_1;
2395 
2396  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2397  return true;
2398 }
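 // Illustrative example: In = (fneg v2f16:%v) gives Src = %v with
 // Mods = NEG | NEG_HI | OP_SEL_1; both halves are negated, and OP_SEL_1 is
 // the default op_sel_hi that packed instructions expect.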
2399 
2400 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2401  SDValue &SrcMods,
2402  SDValue &Clamp) const {
2403  SDLoc SL(In);
2404 
2405  // FIXME: Handle clamp and op_sel
2406  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2407 
2408  return SelectVOP3PMods(In, Src, SrcMods);
2409 }
2410 
2411 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2412  SDValue &SrcMods) const {
2413  Src = In;
2414  // FIXME: Handle op_sel
2415  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2416  return true;
2417 }
2418 
2419 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2420  SDValue &SrcMods,
2421  SDValue &Clamp) const {
2422  SDLoc SL(In);
2423 
2424  // FIXME: Handle clamp
2425  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2426 
2427  return SelectVOP3OpSel(In, Src, SrcMods);
2428 }
2429 
2430 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2431  SDValue &SrcMods) const {
2432  // FIXME: Handle op_sel
2433  return SelectVOP3Mods(In, Src, SrcMods);
2434 }
2435 
2436 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2437  SDValue &SrcMods,
2438  SDValue &Clamp) const {
2439  SDLoc SL(In);
2440 
2441  // FIXME: Handle clamp
2442  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2443 
2444  return SelectVOP3OpSelMods(In, Src, SrcMods);
2445 }
2446 
2447 // The return value is not whether the match is possible (which it always is),
2448 // but whether or not a conversion is really used.
2449 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2450  unsigned &Mods) const {
2451  Mods = 0;
2452  SelectVOP3ModsImpl(In, Src, Mods);
2453 
2454  if (Src.getOpcode() == ISD::FP_EXTEND) {
2455  Src = Src.getOperand(0);
2456  assert(Src.getValueType() == MVT::f16);
2457  Src = stripBitcast(Src);
2458 
2459  // Be careful about folding modifiers if we already have an abs. fneg is
2460  // applied last, so we don't want to apply an earlier fneg.
2461  if ((Mods & SISrcMods::ABS) == 0) {
2462  unsigned ModsTmp;
2463  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2464 
2465  if ((ModsTmp & SISrcMods::NEG) != 0)
2466  Mods ^= SISrcMods::NEG;
2467 
2468  if ((ModsTmp & SISrcMods::ABS) != 0)
2469  Mods |= SISrcMods::ABS;
2470  }
2471 
2472  // op_sel/op_sel_hi decide the source type and source.
2473  // If the source's op_sel_hi is set, it indicates a conversion from f16.
2474  // If the source's op_sel is set, it picks the high half of the source
2475  // register.
2476 
2477  Mods |= SISrcMods::OP_SEL_1;
2478  if (isExtractHiElt(Src, Src)) {
2479  Mods |= SISrcMods::OP_SEL_0;
2480 
2481  // TODO: Should we try to look for neg/abs here?
2482  }
2483 
2484  return true;
2485  }
2486 
2487  return false;
2488 }
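 // Illustrative example: In = (fp_extend (extract hi half of v2f16:%v))
 // returns true with Src = %v and Mods = OP_SEL_0 | OP_SEL_1 ("take the high
 // f16 half and convert it"); a plain f32 input returns false since no
 // conversion is used.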
2489 
2490 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2491  SDValue &SrcMods) const {
2492  unsigned Mods = 0;
2493  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2494  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2495  return true;
2496 }
2497 
2498 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2499  if (In.isUndef())
2500  return CurDAG->getUNDEF(MVT::i32);
2501 
2502  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2503  SDLoc SL(In);
2504  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2505  }
2506 
2507  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2508  SDLoc SL(In);
2509  return CurDAG->getConstant(
2510  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2511  }
2512 
2513  SDValue Src;
2514  if (isExtractHiElt(In, Src))
2515  return Src;
2516 
2517  return SDValue();
2518 }
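 // Illustrative example: a ConstantFP f16 1.0 (bits 0x3C00) is returned as
 // the i32 constant 0x3C000000, i.e. the value already shifted into the
 // high 16 bits.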
2519 
2520 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2521  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2522 
2523  const SIRegisterInfo *SIRI =
2524  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2525  const SIInstrInfo * SII =
2526  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2527 
2528  unsigned Limit = 0;
2529  bool AllUsesAcceptSReg = true;
2530  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2531  Limit < 10 && U != E; ++U, ++Limit) {
2532  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2533 
2534  // If the register class is unknown, the operand may still need to
2535  // be an SGPR, e.g. because of an inline asm constraint, so be
2536  // conservative and bail out.
2537  if (!RC || SIRI->isSGPRClass(RC))
2538  return false;
2539 
2540  if (RC != &AMDGPU::VS_32RegClass) {
2541  AllUsesAcceptSReg = false;
2542  SDNode * User = *U;
2543  if (User->isMachineOpcode()) {
2544  unsigned Opc = User->getMachineOpcode();
2545  MCInstrDesc Desc = SII->get(Opc);
2546  if (Desc.isCommutable()) {
2547  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2548  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2549  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2550  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2551  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2552  if (CommutedRC == &AMDGPU::VS_32RegClass)
2553  AllUsesAcceptSReg = true;
2554  }
2555  }
2556  }
2557  // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2558  // commuting current user. This means have at least one use
2559  // that strictly require VGPR. Thus, we will not attempt to commute
2560  // other user instructions.
2561  if (!AllUsesAcceptSReg)
2562  break;
2563  }
2564  }
2565  return !AllUsesAcceptSReg && (Limit < 10);
2566 }
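 // Sketch of the intent (illustrative): if every use can take the operand in
 // a VS_32 slot (possibly after commuting), the constant stays scalar and
 // this returns false; if some use strictly requires a VGPR, returning true
 // lets the immediate be materialized with v_mov_b32 instead.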
2567 
2568 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2569  auto Ld = cast<LoadSDNode>(N);
2570 
2571  return Ld->getAlignment() >= 4 &&
2572  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2573  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2574  !N->isDivergent()) ||
2575  (Subtarget->getScalarizeGlobalBehavior() &&
2576  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2577  !Ld->isVolatile() && !N->isDivergent() &&
2578  static_cast<const SITargetLowering *>(
2579  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2591 }
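 // Illustrative example: a non-divergent, 4-byte-aligned load from
 // CONSTANT_ADDRESS qualifies, as does a uniform, non-volatile GLOBAL_ADDRESS
 // load when scalarize-global-loads behavior is enabled and the memory is
 // provably not clobbered.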
2592 
2593 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2594  const AMDGPUTargetLowering& Lowering =
2595  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2596  bool IsModified = false;
2597  do {
2598  IsModified = false;
2599 
2600  // Go over all selected nodes and try to fold them a bit more
2601  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2602  while (Position != CurDAG->allnodes_end()) {
2603  SDNode *Node = &*Position++;
2604  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2605  if (!MachineNode)
2606  continue;
2607 
2608  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2609  if (ResNode != Node) {
2610  if (ResNode)
2611  ReplaceUses(Node, ResNode);
2612  IsModified = true;
2613  }
2614  }
2615  CurDAG->RemoveDeadNodes();
2616  } while (IsModified);
2617 }
2618 
2619 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2620  Subtarget = &MF.getSubtarget<R600Subtarget>();
2621  return SelectionDAGISel::runOnMachineFunction(MF);
2622 }
2623 
2624 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2625  if (!N->readMem())
2626  return false;
2627  if (CbId == -1)
2628  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2629  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2630 
2631  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2632 }
2633 
2634 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2635  SDValue& IntPtr) {
2636  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2637  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2638  true);
2639  return true;
2640  }
2641  return false;
2642 }
2643 
2644 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2645  SDValue& BaseReg, SDValue &Offset) {
2646  if (!isa<ConstantSDNode>(Addr)) {
2647  BaseReg = Addr;
2648  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2649  return true;
2650  }
2651  return false;
2652 }
2653 
2654 void R600DAGToDAGISel::Select(SDNode *N) {
2655  unsigned int Opc = N->getOpcode();
2656  if (N->isMachineOpcode()) {
2657  N->setNodeId(-1);
2658  return; // Already selected.
2659  }
2660 
2661  switch (Opc) {
2662  default: break;
2663  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2664  case ISD::SCALAR_TO_VECTOR:
2665  case ISD::BUILD_VECTOR: {
2666  EVT VT = N->getValueType(0);
2667  unsigned NumVectorElts = VT.getVectorNumElements();
2668  unsigned RegClassID;
2669  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2670  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2671  // pass. We want to avoid 128 bits copies as much as possible because they
2672  // can't be bundled by our scheduler.
2673  switch(NumVectorElts) {
2674  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2675  case 4:
2676  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2677  RegClassID = R600::R600_Reg128VerticalRegClassID;
2678  else
2679  RegClassID = R600::R600_Reg128RegClassID;
2680  break;
2681  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2682  }
2683  SelectBuildVector(N, RegClassID);
2684  return;
2685  }
2686  }
2687 
2688  SelectCode(N);
2689 }
2690 
2691 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2692  SDValue &Offset) {
2693  ConstantSDNode *C;
2694  SDLoc DL(Addr);
2695 
2696  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2697  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2698  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2699  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2700  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2701  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2702  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2703  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2704  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2705  Base = Addr.getOperand(0);
2706  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2707  } else {
2708  Base = Addr;
2709  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2710  }
2711 
2712  return true;
2713 }
2714 
2715 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2716  SDValue &Offset) {
2717  ConstantSDNode *IMMOffset;
2718 
2719  if (Addr.getOpcode() == ISD::ADD
2720  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2721  && isInt<16>(IMMOffset->getZExtValue())) {
2722 
2723  Base = Addr.getOperand(0);
2724  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2725  MVT::i32);
2726  return true;
2727  // If the pointer address is constant, we can move it to the offset field.
2728  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2729  && isInt<16>(IMMOffset->getZExtValue())) {
2730  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2731  SDLoc(CurDAG->getEntryNode()),
2732  R600::ZERO, MVT::i32);
2733  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2734  MVT::i32);
2735  return true;
2736  }
2737 
2738  // Default case, no offset
2739  Base = Addr;
2740  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2741  return true;
2742 }
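 // Illustrative example: Addr = (add %p, 100) yields Base = %p, Offset = 100
 // since 100 fits in 16 bits; a bare constant address C instead yields
 // Base = R600::ZERO with Offset = C.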