//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int CbId) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// \brief This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation
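  // (M0 holds the size limit used to clamp LDS address calculations; writing
  // all ones effectively disables the clamp.)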

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] =
        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                  MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
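          // Two constant 16-bit elements fold into one 32-bit move: e.g.
          // build_vector <1, 2> becomes s_mov_b32 0x00020001, with element 0
          // in the low half and element 1 in the high half.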
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
    SelectFMAD(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

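// A branch is uniform when its condition is known to be the same across all
// lanes of the wave; earlier IR passes (the CFG structurizer and uniformity
// annotation) mark such terminators with the metadata checked here.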
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
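  // The 64-bit operation is split into two 32-bit halves: S_ADD_U32/S_SUB_U32
  // on the low dwords produces a carry in SCC, which S_ADDC_U32/S_SUBB_U32
  // then consumes for the high dwords; the halves are recombined below with a
  // REG_SEQUENCE.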

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
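  // Plain DS instructions encode an unsigned 16-bit byte offset, while the
  // paired read2/write2 forms encode two 8-bit offsets, so the caller passes
  // in the offset width that applies.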
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
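  // The two 8-bit offsets of a read2/write2 are scaled by the 4-byte element
  // size, which is why the byte offsets below are divided by 4.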
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }
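  // For example, Imm = 4100 takes the first branch: Overflow = 8 and
  // ImmOffset = 4092 (4092 + 8 == 4100). Imm = 5000 takes the second:
  // Low = 908, Overflow = 4092, and 4092 + 908 == 5000.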

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

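  // On subtargets with flat-instruction offsets (gfx9), the global and
  // scratch forms accept a signed 13-bit immediate offset while plain flat
  // addressing accepts an unsigned 12-bit one, which is what the two range
  // checks below encode.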
  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
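  // (The encoded offset is not always the byte offset: SMRD immediate offsets
  // are dword-scaled on SI/CI, while VI and later encode byte offsets.)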

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
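  // For example, Offset = 16 and Width = 8 pack to 0x00080010.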
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32
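  // For example, with b = 8 and c = 16, ((a << 8) srl 16) extracts bits
  // [23:8] of a, i.e. BFE_U32 a, 8, 16 (offset c - b = 8, width 32 - c = 16).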

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

1637 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1638  assert(N->getOpcode() == ISD::BRCOND);
1639  if (!N->hasOneUse())
1640  return false;
1641 
1642  SDValue Cond = N->getOperand(1);
1643  if (Cond.getOpcode() == ISD::CopyToReg)
1644  Cond = Cond.getOperand(2);
1645 
1646  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1647  return false;
1648 
1649  MVT VT = Cond.getOperand(0).getSimpleValueType();
1650  if (VT == MVT::i32)
1651  return true;
1652 
1653  if (VT == MVT::i64) {
1654  auto ST = static_cast<const SISubtarget *>(Subtarget);
1655 
1656  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1657  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1658  }
1659 
1660  return false;
1661 }
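// For example, assuming a subtarget where hasScalarCompareEq64() is true, a
// uniform branch on "setcc i64 seteq" can stay on the scalar path, while an
// i64 setlt cannot and is handled by the vector compare/branch path instead.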
1662 
1663 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1664  SDValue Cond = N->getOperand(1);
1665 
1666  if (Cond.isUndef()) {
1667  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1668  N->getOperand(2), N->getOperand(0));
1669  return;
1670  }
1671 
1672  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1673  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1674  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1675  SDLoc SL(N);
1676 
1677  if (!UseSCCBr) {
1678  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1679  // analyzed what generates the vcc value, so we do not know whether vcc
1680  // bits for disabled lanes are 0. Thus we need to mask out bits for
1681  // disabled lanes.
1682  //
1683  // In the case where we select S_CBRANCH_SCC1 and it later gets
1684  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies itself
1685  // calls SIInstrInfo::moveToVALU, which inserts the S_AND.
1686  //
1687  // We could add an analysis of what generates the vcc value here and omit
1688  // the S_AND when it is unnecessary. But it would be better to add a separate
1689  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1690  // catches both cases.
1691  Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1692  CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1693  Cond),
1694  0);
1695  }
1696 
1697  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1698  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1699  N->getOperand(2), // Basic Block
1700  VCC.getValue(0));
1701 }
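// Sketch of the two sequences this emits (illustrative):
//   uniform:    $scc = CopyToReg %cond         ; S_CBRANCH_SCC1 %bb
//   divergent:  %m   = S_AND_B64 $exec, %cond
//               $vcc = CopyToReg %m            ; S_CBRANCH_VCCNZ %bb
// The S_AND with EXEC zeroes condition bits belonging to inactive lanes, so
// VCCNZ only observes live lanes, as the comment above explains.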
1702 
1703 void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
1704  MVT VT = N->getSimpleValueType(0);
1705  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
1706  SelectCode(N);
1707  return;
1708  }
1709 
1710  SDValue Src0 = N->getOperand(0);
1711  SDValue Src1 = N->getOperand(1);
1712  SDValue Src2 = N->getOperand(2);
1713  unsigned Src0Mods, Src1Mods, Src2Mods;
1714 
1715  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
1716  // conversion from f16.
1717  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1718  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1719  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1720 
1721  assert(!Subtarget->hasFP32Denormals() &&
1722  "fmad selected with denormals enabled");
1723  // TODO: We can select this with f32 denormals enabled if all the sources are
1724  // converted from f16 (in which case fmad isn't legal).
1725 
1726  if (Sel0 || Sel1 || Sel2) {
1727  // For dummy operands.
1728  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1729  SDValue Ops[] = {
1730  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1731  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1732  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1733  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1734  Zero, Zero
1735  };
1736 
1737  CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
1738  } else {
1739  SelectCode(N);
1740  }
1741 }
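// Illustrative case for the mixed-precision fold above: given
//   fmad (fp_extend f16:%a), f32:%b, f32:%c
// Sel0 is true because src0 uses an f16 conversion, so the node is rewritten
// to V_MAD_MIX_F32 with op_sel_hi on src0; if no source is extended from f16,
// the node falls through to ordinary fmad selection.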
1742 
1743 // This is here because there isn't a way to use the generated sub0_sub1 as the
1744 // subreg index to EXTRACT_SUBREG in tablegen.
1745 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1746  MemSDNode *Mem = cast<MemSDNode>(N);
1747  unsigned AS = Mem->getAddressSpace();
1748  if (AS == AMDGPUASI.FLAT_ADDRESS) {
1749  SelectCode(N);
1750  return;
1751  }
1752 
1753  MVT VT = N->getSimpleValueType(0);
1754  bool Is32 = (VT == MVT::i32);
1755  SDLoc SL(N);
1756 
1757  MachineSDNode *CmpSwap = nullptr;
1758  if (Subtarget->hasAddr64()) {
1759  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1760 
1761  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1762  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1763  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1764  SDValue CmpVal = Mem->getOperand(2);
1765 
1766  // XXX - Do we care about glue operands?
1767 
1768  SDValue Ops[] = {
1769  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1770  };
1771 
1772  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1773  }
1774  }
1775 
1776  if (!CmpSwap) {
1777  SDValue SRsrc, SOffset, Offset, SLC;
1778  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1779  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1780  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1781 
1782  SDValue CmpVal = Mem->getOperand(2);
1783  SDValue Ops[] = {
1784  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1785  };
1786 
1787  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1788  }
1789  }
1790 
1791  if (!CmpSwap) {
1792  SelectCode(N);
1793  return;
1794  }
1795 
1796  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1797  *MMOs = Mem->getMemOperand();
1798  CmpSwap->setMemRefs(MMOs, MMOs + 1);
1799 
1800  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1801  SDValue Extract
1802  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1803 
1804  ReplaceUses(SDValue(N, 0), Extract);
1805  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1806  CurDAG->RemoveDeadNode(N);
1807 }
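// Illustrative shape of the replacement built above for the 32-bit case:
//   %pair:vreg_64 = BUFFER_ATOMIC_CMPSWAP_..._RTN {swap, cmp} pair, addr...
//   %old:vgpr_32  = EXTRACT_SUBREG %pair, sub0
// The returned pre-existing memory value lives in the low half of the data
// pair, hence the sub0 (or sub0_sub1 for the 64-bit _X2 variants) extract.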
1808 
1809 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1810  unsigned &Mods) const {
1811  Mods = 0;
1812  Src = In;
1813 
1814  if (Src.getOpcode() == ISD::FNEG) {
1815  Mods |= SISrcMods::NEG;
1816  Src = Src.getOperand(0);
1817  }
1818 
1819  if (Src.getOpcode() == ISD::FABS) {
1820  Mods |= SISrcMods::ABS;
1821  Src = Src.getOperand(0);
1822  }
1823 
1824  return true;
1825 }
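// e.g. "fneg (fabs x)" folds to Src = x with Mods = NEG | ABS. Checking fneg
// first means only an outermost fneg is stripped, which matches the hardware
// modifier order of -|x| (abs applied to the source before neg).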
1826 
1827 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1828  SDValue &SrcMods) const {
1829  unsigned Mods;
1830  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1831  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1832  return true;
1833  }
1834 
1835  return false;
1836 }
1837 
1838 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1839  SDValue &SrcMods) const {
1840  SelectVOP3Mods(In, Src, SrcMods);
1841  return isNoNanSrc(Src);
1842 }
1843 
1844 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1845  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1846  return false;
1847 
1848  Src = In;
1849  return true;
1850 }
1851 
1852 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1853  SDValue &SrcMods, SDValue &Clamp,
1854  SDValue &Omod) const {
1855  SDLoc DL(In);
1856  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1857  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1858 
1859  return SelectVOP3Mods(In, Src, SrcMods);
1860 }
1861 
1862 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1863  SDValue &SrcMods,
1864  SDValue &Clamp,
1865  SDValue &Omod) const {
1866  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1867  return SelectVOP3Mods(In, Src, SrcMods);
1868 }
1869 
1870 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1871  SDValue &Clamp, SDValue &Omod) const {
1872  Src = In;
1873 
1874  SDLoc DL(In);
1875  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1876  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1877 
1878  return true;
1879 }
1880 
1881 static SDValue stripBitcast(SDValue Val) {
1882  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1883 }
1884 
1885 // Figure out if this is really an extract of the high 16-bits of a dword.
1886 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1887  In = stripBitcast(In);
1888  if (In.getOpcode() != ISD::TRUNCATE)
1889  return false;
1890 
1891  SDValue Srl = In.getOperand(0);
1892  if (Srl.getOpcode() == ISD::SRL) {
1893  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1894  if (ShiftAmt->getZExtValue() == 16) {
1895  Out = stripBitcast(Srl.getOperand(0));
1896  return true;
1897  }
1898  }
1899  }
1900 
1901  return false;
1902 }
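// e.g. "trunc i16 (srl i32 %x, 16)", possibly wrapped in bitcasts, is
// recognized as a read of the high half of %x, so Out becomes %x.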
1903 
1904 // Look through operations that obscure a use of just the low 16 bits of the
1905 // same register.
1906 static SDValue stripExtractLoElt(SDValue In) {
1907  if (In.getOpcode() == ISD::TRUNCATE) {
1908  SDValue Src = In.getOperand(0);
1909  if (Src.getValueType().getSizeInBits() == 32)
1910  return stripBitcast(Src);
1911  }
1912 
1913  return In;
1914 }
1915 
1916 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1917  SDValue &SrcMods) const {
1918  unsigned Mods = 0;
1919  Src = In;
1920 
1921  if (Src.getOpcode() == ISD::FNEG) {
1922  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1923  Src = Src.getOperand(0);
1924  }
1925 
1926  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1927  unsigned VecMods = Mods;
1928 
1929  SDValue Lo = stripBitcast(Src.getOperand(0));
1930  SDValue Hi = stripBitcast(Src.getOperand(1));
1931 
1932  if (Lo.getOpcode() == ISD::FNEG) {
1933  Lo = stripBitcast(Lo.getOperand(0));
1934  Mods ^= SISrcMods::NEG;
1935  }
1936 
1937  if (Hi.getOpcode() == ISD::FNEG) {
1938  Hi = stripBitcast(Hi.getOperand(0));
1939  Mods ^= SISrcMods::NEG_HI;
1940  }
1941 
1942  if (isExtractHiElt(Lo, Lo))
1943  Mods |= SISrcMods::OP_SEL_0;
1944 
1945  if (isExtractHiElt(Hi, Hi))
1946  Mods |= SISrcMods::OP_SEL_1;
1947 
1948  Lo = stripExtractLoElt(Lo);
1949  Hi = stripExtractLoElt(Hi);
1950 
1951  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1952  // Really a scalar input. Just select from the low half of the register to
1953  // avoid packing.
1954 
1955  Src = Lo;
1956  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1957  return true;
1958  }
1959 
1960  Mods = VecMods;
1961  }
1962 
1963  // Packed instructions do not have abs modifiers.
1964  Mods |= SISrcMods::OP_SEL_1;
1965 
1966  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1967  return true;
1968 }
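// Worked cases for the path above (illustrative):
//   fneg v2f16:%v                 -> Src = %v, Mods = NEG | NEG_HI | OP_SEL_1
//   build_vector %x, %x (splat,   -> Src = %x with op_sel_hi clear, so both
//     not an inline immediate)       halves read the low 16 bits of %x
// Per-element modifiers found inside a non-splat build_vector are discarded
// (Mods is reset to VecMods) and the vector is selected as-is.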
1969 
1970 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1971  SDValue &SrcMods,
1972  SDValue &Clamp) const {
1973  SDLoc SL(In);
1974 
1975  // FIXME: Handle clamp and op_sel
1976  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1977 
1978  return SelectVOP3PMods(In, Src, SrcMods);
1979 }
1980 
1981 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1982  SDValue &SrcMods) const {
1983  Src = In;
1984  // FIXME: Handle op_sel
1985  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1986  return true;
1987 }
1988 
1989 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1990  SDValue &SrcMods,
1991  SDValue &Clamp) const {
1992  SDLoc SL(In);
1993 
1994  // FIXME: Handle clamp
1995  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1996 
1997  return SelectVOP3OpSel(In, Src, SrcMods);
1998 }
1999 
2000 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2001  SDValue &SrcMods) const {
2002  // FIXME: Handle op_sel
2003  return SelectVOP3Mods(In, Src, SrcMods);
2004 }
2005 
2006 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2007  SDValue &SrcMods,
2008  SDValue &Clamp) const {
2009  SDLoc SL(In);
2010 
2011  // FIXME: Handle clamp
2012  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2013 
2014  return SelectVOP3OpSelMods(In, Src, SrcMods);
2015 }
2016 
2017 // The return value is not whether the match is possible (which it always is),
2018 // but whether or not a conversion is really used.
2019 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2020  unsigned &Mods) const {
2021  Mods = 0;
2022  SelectVOP3ModsImpl(In, Src, Mods);
2023 
2024  if (Src.getOpcode() == ISD::FP_EXTEND) {
2025  Src = Src.getOperand(0);
2026  assert(Src.getValueType() == MVT::f16);
2027  Src = stripBitcast(Src);
2028 
2029  // Be careful about folding modifiers if we already have an abs. fneg is
2030  // applied last, so we don't want to apply an earlier fneg.
2031  if ((Mods & SISrcMods::ABS) == 0) {
2032  unsigned ModsTmp;
2033  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2034 
2035  if ((ModsTmp & SISrcMods::NEG) != 0)
2036  Mods ^= SISrcMods::NEG;
2037 
2038  if ((ModsTmp & SISrcMods::ABS) != 0)
2039  Mods |= SISrcMods::ABS;
2040  }
2041 
2042  // op_sel/op_sel_hi decide the source type and source.
2043  // If the source's op_sel_hi is set, a conversion from fp16 is performed.
2044  // If the source's op_sel is set, it picks the high half of the source
2045  // register.
2046 
2047  Mods |= SISrcMods::OP_SEL_1;
2048  if (isExtractHiElt(Src, Src)) {
2049  Mods |= SISrcMods::OP_SEL_0;
2050 
2051  // TODO: Should we try to look for neg/abs here?
2052  }
2053 
2054  return true;
2055  }
2056 
2057  return false;
2058 }
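// Net encoding produced above (illustrative summary):
//   returns false           : plain f32 source, no mix conversion used
//   OP_SEL_1                : f16 source converted from the low half
//   OP_SEL_1 | OP_SEL_0     : f16 source converted from the high half
// plus any NEG/ABS bits folded through the fp_extend.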
2059 
2060 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2061  SDValue &SrcMods) const {
2062  unsigned Mods = 0;
2063  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2064  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2065  return true;
2066 }
2067 
2068 // TODO: Can we identify things like v_mad_mixhi_f16?
2069 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2070  if (In.isUndef()) {
2071  Src = In;
2072  return true;
2073  }
2074 
2075  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2076  SDLoc SL(In);
2077  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2078  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2079  SL, MVT::i32, K);
2080  Src = SDValue(MovK, 0);
2081  return true;
2082  }
2083 
2084  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2085  SDLoc SL(In);
2086  SDValue K = CurDAG->getTargetConstant(
2087  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2088  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2089  SL, MVT::i32, K);
2090  Src = SDValue(MovK, 0);
2091  return true;
2092  }
2093 
2094  return isExtractHiElt(In, Src);
2095 }
2096 
2097 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2098  const AMDGPUTargetLowering& Lowering =
2099  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2100  bool IsModified = false;
2101  do {
2102  IsModified = false;
2103 
2104  // Go over all selected nodes and try to fold them a bit more
2105  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2106  while (Position != CurDAG->allnodes_end()) {
2107  SDNode *Node = &*Position++;
2108  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2109  if (!MachineNode)
2110  continue;
2111 
2112  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2113  if (ResNode != Node) {
2114  if (ResNode)
2115  ReplaceUses(Node, ResNode);
2116  IsModified = true;
2117  }
2118  }
2119  CurDAG->RemoveDeadNodes();
2120  } while (IsModified);
2121 }
2122 
2123 void R600DAGToDAGISel::Select(SDNode *N) {
2124  unsigned int Opc = N->getOpcode();
2125  if (N->isMachineOpcode()) {
2126  N->setNodeId(-1);
2127  return; // Already selected.
2128  }
2129 
2130  switch (Opc) {
2131  default: break;
2132  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2133  case ISD::SCALAR_TO_VECTOR:
2134  case ISD::BUILD_VECTOR: {
2135  EVT VT = N->getValueType(0);
2136  unsigned NumVectorElts = VT.getVectorNumElements();
2137  unsigned RegClassID;
2138  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2139  // sequence that adds a 128-bit reg copy when going through the
2140  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2141  // possible because they can't be bundled by our scheduler.
2142  switch(NumVectorElts) {
2143  case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
2144  case 4:
2145  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2146  RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
2147  else
2148  RegClassID = AMDGPU::R600_Reg128RegClassID;
2149  break;
2150  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2151  }
2152  SelectBuildVector(N, RegClassID);
2153  return;
2154  }
2155  }
2156 
2157  SelectCode(N);
2158 }
2159 
2160 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2161  SDValue &Offset) {
2162  ConstantSDNode *C;
2163  SDLoc DL(Addr);
2164 
2165  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2166  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2167  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2168  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2169  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2170  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2171  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2172  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2173  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2174  Base = Addr.getOperand(0);
2175  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2176  } else {
2177  Base = Addr;
2178  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2179  }
2180 
2181  return true;
2182 }
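// e.g. (add %r, 16) yields Base = %r, Offset = 16; a bare constant address or
// a DWORDADDR of a constant is anchored at INDIRECT_BASE_ADDR with the
// constant as the offset; anything else falls back to Base = Addr, Offset = 0.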
2183 
2184 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2185  SDValue &Offset) {
2186  ConstantSDNode *IMMOffset;
2187 
2188  if (Addr.getOpcode() == ISD::ADD
2189  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2190  && isInt<16>(IMMOffset->getZExtValue())) {
2191 
2192  Base = Addr.getOperand(0);
2193  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2194  MVT::i32);
2195  return true;
2196  // If the pointer address is constant, we can move it to the offset field.
2197  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2198  && isInt<16>(IMMOffset->getZExtValue())) {
2199  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2200  SDLoc(CurDAG->getEntryNode()),
2201  AMDGPU::ZERO, MVT::i32);
2202  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2203  MVT::i32);
2204  return true;
2205  }
2206 
2207  // Default case, no offset
2208  Base = Addr;
2209  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2210  return true;
2211 }
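// e.g. (add %p, 12) gives Base = %p, Offset = 12, since 12 fits in a signed
// 16-bit immediate; a constant address is rebased on the ZERO register so the
// entire value can be carried in the offset field.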