// LLVM 6.0.0svn — AMDGPUISelDAGToDAG.cpp (Doxygen export header removed)
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>
51 
52 using namespace llvm;
53 
namespace llvm {

// Forward declaration: this file refers to R600InstrInfo only through
// pointers in declarations, so the full class definition is not needed here.
class R600InstrInfo;

} // end namespace llvm
59 
60 //===----------------------------------------------------------------------===//
61 // Instruction Selector Implementation
62 //===----------------------------------------------------------------------===//
63 
64 namespace {
65 
66 /// AMDGPU specific code to select AMDGPU machine instructions for
67 /// SelectionDAG operations.
68 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
69  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
70  // make the right decision when generating code for different targets.
71  const AMDGPUSubtarget *Subtarget;
73  bool EnableLateStructurizeCFG;
74 
75 public:
76  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
78  : SelectionDAGISel(*TM, OptLevel) {
79  AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
80  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
81  }
82  ~AMDGPUDAGToDAGISel() override = default;
83 
84  void getAnalysisUsage(AnalysisUsage &AU) const override {
87  }
88 
89  bool runOnMachineFunction(MachineFunction &MF) override;
90  void Select(SDNode *N) override;
91  StringRef getPassName() const override;
92  void PostprocessISelDAG() override;
93 
94 protected:
95  void SelectBuildVector(SDNode *N, unsigned RegClassID);
96 
97 private:
98  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
99  bool isNoNanSrc(SDValue N) const;
100  bool isInlineImmediate(const SDNode *N) const;
101  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
102  const R600InstrInfo *TII);
103  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
104  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
105 
106  bool isConstantLoad(const MemSDNode *N, int cbID) const;
107  bool isUniformBr(const SDNode *N) const;
108 
109  SDNode *glueCopyToM0(SDNode *N) const;
110 
111  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
112  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
113  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
114  SDValue& Offset);
115  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
116  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
117  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
118  unsigned OffsetBits) const;
119  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
120  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
121  SDValue &Offset1) const;
122  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
123  SDValue &SOffset, SDValue &Offset, SDValue &Offen,
124  SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
125  SDValue &TFE) const;
126  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
127  SDValue &SOffset, SDValue &Offset, SDValue &GLC,
128  SDValue &SLC, SDValue &TFE) const;
129  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
130  SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
131  SDValue &SLC) const;
132  bool SelectMUBUFScratchOffen(SDNode *Parent,
133  SDValue Addr, SDValue &RSrc, SDValue &VAddr,
134  SDValue &SOffset, SDValue &ImmOffset) const;
135  bool SelectMUBUFScratchOffset(SDNode *Parent,
136  SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
137  SDValue &Offset) const;
138 
139  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
140  SDValue &Offset, SDValue &GLC, SDValue &SLC,
141  SDValue &TFE) const;
142  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
143  SDValue &Offset, SDValue &SLC) const;
144  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
145  SDValue &Offset) const;
146  bool SelectMUBUFConstant(SDValue Constant,
147  SDValue &SOffset,
148  SDValue &ImmOffset) const;
149  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
150  SDValue &ImmOffset) const;
151  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
152  SDValue &ImmOffset, SDValue &VOffset) const;
153 
154  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
155  SDValue &Offset, SDValue &SLC) const;
156  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
157  SDValue &Offset, SDValue &SLC) const;
158 
159  template <bool IsSigned>
160  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
161  SDValue &Offset, SDValue &SLC) const;
162 
163  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
164  bool &Imm) const;
165  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
166  bool &Imm) const;
167  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
168  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
169  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
170  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
171  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
172  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
173 
174  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
175  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
176  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
177  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
178  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
179  SDValue &Clamp, SDValue &Omod) const;
180  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
181  SDValue &Clamp, SDValue &Omod) const;
182 
183  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
184  SDValue &Clamp,
185  SDValue &Omod) const;
186 
187  bool SelectVOP3OMods(SDValue In, SDValue &Src,
188  SDValue &Clamp, SDValue &Omod) const;
189 
190  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
191  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
192  SDValue &Clamp) const;
193 
194  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
195  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
196  SDValue &Clamp) const;
197 
198  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
199  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
200  SDValue &Clamp) const;
201  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
202  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
203 
204  bool SelectHi16Elt(SDValue In, SDValue &Src) const;
205 
206  void SelectADD_SUB_I64(SDNode *N);
207  void SelectUADDO_USUBO(SDNode *N);
208  void SelectDIV_SCALE(SDNode *N);
209  void SelectMAD_64_32(SDNode *N);
210  void SelectFMA_W_CHAIN(SDNode *N);
211  void SelectFMUL_W_CHAIN(SDNode *N);
212 
213  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
214  uint32_t Offset, uint32_t Width);
215  void SelectS_BFEFromShifts(SDNode *N);
216  void SelectS_BFE(SDNode *N);
217  bool isCBranchSCC(const SDNode *N) const;
218  void SelectBRCOND(SDNode *N);
219  void SelectFMAD(SDNode *N);
220  void SelectATOMIC_CMP_SWAP(SDNode *N);
221 
222 protected:
223  // Include the pieces autogenerated from the target description.
224 #include "AMDGPUGenDAGISel.inc"
225 };
226 
// R600-specific variant of the selector: reuses the common AMDGPU selection
// logic but overrides Select and the indirect / VTX_READ addressing patterns
// declared virtual in AMDGPUDAGToDAGISel.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};
239 
240 } // end anonymous namespace
241 
242 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
243  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
245 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
246  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
247 
248 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
249 // DAG, ready for instruction scheduling.
251  CodeGenOpt::Level OptLevel) {
252  return new AMDGPUDAGToDAGISel(TM, OptLevel);
253 }
254 
255 /// \brief This pass converts a legalized DAG into a R600-specific
256 // DAG, ready for instruction scheduling.
258  CodeGenOpt::Level OptLevel) {
259  return new R600DAGToDAGISel(TM, OptLevel);
260 }
261 
262 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
263  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
265 }
266 
267 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
268  if (TM.Options.NoNaNsFPMath)
269  return true;
270 
271  // TODO: Move into isKnownNeverNaN
272  if (N->getFlags().isDefined())
273  return N->getFlags().hasNoNaNs();
274 
275  return CurDAG->isKnownNeverNaN(N);
276 }
277 
278 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
279  const SIInstrInfo *TII
280  = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
281 
282  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
283  return TII->isInlineConstant(C->getAPIntValue());
284 
285  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
286  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
287 
288  return false;
289 }
290 
291 /// \brief Determine the register class for \p OpNo
292 /// \returns The register class of the virtual register that will be used for
293 /// the given operand number \OpNo or NULL if the register class cannot be
294 /// determined.
295 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
296  unsigned OpNo) const {
297  if (!N->isMachineOpcode()) {
298  if (N->getOpcode() == ISD::CopyToReg) {
299  unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
301  MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
302  return MRI.getRegClass(Reg);
303  }
304 
305  const SIRegisterInfo *TRI
306  = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
307  return TRI->getPhysRegClass(Reg);
308  }
309 
310  return nullptr;
311  }
312 
313  switch (N->getMachineOpcode()) {
314  default: {
315  const MCInstrDesc &Desc =
316  Subtarget->getInstrInfo()->get(N->getMachineOpcode());
317  unsigned OpIdx = Desc.getNumDefs() + OpNo;
318  if (OpIdx >= Desc.getNumOperands())
319  return nullptr;
320  int RegClass = Desc.OpInfo[OpIdx].RegClass;
321  if (RegClass == -1)
322  return nullptr;
323 
324  return Subtarget->getRegisterInfo()->getRegClass(RegClass);
325  }
326  case AMDGPU::REG_SEQUENCE: {
327  unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
328  const TargetRegisterClass *SuperRC =
329  Subtarget->getRegisterInfo()->getRegClass(RCID);
330 
331  SDValue SubRegOp = N->getOperand(OpNo + 1);
332  unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
333  return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
334  SubRegIdx);
335  }
336  }
337 }
338 
339 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
340  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
341  return N;
342 
343  const SITargetLowering& Lowering =
344  *static_cast<const SITargetLowering*>(getTargetLowering());
345 
346  // Write max value to m0 before each load operation
347 
348  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
349  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
350 
351  SDValue Glue = M0.getValue(1);
352 
354  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
355  Ops.push_back(N->getOperand(i));
356  }
357  Ops.push_back(Glue);
358  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
359 
360  return N;
361 }
362 
363 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
364  switch (NumVectorElts) {
365  case 1:
366  return AMDGPU::SReg_32_XM0RegClassID;
367  case 2:
368  return AMDGPU::SReg_64RegClassID;
369  case 4:
370  return AMDGPU::SReg_128RegClassID;
371  case 8:
372  return AMDGPU::SReg_256RegClassID;
373  case 16:
374  return AMDGPU::SReg_512RegClassID;
375  }
376 
377  llvm_unreachable("invalid vector size");
378 }
379 
380 static bool getConstantValue(SDValue N, uint32_t &Out) {
381  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
382  Out = C->getAPIntValue().getZExtValue();
383  return true;
384  }
385 
386  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
387  Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
388  return true;
389  }
390 
391  return false;
392 }
393 
// Lower a BUILD_VECTOR / SCALAR_TO_VECTOR node into a REG_SEQUENCE (or a
// plain COPY_TO_REGCLASS for the 1-element case) over the given register
// class.
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    // A 1-element vector is just the scalar operand copied into the
    // requested register class.
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    // Each element contributes a (value, subregister index) operand pair.
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] =
        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                  MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }
  }

  // NOTE(review): when IsRegSeq is false, SelectCode(N) runs and then
  // SelectNodeTo is still applied to N below — confirm this fall-through is
  // intentional before relying on it.
  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
445 
447  unsigned int Opc = N->getOpcode();
448  if (N->isMachineOpcode()) {
449  N->setNodeId(-1);
450  return; // Already selected.
451  }
452 
453  if (isa<AtomicSDNode>(N) ||
454  (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
455  N = glueCopyToM0(N);
456 
457  switch (Opc) {
458  default: break;
459  // We are selecting i64 ADD here instead of custom lower it during
460  // DAG legalization, so we can fold some i64 ADDs used for address
461  // calculation into the LOAD and STORE instructions.
462  case ISD::ADDC:
463  case ISD::ADDE:
464  case ISD::SUBC:
465  case ISD::SUBE: {
466  if (N->getValueType(0) != MVT::i64)
467  break;
468 
469  SelectADD_SUB_I64(N);
470  return;
471  }
472  case ISD::UADDO:
473  case ISD::USUBO: {
474  SelectUADDO_USUBO(N);
475  return;
476  }
478  SelectFMUL_W_CHAIN(N);
479  return;
480  }
481  case AMDGPUISD::FMA_W_CHAIN: {
482  SelectFMA_W_CHAIN(N);
483  return;
484  }
485 
487  case ISD::BUILD_VECTOR: {
488  EVT VT = N->getValueType(0);
489  unsigned NumVectorElts = VT.getVectorNumElements();
490 
491  if (VT == MVT::v2i16 || VT == MVT::v2f16) {
492  if (Opc == ISD::BUILD_VECTOR) {
493  uint32_t LHSVal, RHSVal;
494  if (getConstantValue(N->getOperand(0), LHSVal) &&
495  getConstantValue(N->getOperand(1), RHSVal)) {
496  uint32_t K = LHSVal | (RHSVal << 16);
497  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
498  CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
499  return;
500  }
501  }
502 
503  break;
504  }
505 
507  unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
508  SelectBuildVector(N, RegClassID);
509  return;
510  }
511  case ISD::BUILD_PAIR: {
512  SDValue RC, SubReg0, SubReg1;
513  SDLoc DL(N);
514  if (N->getValueType(0) == MVT::i128) {
515  RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
516  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
517  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
518  } else if (N->getValueType(0) == MVT::i64) {
519  RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
520  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
521  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
522  } else {
523  llvm_unreachable("Unhandled value type for BUILD_PAIR");
524  }
525  const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
526  N->getOperand(1), SubReg1 };
527  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
528  N->getValueType(0), Ops));
529  return;
530  }
531 
532  case ISD::Constant:
533  case ISD::ConstantFP: {
534  if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
535  break;
536 
537  uint64_t Imm;
538  if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
539  Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
540  else {
541  ConstantSDNode *C = cast<ConstantSDNode>(N);
542  Imm = C->getZExtValue();
543  }
544 
545  SDLoc DL(N);
546  SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
547  CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
548  MVT::i32));
549  SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
550  CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
551  const SDValue Ops[] = {
552  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
553  SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
554  SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
555  };
556 
557  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
558  N->getValueType(0), Ops));
559  return;
560  }
561  case ISD::LOAD:
562  case ISD::STORE: {
563  N = glueCopyToM0(N);
564  break;
565  }
566 
567  case AMDGPUISD::BFE_I32:
568  case AMDGPUISD::BFE_U32: {
569  // There is a scalar version available, but unlike the vector version which
570  // has a separate operand for the offset and width, the scalar version packs
571  // the width and offset into a single operand. Try to move to the scalar
572  // version if the offsets are constant, so that we can try to keep extended
573  // loads of kernel arguments in SGPRs.
574 
575  // TODO: Technically we could try to pattern match scalar bitshifts of
576  // dynamic values, but it's probably not useful.
578  if (!Offset)
579  break;
580 
582  if (!Width)
583  break;
584 
585  bool Signed = Opc == AMDGPUISD::BFE_I32;
586 
587  uint32_t OffsetVal = Offset->getZExtValue();
588  uint32_t WidthVal = Width->getZExtValue();
589 
590  ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
591  SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
592  return;
593  }
594  case AMDGPUISD::DIV_SCALE: {
595  SelectDIV_SCALE(N);
596  return;
597  }
599  case AMDGPUISD::MAD_U64_U32: {
600  SelectMAD_64_32(N);
601  return;
602  }
603  case ISD::CopyToReg: {
604  const SITargetLowering& Lowering =
605  *static_cast<const SITargetLowering*>(getTargetLowering());
606  N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
607  break;
608  }
609  case ISD::AND:
610  case ISD::SRL:
611  case ISD::SRA:
613  if (N->getValueType(0) != MVT::i32)
614  break;
615 
616  SelectS_BFE(N);
617  return;
618  case ISD::BRCOND:
619  SelectBRCOND(N);
620  return;
621  case ISD::FMAD:
622  SelectFMAD(N);
623  return;
625  SelectATOMIC_CMP_SWAP(N);
626  return;
627  }
628 
629  SelectCode(N);
630 }
631 
632 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
633  if (!N->readMem())
634  return false;
635  if (CbId == -1)
637 
638  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
639 }
640 
641 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
642  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
643  const Instruction *Term = BB->getTerminator();
644  return Term->getMetadata("amdgpu.uniform") ||
645  Term->getMetadata("structurizecfg.uniform");
646 }
647 
// Human-readable pass name reported to the pass manager; the same string is
// used in the INITIALIZE_PASS registration above.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
651 
652 //===----------------------------------------------------------------------===//
653 // Complex Patterns
654 //===----------------------------------------------------------------------===//
655 
656 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
657  SDValue& IntPtr) {
658  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
659  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
660  true);
661  return true;
662  }
663  return false;
664 }
665 
666 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
667  SDValue& BaseReg, SDValue &Offset) {
668  if (!isa<ConstantSDNode>(Addr)) {
669  BaseReg = Addr;
670  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
671  return true;
672  }
673  return false;
674 }
675 
// Base-class stub: VTX_READ addressing is never matched here; the
// R600DAGToDAGISel subclass overrides this (see its declaration above).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
680 
681 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
682  SDValue &Offset) {
683  ConstantSDNode *C;
684  SDLoc DL(Addr);
685 
686  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
687  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
688  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
689  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
690  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
691  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
692  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
693  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
694  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
695  Base = Addr.getOperand(0);
696  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
697  } else {
698  Base = Addr;
699  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
700  }
701 
702  return true;
703 }
704 
// Expand a 64-bit add/sub (with or without carry in/out) into two 32-bit
// scalar ALU ops chained through the glue/carry output, then recombine the
// halves with a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; ADDC/SUBC/ADDE/SUBE produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their low and high 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: the carry-consuming form is only used when this node has a
  // carry-in (operand 2).
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the glue (carry) produced by the low half.
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  // Reassemble the 64-bit result from the two 32-bit halves.
  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}
769 
770 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
771  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
772  // carry out despite the _i32 name. These were renamed in VI to _U32.
773  // FIXME: We should probably rename the opcodes here.
774  unsigned Opc = N->getOpcode() == ISD::UADDO ?
775  AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
776 
777  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
778  { N->getOperand(0), N->getOperand(1) });
779 }
780 
// Select an FMA that carries a chain into V_FMA_F32, expanding each source
// through the VOP3 modifier matchers.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  // Ops layout: [0..5] = per-source (modifiers, value) pairs, [6] = clamp,
  // [7] = omod; [8]/[9] forward this node's operands 0 and 4 (presumably
  // chain and glue — confirm against the AMDGPUISD::FMA_W_CHAIN definition).
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}
794 
// Select a chained FMUL into V_MUL_F32_e64, expanding both sources through
// the VOP3 modifier matchers.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  // Ops layout: [0..3] = per-source (modifiers, value) pairs, [4] = clamp,
  // [5] = omod; [6]/[7] forward this node's operands 0 and 3 (presumably
  // chain and glue — confirm against the AMDGPUISD::FMUL_W_CHAIN definition).
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
807 
808 // We need to handle this here because tablegen doesn't support matching
809 // instructions with multiple outputs.
810 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
811  SDLoc SL(N);
812  EVT VT = N->getValueType(0);
813 
814  assert(VT == MVT::f32 || VT == MVT::f64);
815 
816  unsigned Opc
817  = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
818 
819  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
820  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
821 }
822 
823 // We need to handle this here because tablegen doesn't support matching
824 // instructions with multiple outputs.
825 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
826  SDLoc SL(N);
827  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
828  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
829 
830  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
831  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
832  Clamp };
833  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
834 }
835 
836 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
837  unsigned OffsetBits) const {
838  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
839  (OffsetBits == 8 && !isUInt<8>(Offset)))
840  return false;
841 
842  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
843  Subtarget->unsafeDSOffsetFoldingEnabled())
844  return true;
845 
846  // On Southern Islands instruction with a negative base value and an offset
847  // don't seem to work.
848  return CurDAG->SignBitIsZero(Base);
849 }
850 
// Match a DS address as (Base, 16-bit immediate Offset).  Always succeeds;
// the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real negation as a machine node and use it as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
911 
912 // TODO: If offset is too big, put low 16-bit into offset.
913 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
914  SDValue &Offset0,
915  SDValue &Offset1) const {
916  SDLoc DL(Addr);
917 
918  if (CurDAG->isBaseWithConstantOffset(Addr)) {
919  SDValue N0 = Addr.getOperand(0);
920  SDValue N1 = Addr.getOperand(1);
921  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
922  unsigned DWordOffset0 = C1->getZExtValue() / 4;
923  unsigned DWordOffset1 = DWordOffset0 + 1;
924  // (add n0, c0)
925  if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
926  Base = N0;
927  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
928  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
929  return true;
930  }
931  } else if (Addr.getOpcode() == ISD::SUB) {
932  // sub C, x -> add (sub 0, x), C
933  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
934  unsigned DWordOffset0 = C->getZExtValue() / 4;
935  unsigned DWordOffset1 = DWordOffset0 + 1;
936 
937  if (isUInt<8>(DWordOffset0)) {
938  SDLoc DL(Addr);
939  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
940 
941  // XXX - This is kind of hacky. Create a dummy sub node so we can check
942  // the known bits in isDSOffsetLegal. We need to emit the selected node
943  // here, so this is thrown away.
944  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
945  Zero, Addr.getOperand(1));
946 
947  if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
948  MachineSDNode *MachineSub
949  = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
950  Zero, Addr.getOperand(1));
951 
952  Base = SDValue(MachineSub, 0);
953  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
954  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
955  return true;
956  }
957  }
958  }
959  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
960  unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
961  unsigned DWordOffset1 = DWordOffset0 + 1;
962  assert(4 * DWordOffset0 == CAddr->getZExtValue());
963 
964  if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
965  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
966  MachineSDNode *MovZero
967  = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
968  DL, MVT::i32, Zero);
969  Base = SDValue(MovZero, 0);
970  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
971  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
972  return true;
973  }
974  }
975 
976  // default case
977 
978  // FIXME: This is broken on SI where we still need to check if the base
979  // pointer is positive here.
980  Base = Addr;
981  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
982  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
983  return true;
984 }
985 
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Common MUBUF operand matcher: decomposes Addr into a resource pointer,
  // vgpr address, sgpr offset and immediate offset, and sets the addressing
  // mode flag operands (offen/idxen/addr64).

  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may be pre-seeded by the caller; default them only when absent.
  // TFE and the addressing-mode flags always start cleared.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // NOTE(review): this doc rendering dropped a hyperlinked guard line here
    // (presumably an `if (SIInstrInfo::isLegalMUBUFImmOffset(...)) {` that
    // opens the scope closed by the brace after `return true;`) — confirm
    // against the upstream source; the brace balance below reflects the
    // dropped line.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1060 
1061 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1062  SDValue &VAddr, SDValue &SOffset,
1063  SDValue &Offset, SDValue &GLC,
1064  SDValue &SLC, SDValue &TFE) const {
1065  SDValue Ptr, Offen, Idxen, Addr64;
1066 
1067  // addr64 bit was removed for volcanic islands.
1068  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1069  return false;
1070 
1071  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1072  GLC, SLC, TFE))
1073  return false;
1074 
1075  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1076  if (C->getSExtValue()) {
1077  SDLoc DL(Addr);
1078 
1079  const SITargetLowering& Lowering =
1080  *static_cast<const SITargetLowering*>(getTargetLowering());
1081 
1082  SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1083  return true;
1084  }
1085 
1086  return false;
1087 }
1088 
1089 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1090  SDValue &VAddr, SDValue &SOffset,
1091  SDValue &Offset,
1092  SDValue &SLC) const {
1093  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1094  SDValue GLC, TFE;
1095 
1096  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1097 }
1098 
1099 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1100  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1101  return PSV && PSV->isStack();
1102 }
1103 
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  // Return the (address, soffset-register) pair for a private access: frame
  // indices are addressed relative to the frame pointer SGPR, everything
  // else relative to the scratch wave offset register.
  const MachineFunction &MF = CurDAG->getMachineFunction();
  // NOTE(review): a hyperlinked declaration was dropped here by the doc
  // rendering — presumably the `Info` (SIMachineFunctionInfo) lookup used
  // below; confirm against the upstream source.

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}
1123 
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  // Match a scratch (private) access in the offen form: vgpr address plus
  // sgpr offset register plus immediate offset.

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  // NOTE(review): a hyperlinked declaration (presumably the
  // SIMachineFunctionInfo `Info` lookup used below) was dropped here by the
  // doc rendering — confirm against the upstream source.

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    // Split a constant address into its high bits, materialized into a vgpr,
    // and the low 12 bits, carried by the immediate offset field.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    // NOTE(review): the two arms of this conditional (presumably the stack
    // pointer offset register vs. the scratch wave offset register) were
    // hyperlinked and dropped by the doc rendering — confirm upstream.

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    //
    // The total computation of vaddr + soffset + offset must not overflow.
    // If vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // NOTE(review): the first line of this condition (presumably a legal
    // MUBUF immediate offset check on C1) was hyperlinked and dropped by the
    // doc rendering — confirm against the upstream source.
      CurDAG->SignBitIsZero(N0)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1179 
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  // Match a purely-constant scratch address that fits the MUBUF immediate
  // offset field; the whole value goes into Offset with no vgpr address.
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  // NOTE(review): a hyperlinked declaration (presumably the
  // SIMachineFunctionInfo `Info` lookup used below) was dropped here by the
  // doc rendering — confirm against the upstream source.

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
  // NOTE(review): the two arms of this conditional (presumably the stack
  // pointer offset register vs. the scratch wave offset register) were
  // hyperlinked and dropped by the doc rendering — confirm upstream.

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}
1206 
1207 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1208  SDValue &SOffset, SDValue &Offset,
1209  SDValue &GLC, SDValue &SLC,
1210  SDValue &TFE) const {
1211  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1212  const SIInstrInfo *TII =
1213  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1214 
1215  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1216  GLC, SLC, TFE))
1217  return false;
1218 
1219  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1220  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1221  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1222  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1223  APInt::getAllOnesValue(32).getZExtValue(); // Size
1224  SDLoc DL(Addr);
1225 
1226  const SITargetLowering& Lowering =
1227  *static_cast<const SITargetLowering*>(getTargetLowering());
1228 
1229  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1230  return true;
1231  }
1232  return false;
1233 }
1234 
1235 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1236  SDValue &Soffset, SDValue &Offset
1237  ) const {
1238  SDValue GLC, SLC, TFE;
1239 
1240  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1241 }
1242 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1243  SDValue &Soffset, SDValue &Offset,
1244  SDValue &SLC) const {
1245  SDValue GLC, TFE;
1246 
1247  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1248 }
1249 
1250 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1251  SDValue &SOffset,
1252  SDValue &ImmOffset) const {
1253  SDLoc DL(Constant);
1254  const uint32_t Align = 4;
1255  const uint32_t MaxImm = alignDown(4095, Align);
1256  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1257  uint32_t Overflow = 0;
1258 
1259  if (Imm > MaxImm) {
1260  if (Imm <= MaxImm + 64) {
1261  // Use an SOffset inline constant for 4..64
1262  Overflow = Imm - MaxImm;
1263  Imm = MaxImm;
1264  } else {
1265  // Try to keep the same value in SOffset for adjacent loads, so that
1266  // the corresponding register contents can be re-used.
1267  //
1268  // Load values with all low-bits (except for alignment bits) set into
1269  // SOffset, so that a larger range of values can be covered using
1270  // s_movk_i32.
1271  //
1272  // Atomic operations fail to work correctly when individual address
1273  // components are unaligned, even if their sum is aligned.
1274  uint32_t High = (Imm + Align) & ~4095;
1275  uint32_t Low = (Imm + Align) & 4095;
1276  Imm = Low;
1277  Overflow = High - Align;
1278  }
1279  }
1280 
1281  // There is a hardware bug in SI and CI which prevents address clamping in
1282  // MUBUF instructions from working correctly with SOffsets. The immediate
1283  // offset is unaffected.
1284  if (Overflow > 0 &&
1285  Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1286  return false;
1287 
1288  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1289 
1290  if (Overflow <= 64)
1291  SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1292  else
1293  SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1294  CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1295  0);
1296 
1297  return true;
1298 }
1299 
1300 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1301  SDValue &SOffset,
1302  SDValue &ImmOffset) const {
1303  SDLoc DL(Offset);
1304 
1305  if (!isa<ConstantSDNode>(Offset))
1306  return false;
1307 
1308  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1309 }
1310 
1311 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1312  SDValue &SOffset,
1313  SDValue &ImmOffset,
1314  SDValue &VOffset) const {
1315  SDLoc DL(Offset);
1316 
1317  // Don't generate an unnecessary voffset for constant offsets.
1318  if (isa<ConstantSDNode>(Offset)) {
1319  SDValue Tmp1, Tmp2;
1320 
1321  // When necessary, use a voffset in <= CI anyway to work around a hardware
1322  // bug.
1323  if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
1324  SelectMUBUFConstant(Offset, Tmp1, Tmp2))
1325  return false;
1326  }
1327 
1328  if (CurDAG->isBaseWithConstantOffset(Offset)) {
1329  SDValue N0 = Offset.getOperand(0);
1330  SDValue N1 = Offset.getOperand(1);
1331  if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
1332  SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
1333  VOffset = N0;
1334  return true;
1335  }
1336  }
1337 
1338  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1339  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1340  VOffset = Offset;
1341 
1342  return true;
1343 }
1344 
1345 template <bool IsSigned>
1346 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
1347  SDValue &VAddr,
1348  SDValue &Offset,
1349  SDValue &SLC) const {
1350  int64_t OffsetVal = 0;
1351 
1352  if (Subtarget->hasFlatInstOffsets() &&
1353  CurDAG->isBaseWithConstantOffset(Addr)) {
1354  SDValue N0 = Addr.getOperand(0);
1355  SDValue N1 = Addr.getOperand(1);
1356  int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1357 
1358  if ((IsSigned && isInt<13>(COffsetVal)) ||
1359  (!IsSigned && isUInt<12>(COffsetVal))) {
1360  Addr = N0;
1361  OffsetVal = COffsetVal;
1362  }
1363  }
1364 
1365  VAddr = Addr;
1366  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1367  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1368 
1369  return true;
1370 }
1371 
1372 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
1373  SDValue &VAddr,
1374  SDValue &Offset,
1375  SDValue &SLC) const {
1376  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
1377 }
1378 
1379 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
1380  SDValue &VAddr,
1381  SDValue &Offset,
1382  SDValue &SLC) const {
1383  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
1384 }
1385 
1386 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1387  SDValue &Offset, bool &Imm) const {
1388 
1389  // FIXME: Handle non-constant offsets.
1390  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1391  if (!C)
1392  return false;
1393 
1394  SDLoc SL(ByteOffsetNode);
1395  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1396  int64_t ByteOffset = C->getSExtValue();
1397  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1398 
1399  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1400  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1401  Imm = true;
1402  return true;
1403  }
1404 
1405  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1406  return false;
1407 
1408  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1409  // 32-bit Immediates are supported on Sea Islands.
1410  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1411  } else {
1412  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1413  Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1414  C32Bit), 0);
1415  }
1416  Imm = false;
1417  return true;
1418 }
1419 
1420 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1421  SDValue &Offset, bool &Imm) const {
1422  SDLoc SL(Addr);
1423  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1424  SDValue N0 = Addr.getOperand(0);
1425  SDValue N1 = Addr.getOperand(1);
1426 
1427  if (SelectSMRDOffset(N1, Offset, Imm)) {
1428  SBase = N0;
1429  return true;
1430  }
1431  }
1432  SBase = Addr;
1433  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1434  Imm = true;
1435  return true;
1436 }
1437 
1438 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1439  SDValue &Offset) const {
1440  bool Imm;
1441  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1442 }
1443 
1444 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1445  SDValue &Offset) const {
1446 
1447  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1448  return false;
1449 
1450  bool Imm;
1451  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1452  return false;
1453 
1454  return !Imm && isa<ConstantSDNode>(Offset);
1455 }
1456 
1457 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1458  SDValue &Offset) const {
1459  bool Imm;
1460  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1461  !isa<ConstantSDNode>(Offset);
1462 }
1463 
1464 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1465  SDValue &Offset) const {
1466  bool Imm;
1467  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1468 }
1469 
1470 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1471  SDValue &Offset) const {
1472  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1473  return false;
1474 
1475  bool Imm;
1476  if (!SelectSMRDOffset(Addr, Offset, Imm))
1477  return false;
1478 
1479  return !Imm && isa<ConstantSDNode>(Offset);
1480 }
1481 
1482 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1483  SDValue &Base,
1484  SDValue &Offset) const {
1485  SDLoc DL(Index);
1486 
1487  if (CurDAG->isBaseWithConstantOffset(Index)) {
1488  SDValue N0 = Index.getOperand(0);
1489  SDValue N1 = Index.getOperand(1);
1490  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1491 
1492  // (add n0, c0)
1493  Base = N0;
1494  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1495  return true;
1496  }
1497 
1498  if (isa<ConstantSDNode>(Index))
1499  return false;
1500 
1501  Base = Index;
1502  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1503  return true;
1504 }
1505 
1506 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1507  SDValue Val, uint32_t Offset,
1508  uint32_t Width) {
1509  // Transformation function, pack the offset and width of a BFE into
1510  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1511  // source, bits [5:0] contain the offset and bits [22:16] the width.
1512  uint32_t PackedVal = Offset | (Width << 16);
1513  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1514 
1515  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1516 }
1517 
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // Fold a shift-left/shift-right pair into a single scalar bitfield extract.
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  // NOTE(review): the hyperlinked declarations of `B` and `C` (the constant
  // shift-amount operands of the shl and the outer shift) were dropped here
  // by the doc rendering — confirm against the upstream source.

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA keeps the sign -> signed BFE; SRL -> unsigned BFE.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  // No fold possible: fall back to the generated matcher.
  SelectCode(N);
}
1542 
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  // Try to fold and/shift/sext_inreg patterns into a single scalar bitfield
  // extract; otherwise defer to the generated matcher.
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      // NOTE(review): the hyperlinked declarations of `Shift` and `Mask`
      // (constant operands of the srl and the and) were dropped here by the
      // doc rendering — confirm against the upstream source.

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of a contiguous low mask is its population count.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      // NOTE(review): the hyperlinked declarations of `Shift` and `Mask`
      // (constant operands of the srl and the and) were dropped here by the
      // doc rendering — confirm against the upstream source.

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // Shift pair: handled by the dedicated helper.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // Extract width comes from the sext_inreg's narrow type.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
1618 
1619 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1620  assert(N->getOpcode() == ISD::BRCOND);
1621  if (!N->hasOneUse())
1622  return false;
1623 
1624  SDValue Cond = N->getOperand(1);
1625  if (Cond.getOpcode() == ISD::CopyToReg)
1626  Cond = Cond.getOperand(2);
1627 
1628  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1629  return false;
1630 
1631  MVT VT = Cond.getOperand(0).getSimpleValueType();
1632  if (VT == MVT::i32)
1633  return true;
1634 
1635  if (VT == MVT::i64) {
1636  auto ST = static_cast<const SISubtarget *>(Subtarget);
1637 
1638  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1639  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1640  }
1641 
1642  return false;
1643 }
1644 
1645 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1646  SDValue Cond = N->getOperand(1);
1647 
1648  if (Cond.isUndef()) {
1649  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1650  N->getOperand(2), N->getOperand(0));
1651  return;
1652  }
1653 
1654  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1655  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1656  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1657  SDLoc SL(N);
1658 
1659  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1660  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1661  N->getOperand(2), // Basic Block
1662  VCC.getValue(0));
1663 }
1664 
1665 void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
1666  MVT VT = N->getSimpleValueType(0);
1667  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
1668  SelectCode(N);
1669  return;
1670  }
1671 
1672  SDValue Src0 = N->getOperand(0);
1673  SDValue Src1 = N->getOperand(1);
1674  SDValue Src2 = N->getOperand(2);
1675  unsigned Src0Mods, Src1Mods, Src2Mods;
1676 
1677  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
1678  // conversion from f16.
1679  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1680  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1681  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1682 
1683  assert(!Subtarget->hasFP32Denormals() &&
1684  "fmad selected with denormals enabled");
1685  // TODO: We can select this with f32 denormals enabled if all the sources are
1686  // converted from f16 (in which case fmad isn't legal).
1687 
1688  if (Sel0 || Sel1 || Sel2) {
1689  // For dummy operands.
1690  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1691  SDValue Ops[] = {
1692  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1693  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1694  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1695  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1696  Zero, Zero
1697  };
1698 
1699  CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
1700  } else {
1701  SelectCode(N);
1702  }
1703 }
1704 
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  // Select a non-flat atomic cmpxchg to a BUFFER_ATOMIC_CMPSWAP*_RTN and
  // extract the low half (the previous value) of the returned register pair.
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    // Flat address space is handled by the generated matcher.
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 form when the subtarget still supports it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the pure-offset addressing form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand to the new machine node.
  // NOTE(review): a hyperlinked declaration (presumably the MachineMemOperand
  // array allocation that `MMOs` points into) was dropped here by the doc
  // rendering — confirm against the upstream source.
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The cmpswap returns a register pair; callers expect only the old value,
  // which lives in the low subregister.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1770 
1771 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1772  unsigned &Mods) const {
1773  Mods = 0;
1774  Src = In;
1775 
1776  if (Src.getOpcode() == ISD::FNEG) {
1777  Mods |= SISrcMods::NEG;
1778  Src = Src.getOperand(0);
1779  }
1780 
1781  if (Src.getOpcode() == ISD::FABS) {
1782  Mods |= SISrcMods::ABS;
1783  Src = Src.getOperand(0);
1784  }
1785 
1786  return true;
1787 }
1788 
1789 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1790  SDValue &SrcMods) const {
1791  unsigned Mods;
1792  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1793  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1794  return true;
1795  }
1796 
1797  return false;
1798 }
1799 
1800 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1801  SDValue &SrcMods) const {
1802  SelectVOP3Mods(In, Src, SrcMods);
1803  return isNoNanSrc(Src);
1804 }
1805 
1806 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1807  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1808  return false;
1809 
1810  Src = In;
1811  return true;
1812 }
1813 
1814 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1815  SDValue &SrcMods, SDValue &Clamp,
1816  SDValue &Omod) const {
1817  SDLoc DL(In);
1818  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1819  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1820 
1821  return SelectVOP3Mods(In, Src, SrcMods);
1822 }
1823 
1824 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1825  SDValue &SrcMods,
1826  SDValue &Clamp,
1827  SDValue &Omod) const {
1828  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1829  return SelectVOP3Mods(In, Src, SrcMods);
1830 }
1831 
1832 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1833  SDValue &Clamp, SDValue &Omod) const {
1834  Src = In;
1835 
1836  SDLoc DL(In);
1837  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1838  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1839 
1840  return true;
1841 }
1842 
  // Peel a single BITCAST wrapper, if any, and return the underlying value.
  // NOTE(review): the enclosing signature line (a `static SDValue
  // stripBitcast(SDValue Val)` helper, hyperlinked in the original) was
  // dropped by this doc rendering — confirm against the upstream source.
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
1846 
1847 // Figure out if this is really an extract of the high 16-bits of a dword.
1848 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1849  In = stripBitcast(In);
1850  if (In.getOpcode() != ISD::TRUNCATE)
1851  return false;
1852 
1853  SDValue Srl = In.getOperand(0);
1854  if (Srl.getOpcode() == ISD::SRL) {
1855  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1856  if (ShiftAmt->getZExtValue() == 16) {
1857  Out = stripBitcast(Srl.getOperand(0));
1858  return true;
1859  }
1860  }
1861  }
1862 
1863  return false;
1864 }
1865 
// Look through operations that obscure just looking at the low 16-bits of the
// same register.
// NOTE(review): the signature line (a `static SDValue
// stripExtractLoElt(SDValue In)` helper, hyperlinked in the original) was
// dropped by this doc rendering — confirm against the upstream source.
  // A truncate of a 32-bit value only observes the low half; look through it.
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}
1877 
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  // Match packed (VOP3P) source modifiers: per-half negation and op_sel bits.
  unsigned Mods = 0;
  Src = In;

  // A whole-vector fneg toggles negation of both halves (XOR, so nested
  // negations cancel).
  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the mods so far in case the per-element match below fails.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    // Per-element fneg toggles the corresponding half's NEG bit.
    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // An element taken from the high half of a dword (trunc of srl 16) is
    // expressed with the op_sel bits instead of a shift.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}
1931 
1932 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1933  SDValue &SrcMods,
1934  SDValue &Clamp) const {
1935  SDLoc SL(In);
1936 
1937  // FIXME: Handle clamp and op_sel
1938  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1939 
1940  return SelectVOP3PMods(In, Src, SrcMods);
1941 }
1942 
1943 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1944  SDValue &SrcMods) const {
1945  Src = In;
1946  // FIXME: Handle op_sel
1947  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1948  return true;
1949 }
1950 
1951 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1952  SDValue &SrcMods,
1953  SDValue &Clamp) const {
1954  SDLoc SL(In);
1955 
1956  // FIXME: Handle clamp
1957  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1958 
1959  return SelectVOP3OpSel(In, Src, SrcMods);
1960 }
1961 
// Select a source with op_sel plus neg/abs modifiers. op_sel itself is not
// folded yet, so this is just the ordinary VOP3 modifier matching.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}
1967 
1968 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1969  SDValue &SrcMods,
1970  SDValue &Clamp) const {
1971  SDLoc SL(In);
1972 
1973  // FIXME: Handle clamp
1974  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1975 
1976  return SelectVOP3OpSelMods(In, Src, SrcMods);
1977 }
1978 
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used: true iff the source was an
// fp_extend from f16 folded into the mix-instruction's conversion.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // First fold any ordinary VOP3 fneg/fabs on the f32 value.
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      // Fold modifiers found below the extend on the f16 value as well.
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // Inner and outer fnegs cancel, hence XOR rather than OR.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16.
    // If the source's op_sel is set, it picks the high half of the source
    // register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}
2021 
2022 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2023  SDValue &SrcMods) const {
2024  unsigned Mods = 0;
2025  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2026  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2027  return true;
2028 }
2029 
2030 // TODO: Can we identify things like v_mad_mixhi_f16?
2031 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2032  if (In.isUndef()) {
2033  Src = In;
2034  return true;
2035  }
2036 
2037  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2038  SDLoc SL(In);
2039  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2040  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2041  SL, MVT::i32, K);
2042  Src = SDValue(MovK, 0);
2043  return true;
2044  }
2045 
2046  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2047  SDLoc SL(In);
2048  SDValue K = CurDAG->getTargetConstant(
2049  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2050  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2051  SL, MVT::i32, K);
2052  Src = SDValue(MovK, 0);
2053  return true;
2054  }
2055 
2056  return isExtractHiElt(In, Src);
2057 }
2058 
2059 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2061  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2062  bool IsModified = false;
2063  do {
2064  IsModified = false;
2065  // Go over all selected nodes and try to fold them a bit more
2066  for (SDNode &Node : CurDAG->allnodes()) {
2067  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
2068  if (!MachineNode)
2069  continue;
2070 
2071  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2072  if (ResNode != &Node) {
2073  ReplaceUses(&Node, ResNode);
2074  IsModified = true;
2075  }
2076  }
2077  CurDAG->RemoveDeadNodes();
2078  } while (IsModified);
2079 }
2080 
2082  unsigned int Opc = N->getOpcode();
2083  if (N->isMachineOpcode()) {
2084  N->setNodeId(-1);
2085  return; // Already selected.
2086  }
2087 
2088  switch (Opc) {
2089  default: break;
2091  case ISD::SCALAR_TO_VECTOR:
2092  case ISD::BUILD_VECTOR: {
2093  EVT VT = N->getValueType(0);
2094  unsigned NumVectorElts = VT.getVectorNumElements();
2095  unsigned RegClassID;
2096  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2097  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2098  // pass. We want to avoid 128 bits copies as much as possible because they
2099  // can't be bundled by our scheduler.
2100  switch(NumVectorElts) {
2101  case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
2102  case 4:
2104  RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
2105  else
2106  RegClassID = AMDGPU::R600_Reg128RegClassID;
2107  break;
2108  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2109  }
2110  SelectBuildVector(N, RegClassID);
2111  return;
2112  }
2113  }
2114 
2115  SelectCode(N);
2116 }
2117 
2118 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2119  SDValue &Offset) {
2120  ConstantSDNode *C;
2121  SDLoc DL(Addr);
2122 
2123  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2124  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2125  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2126  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2127  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2128  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2129  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2130  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2131  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2132  Base = Addr.getOperand(0);
2133  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2134  } else {
2135  Base = Addr;
2136  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2137  }
2138 
2139  return true;
2140 }
2141 
2142 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2143  SDValue &Offset) {
2144  ConstantSDNode *IMMOffset;
2145 
2146  if (Addr.getOpcode() == ISD::ADD
2147  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2148  && isInt<16>(IMMOffset->getZExtValue())) {
2149 
2150  Base = Addr.getOperand(0);
2151  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2152  MVT::i32);
2153  return true;
2154  // If the pointer address is constant, we can move it to the offset field.
2155  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2156  && isInt<16>(IMMOffset->getZExtValue())) {
2157  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2158  SDLoc(CurDAG->getEntryNode()),
2159  AMDGPU::ZERO, MVT::i32);
2160  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2161  MVT::i32);
2162  return true;
2163  }
2164 
2165  // Default case, no offset
2166  Base = Addr;
2167  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2168  return true;
2169 }
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:545
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:341
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static bool isExtractHiElt(SDValue In, SDValue &Out)
EVT getValueType() const
Return the ValueType of the referenced return value.
Interface definition for SIRegisterInfo.
bool isUndef() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:555
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes&#39;s memory reference descriptor list.
uint64_t getDefaultRsrcDataFormat() const
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
Shadow Stack GC Lowering
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:222
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
SDVTList getVTList() const
AMDGPUAS getAMDGPUAS(const Module &M)
const SDValue & getChain() const
virtual SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const =0
static bool getConstantValue(SDValue N, uint32_t &Out)
SDNode * legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:298
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
const SDNodeFlags getFlags() const
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
bool isInlineConstant(const APInt &Imm) const
uint64_t High
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:397
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
unsigned getSubRegFromChannel(unsigned Channel) const
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:710
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:214
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:379
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
unsigned SubReg
Reg
All possible values of the reg field in the ModR/M byte.
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
TargetRegisterInfo interface that is implemented by all hw codegen targets.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
int64_t getSExtValue() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:916
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:302
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:335
unsigned const MachineRegisterInfo * MRI
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.
This is an important base class in LLVM.
Definition: Constant.h:42
const SDValue & getOperand(unsigned Num) const
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:231
SI DAG Lowering interface definition.
Represent the analysis usage information of a pass.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
bool hasNoNaNs() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:69
bool isDefined() const
Returns true if the flags are in a defined state.
AMDGPU DAG DAG Pattern Instruction Selection
The AMDGPU TargetMachine interface definition for hw codgen targets.
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:530
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwises returns null...
Definition: PointerUnion.h:142
Extended Value Type.
Definition: ValueTypes.h:34
const AMDGPUAS & AS
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts)
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
Return a resource descriptor with the &#39;Add TID&#39; bit enabled The TID (Thread ID) is multiplied by the ...
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:512
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
Interface definition of the TargetLowering class that is common to all AMD GPUs.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:611
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
bool readMem() const
Special value supplied for machine level alias analysis.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo)
static SDValue stripExtractLoElt(SDValue In)
Address space for constant memory (VTX2)
Definition: AMDGPU.h:225
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:362
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:76
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:463
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:581
unsigned FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:217
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Address space for local memory.
Definition: AMDGPU.h:226
unsigned getOpcode() const
SDValue getValue(unsigned R) const
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:338
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:881
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the &#39;base&#39; register class for this register.
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
static SDValue stripBitcast(SDValue Val)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:281
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:412
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:223
const SDValue & getOperand(unsigned i) const
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:451
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:561
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
const SDValue & getBasePtr() const
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:350
constexpr char Args[]
Key for Kernel::Metadata::mArgs.