//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;
  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
}

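// Illustrative note (an assumption, not part of the original source): on
// subtargets where ldsRequiresM0Init() is true, m0 bounds LDS addressing,
// so the selector glues a maximal value ahead of each LDS access, roughly:
//   $m0 = S_MOV_B32 -1       ; open the whole LDS aperture
//   ds_read_b32 %val, %addr  ; the glued DS operation
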
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

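// Illustrative expansion (a sketch, not from the original source): for
// Imm = 0x123456789ABCDEF0, buildSMovImm64 emits roughly
//   %lo = S_MOV_B32 0x9ABCDEF0
//   %hi = S_MOV_B32 0x12345678
//   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
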
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

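// Illustrative layout (a sketch, not from the original source): for a
// 2-element build_vector, the RegSeqArgs array built above is
//   { SReg_64RegClassID, Elt0, sub0, Elt1, sub1 }
// which selects to: %v = REG_SEQUENCE SReg_64, %elt0, sub0, %elt1, sub1
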
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
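  // Worked example (illustrative, not from the original source): for a
  // constant v2f16 build_vector of (1.0, 2.0), the halves are 0x3C00 and
  // 0x4000, so K = 0x3C00 | (0x4000 << 16) = 0x40003C00 and the whole
  // vector is materialized with a single S_MOV_B32.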
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

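// Illustrative expansion (a sketch, not from the original source): an i64
// addc is split into 32-bit halves linked through SCC:
//   %lo = S_ADD_U32  %a.sub0, %b.sub0   ; sets SCC on carry-out
//   %hi = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes SCC as carry-in
//   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
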
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

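// Worked example (illustrative, not from the original source): with
// OffsetBits == 16 (plain ds_read/ds_write), Offset = 65535 is legal and
// 65536 is not; with OffsetBits == 8 (the two slots of read2/write2),
// each dword-scaled offset must fit in 8 bits.
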
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If the offset is too big, put the low 16 bits into the offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

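// Worked example (illustrative, not from the original source): for a byte
// offset of 8 on an 8-byte access, DWordOffset0 = 2 and DWordOffset1 = 3,
// matching the dword-scaled offset0/offset1 fields of ds_read2_b32:
//   ds_read2_b32 %v01, %base offset0:2 offset1:3
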
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

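// Illustrative ranges (a note, not from the original source): with flat
// instruction offsets available, an unsigned offset folds when
// 0 <= COffsetVal < 4096 (isUInt<12>), and a signed offset folds when
// -4096 <= COffsetVal <= 4095 (isInt<13>).
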
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

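// Illustrative expansion (a sketch, not from the original source): a 32-bit
// address %a is widened for the 64-bit SMEM base as
//   %hi  = S_MOV_B32 <value of get32BitAddressHighBits()>
//   %ptr = REG_SEQUENCE SReg_64_XEXEC, %a, sub0, %hi, sub1
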
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    // Don't peel off the offset (c0) if doing so could possibly lead
    // the base (n0) to be negative.
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

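// Worked example (illustrative, not from the original source): extracting
// 5 bits starting at bit 8 packs as PackedVal = 8 | (5 << 16) = 0x50008,
// i.e. S_BFE_U32 %dst, %src, 0x50008 computes (%src >> 8) & 0x1F.
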
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "(a << b) sra c" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

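// Worked instance (illustrative, not from the original source): for
// ((x << 8) srl 16), b = 8 and c = 16 satisfy 0 < b <= c < 32, so this
// becomes S_BFE_U32 x, offset = c - b = 8, width = 32 - c = 16.
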
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

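// Worked instance (illustrative, not from the original source): for
// ((x srl 4) & 0xFF), the mask is contiguous, so this selects to
// S_BFE_U32 x, offset = 4, width = popcount(0xFF) = 8.
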
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND
    // removal, so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                                          CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                                          Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

1663 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1664  MVT VT = N->getSimpleValueType(0);
1665  bool IsFMA = N->getOpcode() == ISD::FMA;
1666  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1667  !Subtarget->hasFmaMixInsts()) ||
1668  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1669  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1670  SelectCode(N);
1671  return;
1672  }
1673 
1674  SDValue Src0 = N->getOperand(0);
1675  SDValue Src1 = N->getOperand(1);
1676  SDValue Src2 = N->getOperand(2);
1677  unsigned Src0Mods, Src1Mods, Src2Mods;
1678 
1679  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1680  // using the conversion from f16.
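 // e.g. (fma (fp_extend f16:%x), %y, %z) can fold the f16-to-f32 extend
 // into the mix instruction's op_sel_hi source modifier instead of emitting
 // a separate conversion.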
1681  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1682  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1683  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1684 
1685  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1686  "fmad selected with denormals enabled");
1687  // TODO: We can select this with f32 denormals enabled if all the sources are
1688  // converted from f16 (in which case fmad isn't legal).
1689 
1690  if (Sel0 || Sel1 || Sel2) {
1691  // For dummy operands.
1692  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1693  SDValue Ops[] = {
1694  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1695  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1696  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1697  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1698  Zero, Zero
1699  };
1700 
1701  CurDAG->SelectNodeTo(N,
1702  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1703  MVT::f32, Ops);
1704  } else {
1705  SelectCode(N);
1706  }
1707 }
1708 
1709 // This is here because there isn't a way to use the generated sub0_sub1 as the
1710 // subreg index to EXTRACT_SUBREG in tablegen.
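 // The value operand of the memory node is, in effect, a packed {swap, cmp}
 // pair, and the returned (original) memory value occupies only the low half
 // of the wide result, hence the sub0 / sub0_sub1 extract at the end.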
1711 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1712  MemSDNode *Mem = cast<MemSDNode>(N);
1713  unsigned AS = Mem->getAddressSpace();
1714  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1715  SelectCode(N);
1716  return;
1717  }
1718 
1719  MVT VT = N->getSimpleValueType(0);
1720  bool Is32 = (VT == MVT::i32);
1721  SDLoc SL(N);
1722 
1723  MachineSDNode *CmpSwap = nullptr;
1724  if (Subtarget->hasAddr64()) {
1725  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1726 
1727  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1728  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1729  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1730  SDValue CmpVal = Mem->getOperand(2);
1731 
1732  // XXX - Do we care about glue operands?
1733 
1734  SDValue Ops[] = {
1735  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1736  };
1737 
1738  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1739  }
1740  }
1741 
1742  if (!CmpSwap) {
1743  SDValue SRsrc, SOffset, Offset, SLC;
1744  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1745  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1746  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1747 
1748  SDValue CmpVal = Mem->getOperand(2);
1749  SDValue Ops[] = {
1750  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1751  };
1752 
1753  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1754  }
1755  }
1756 
1757  if (!CmpSwap) {
1758  SelectCode(N);
1759  return;
1760  }
1761 
1762  MachineMemOperand *MMO = Mem->getMemOperand();
1763  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1764 
1765  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1766  SDValue Extract
1767  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1768 
1769  ReplaceUses(SDValue(N, 0), Extract);
1770  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1771  CurDAG->RemoveDeadNode(N);
1772 }
1773 
1774 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
1775  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1776  if ((IntrID != Intrinsic::amdgcn_ds_append &&
1777  IntrID != Intrinsic::amdgcn_ds_consume) ||
1778  N->getValueType(0) != MVT::i32) {
1779  SelectCode(N);
1780  return;
1781  }
1782 
1783  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1784  // be copied to an SGPR with readfirstlane.
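 // A sketch of the selection below: for ds_append on (%base + 64) with a
 // legal 16-bit offset, %base is glued into M0 and the constant 64 becomes
 // the immediate offset operand; otherwise the whole pointer is copied to
 // M0 and the offset operand is 0.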
1785  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1786  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1787 
1788  SDValue Chain = N->getOperand(0);
1789  SDValue Ptr = N->getOperand(2);
1790  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1791  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
1792 
1793  SDValue Offset;
1794  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
1795  SDValue PtrBase = Ptr.getOperand(0);
1796  SDValue PtrOffset = Ptr.getOperand(1);
1797 
1798  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
1799  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
1800  N = glueCopyToM0(N, PtrBase);
1801  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1802  }
1803  }
1804 
1805  if (!Offset) {
1806  N = glueCopyToM0(N, Ptr);
1807  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1808  }
1809 
1810  SDValue Ops[] = {
1811  Offset,
1812  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
1813  Chain,
1814  N->getOperand(N->getNumOperands() - 1) // New glue
1815  };
1816 
1817  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1818 }
1819 
1820 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1821  unsigned &Mods) const {
1822  Mods = 0;
1823  Src = In;
1824 
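 // Strip an outer fneg before an outer fabs: fneg(fabs(x)) folds to
 // NEG | ABS, while fabs(fneg(x)) folds only ABS and keeps the inner fneg
 // as the returned source.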
1825  if (Src.getOpcode() == ISD::FNEG) {
1826  Mods |= SISrcMods::NEG;
1827  Src = Src.getOperand(0);
1828  }
1829 
1830  if (Src.getOpcode() == ISD::FABS) {
1831  Mods |= SISrcMods::ABS;
1832  Src = Src.getOperand(0);
1833  }
1834 
1835  return true;
1836 }
1837 
1838 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1839  SDValue &SrcMods) const {
1840  unsigned Mods;
1841  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1842  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1843  return true;
1844  }
1845 
1846  return false;
1847 }
1848 
1849 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1850  SDValue &SrcMods) const {
1851  SelectVOP3Mods(In, Src, SrcMods);
1852  return isNoNanSrc(Src);
1853 }
1854 
1855 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1856  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1857  return false;
1858 
1859  Src = In;
1860  return true;
1861 }
1862 
1863 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1864  SDValue &SrcMods, SDValue &Clamp,
1865  SDValue &Omod) const {
1866  SDLoc DL(In);
1867  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1868  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1869 
1870  return SelectVOP3Mods(In, Src, SrcMods);
1871 }
1872 
1873 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1874  SDValue &SrcMods,
1875  SDValue &Clamp,
1876  SDValue &Omod) const {
1877  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1878  return SelectVOP3Mods(In, Src, SrcMods);
1879 }
1880 
1881 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1882  SDValue &Clamp, SDValue &Omod) const {
1883  Src = In;
1884 
1885  SDLoc DL(In);
1886  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1887  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1888 
1889  return true;
1890 }
1891 
1892 static SDValue stripBitcast(SDValue Val) {
1893  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1894 }
1895 
1896 // Figure out if this is really an extract of the high 16-bits of a dword.
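 // e.g. (i16 (trunc (srl i32:%x, 16))) matches, returning %x in Out.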
1897 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1898  In = stripBitcast(In);
1899  if (In.getOpcode() != ISD::TRUNCATE)
1900  return false;
1901 
1902  SDValue Srl = In.getOperand(0);
1903  if (Srl.getOpcode() == ISD::SRL) {
1904  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1905  if (ShiftAmt->getZExtValue() == 16) {
1906  Out = stripBitcast(Srl.getOperand(0));
1907  return true;
1908  }
1909  }
1910  }
1911 
1912  return false;
1913 }
1914 
1915 // Look through operations that obscure an extract of the low 16 bits of
1916 // the same register.
1917 static SDValue stripExtractLoElt(SDValue In) {
1918  if (In.getOpcode() == ISD::TRUNCATE) {
1919  SDValue Src = In.getOperand(0);
1920  if (Src.getValueType().getSizeInBits() == 32)
1921  return stripBitcast(Src);
1922  }
1923 
1924  return In;
1925 }
1926 
1927 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1928  SDValue &SrcMods) const {
1929  unsigned Mods = 0;
1930  Src = In;
1931 
1932  if (Src.getOpcode() == ISD::FNEG) {
1933  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1934  Src = Src.getOperand(0);
1935  }
1936 
1937  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1938  unsigned VecMods = Mods;
1939 
1940  SDValue Lo = stripBitcast(Src.getOperand(0));
1941  SDValue Hi = stripBitcast(Src.getOperand(1));
1942 
1943  if (Lo.getOpcode() == ISD::FNEG) {
1944  Lo = stripBitcast(Lo.getOperand(0));
1945  Mods ^= SISrcMods::NEG;
1946  }
1947 
1948  if (Hi.getOpcode() == ISD::FNEG) {
1949  Hi = stripBitcast(Hi.getOperand(0));
1950  Mods ^= SISrcMods::NEG_HI;
1951  }
1952 
1953  if (isExtractHiElt(Lo, Lo))
1954  Mods |= SISrcMods::OP_SEL_0;
1955 
1956  if (isExtractHiElt(Hi, Hi))
1957  Mods |= SISrcMods::OP_SEL_1;
1958 
1959  Lo = stripExtractLoElt(Lo);
1960  Hi = stripExtractLoElt(Hi);
1961 
1962  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1963  // Really a scalar input. Just select from the low half of the register to
1964  // avoid packing.
1965 
1966  Src = Lo;
1967  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1968  return true;
1969  }
1970 
1971  Mods = VecMods;
1972  }
1973 
1974  // Packed instructions do not have abs modifiers.
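 // (OP_SEL_1 is the op_sel_hi bit; its neutral value is 1, routing each
 // source's high half to the high half of the result.)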
1975  Mods |= SISrcMods::OP_SEL_1;
1976 
1977  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1978  return true;
1979 }
1980 
1981 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1982  SDValue &SrcMods,
1983  SDValue &Clamp) const {
1984  SDLoc SL(In);
1985 
1986  // FIXME: Handle clamp and op_sel
1987  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1988 
1989  return SelectVOP3PMods(In, Src, SrcMods);
1990 }
1991 
1992 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1993  SDValue &SrcMods) const {
1994  Src = In;
1995  // FIXME: Handle op_sel
1996  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1997  return true;
1998 }
1999 
2000 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2001  SDValue &SrcMods,
2002  SDValue &Clamp) const {
2003  SDLoc SL(In);
2004 
2005  // FIXME: Handle clamp
2006  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2007 
2008  return SelectVOP3OpSel(In, Src, SrcMods);
2009 }
2010 
2011 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2012  SDValue &SrcMods) const {
2013  // FIXME: Handle op_sel
2014  return SelectVOP3Mods(In, Src, SrcMods);
2015 }
2016 
2017 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2018  SDValue &SrcMods,
2019  SDValue &Clamp) const {
2020  SDLoc SL(In);
2021 
2022  // FIXME: Handle clamp
2023  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2024 
2025  return SelectVOP3OpSelMods(In, Src, SrcMods);
2026 }
2027 
2028 // The return value is not whether the match is possible (which it always is),
2029 // but whether or not a conversion is really used.
2030 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2031  unsigned &Mods) const {
2032  Mods = 0;
2033  SelectVOP3ModsImpl(In, Src, Mods);
2034 
2035  if (Src.getOpcode() == ISD::FP_EXTEND) {
2036  Src = Src.getOperand(0);
2037  assert(Src.getValueType() == MVT::f16);
2038  Src = stripBitcast(Src);
2039 
2040  // Be careful about folding modifiers if we already have an abs. fneg is
2041  // applied last, so we don't want to apply an earlier fneg.
2042  if ((Mods & SISrcMods::ABS) == 0) {
2043  unsigned ModsTmp;
2044  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2045 
2046  if ((ModsTmp & SISrcMods::NEG) != 0)
2047  Mods ^= SISrcMods::NEG;
2048 
2049  if ((ModsTmp & SISrcMods::ABS) != 0)
2050  Mods |= SISrcMods::ABS;
2051  }
2052 
2053  // op_sel/op_sel_hi decide the source type and source.
2054  // If the source's op_sel_hi is set, it indicates a conversion from fp16.
2055  // If the source's op_sel is set, it picks the high half of the source
2056  // register.
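 // e.g. for a register holding two f16 values {lo, hi}: op_sel_hi alone
 // selects a conversion of lo, while op_sel_hi together with op_sel
 // converts hi.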
2057 
2058  Mods |= SISrcMods::OP_SEL_1;
2059  if (isExtractHiElt(Src, Src)) {
2060  Mods |= SISrcMods::OP_SEL_0;
2061 
2062  // TODO: Should we try to look for neg/abs here?
2063  }
2064 
2065  return true;
2066  }
2067 
2068  return false;
2069 }
2070 
2071 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2072  SDValue &SrcMods) const {
2073  unsigned Mods = 0;
2074  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2075  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2076  return true;
2077 }
2078 
2079 // TODO: Can we identify things like v_mad_mixhi_f16?
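 // Constants are materialized below already shifted into the high half,
 // e.g. an f16 1.0 (bit pattern 0x3C00) becomes a V_MOV_B32 of 0x3C000000.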
2080 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2081  if (In.isUndef()) {
2082  Src = In;
2083  return true;
2084  }
2085 
2086  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2087  SDLoc SL(In);
2088  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2089  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2090  SL, MVT::i32, K);
2091  Src = SDValue(MovK, 0);
2092  return true;
2093  }
2094 
2095  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2096  SDLoc SL(In);
2097  SDValue K = CurDAG->getTargetConstant(
2098  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2099  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2100  SL, MVT::i32, K);
2101  Src = SDValue(MovK, 0);
2102  return true;
2103  }
2104 
2105  return isExtractHiElt(In, Src);
2106 }
2107 
2108 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2109  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2110  return false;
2111  }
2112  const SIRegisterInfo *SIRI =
2113  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2114  const SIInstrInfo * SII =
2115  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2116 
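 // Scan up to 10 uses: the immediate should go in a VGPR only if at least
 // one use strictly requires a VGPR operand, counting commutable users as
 // SGPR-capable when commuting would move the operand to an SGPR-accepting
 // slot.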
2117  unsigned Limit = 0;
2118  bool AllUsesAcceptSReg = true;
2119  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2120  Limit < 10 && U != E; ++U, ++Limit) {
2121  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2122 
2123  // If the register class is unknown, it could be a register class that
2124  // needs to be an SGPR, e.g. an inline asm constraint.
2126  if (!RC || SIRI->isSGPRClass(RC))
2127  return false;
2128 
2129  if (RC != &AMDGPU::VS_32RegClass) {
2130  AllUsesAcceptSReg = false;
2131  SDNode * User = *U;
2132  if (User->isMachineOpcode()) {
2133  unsigned Opc = User->getMachineOpcode();
2134  const MCInstrDesc &Desc = SII->get(Opc);
2135  if (Desc.isCommutable()) {
2136  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2137  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2138  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2139  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2140  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2141  if (CommutedRC == &AMDGPU::VS_32RegClass)
2142  AllUsesAcceptSReg = true;
2143  }
2144  }
2145  }
2146  // If "AllUsesAcceptSReg == false" at this point, we have not succeeded
2147  // in commuting the current user. This means we have at least one use
2148  // that strictly requires a VGPR. Thus, we will not attempt to commute
2149  // other user instructions.
2150  if (!AllUsesAcceptSReg)
2151  break;
2152  }
2153  }
2154  return !AllUsesAcceptSReg && (Limit < 10);
2155 }
2156 
2157 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2158  auto Ld = cast<LoadSDNode>(N);
2159 
2160  return Ld->getAlignment() >= 4 &&
2161  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2162  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2163  !N->isDivergent()) ||
2164  (Subtarget->getScalarizeGlobalBehavior() &&
2165  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2166  !Ld->isVolatile() &&
2167  !N->isDivergent() &&
2168  static_cast<const SITargetLowering *>(
2169  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2180 }
2181 
2182 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2183  const AMDGPUTargetLowering& Lowering =
2184  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2185  bool IsModified = false;
2186  do {
2187  IsModified = false;
2188 
2189  // Go over all selected nodes and try to fold them a bit more
2190  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2191  while (Position != CurDAG->allnodes_end()) {
2192  SDNode *Node = &*Position++;
2193  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2194  if (!MachineNode)
2195  continue;
2196 
2197  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2198  if (ResNode != Node) {
2199  if (ResNode)
2200  ReplaceUses(Node, ResNode);
2201  IsModified = true;
2202  }
2203  }
2204  CurDAG->RemoveDeadNodes();
2205  } while (IsModified);
2206 }
2207 
2208 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2209  Subtarget = &MF.getSubtarget<R600Subtarget>();
2210  return SelectionDAGISel::runOnMachineFunction(MF);
2211 }
2212 
2213 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2214  if (!N->readMem())
2215  return false;
2216  if (CbId == -1)
2217  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2218  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2219 
2220  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2221 }
2222 
2223 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2224  SDValue& IntPtr) {
2225  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2226  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2227  true);
2228  return true;
2229  }
2230  return false;
2231 }
2232 
2233 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2234  SDValue& BaseReg, SDValue &Offset) {
2235  if (!isa<ConstantSDNode>(Addr)) {
2236  BaseReg = Addr;
2237  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2238  return true;
2239  }
2240  return false;
2241 }
2242 
2243 void R600DAGToDAGISel::Select(SDNode *N) {
2244  unsigned int Opc = N->getOpcode();
2245  if (N->isMachineOpcode()) {
2246  N->setNodeId(-1);
2247  return; // Already selected.
2248  }
2249 
2250  switch (Opc) {
2251  default: break;
2252  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2253  case ISD::SCALAR_TO_VECTOR:
2254  case ISD::BUILD_VECTOR: {
2255  EVT VT = N->getValueType(0);
2256  unsigned NumVectorElts = VT.getVectorNumElements();
2257  unsigned RegClassID;
2258  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2259  // sequence that adds a 128-bit reg copy when going through the
2260  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
2261  // as possible because they can't be bundled by our scheduler.
2262  switch(NumVectorElts) {
2263  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2264  case 4:
2265  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2266  RegClassID = R600::R600_Reg128VerticalRegClassID;
2267  else
2268  RegClassID = R600::R600_Reg128RegClassID;
2269  break;
2270  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2271  }
2272  SelectBuildVector(N, RegClassID);
2273  return;
2274  }
2275  }
2276 
2277  SelectCode(N);
2278 }
2279 
2280 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2281  SDValue &Offset) {
2282  ConstantSDNode *C;
2283  SDLoc DL(Addr);
2284 
2285  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2286  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2287  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2288  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2289  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2290  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2291  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2292  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2293  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2294  Base = Addr.getOperand(0);
2295  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2296  } else {
2297  Base = Addr;
2298  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2299  }
2300 
2301  return true;
2302 }
2303 
2304 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2305  SDValue &Offset) {
2306  ConstantSDNode *IMMOffset;
2307 
2308  if (Addr.getOpcode() == ISD::ADD
2309  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2310  && isInt<16>(IMMOffset->getZExtValue())) {
2311 
2312  Base = Addr.getOperand(0);
2313  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2314  MVT::i32);
2315  return true;
2316  // If the pointer address is constant, we can move it to the offset field.
2317  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2318  && isInt<16>(IMMOffset->getZExtValue())) {
2319  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2320  SDLoc(CurDAG->getEntryNode()),
2321  R600::ZERO, MVT::i32);
2322  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2323  MVT::i32);
2324  return true;
2325  }
2326 
2327  // Default case, no offset
2328  Base = Addr;
2329  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2330  return true;
2331 }