//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation
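  // (Illustrative aside, not from the original source: on these subtargets
  // m0 holds the limit against which DS instruction offsets are checked, so
  // writing the maximum unsigned value, -1 as an i32, makes that check
  // effectively a no-op for ordinary LDS loads and stores.)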

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
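          // Worked example (illustrative, not from the original source):
          // building the v2f16 vector <1.0, 2.0> packs the half patterns
          // 0x3C00 and 0x4000 into K = 0x3C00 | (0x4000 << 16) = 0x40003C00,
          // which a single s_mov_b32 can materialize.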
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
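// Illustrative sketch (not from the original source): a 64-bit add is split
// into two 32-bit scalar ops chained through the carry bit, e.g.
//   lo = S_ADD_U32  lhs.lo, rhs.lo   ; sets SCC on unsigned overflow
//   hi = S_ADDC_U32 lhs.hi, rhs.hi   ; consumes SCC as carry-in
// and the halves are recombined below with a REG_SEQUENCE.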
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.
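    //
    // Worked example (illustrative, not from the original source): two
    // ds_read_b32 at constant addresses 40 and 44 can share the base
    // v_mov_b32 v0, 0 and later be merged into one ds_read2_b32 with the
    // dword-scaled offsets offset0 = 10 and offset1 = 11.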

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
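    // Worked example (illustrative, not from the original source): for a
    // constant scratch address of 5000, the 12-bit immediate cannot hold the
    // value, so HighBits = 5000 & ~4095 = 4096 becomes a v_mov_b32 used as
    // vaddr, and ImmOffset below is 5000 & 4095 = 904.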
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
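      //
      // Worked example (illustrative, not from the original source): for
      // Imm = 5000, High = (5000 + 4) & ~4095 = 4096 and Low = 5004 & 4095 =
      // 908, so ImmOffset becomes 908 and SOffset becomes 4096 - 4 = 4092;
      // 908 + 4092 = 5000 and both components remain 4-byte aligned.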
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
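  // (Illustrative note, not from the original source: the REG_SEQUENCE built
  // below forms the 64-bit address (AddrHiVal << 32) | Addr, with Addr in
  // sub0 and an s_mov_b32 of AddrHiVal in sub1.)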
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
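  //
  // Worked example (illustrative, not from the original source): extracting
  // 8 bits starting at bit 16 packs to 16 | (8 << 16) = 0x00080010, i.e.
  // s_bfe_u32 dst, val, 0x80010 reads the third byte of val.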
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c" ---> "BFE_U32 a, (c-b), (32-c)"
  // "(a << b) sra c" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32
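  //
  // Worked example (illustrative, not from the original source): with b = 4
  // and c = 8, (a << 4) >> 8 yields the 24 bits of a starting at bit 4,
  // which is exactly BFE_U32 a, 4, 24 (offset = c - b, width = 32 - c).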

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

1676 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1677  SDValue Cond = N->getOperand(1);
1678 
1679  if (Cond.isUndef()) {
1680  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1681  N->getOperand(2), N->getOperand(0));
1682  return;
1683  }
1684 
1685  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1686  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1687  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1688  SDLoc SL(N);
1689 
1690  if (!UseSCCBr) {
1691  // This is the case where we are selecting to S_CBRANCH_VCCNZ. We have not
1692  // analyzed what generates the vcc value, so we do not know whether vcc
1693  // bits for disabled lanes are 0. Thus we need to mask out bits for
1694  // disabled lanes.
1695  //
1696  // For the case that we select S_CBRANCH_SCC1 and it gets
1697  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1698  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1699  //
1700  // We could add an analysis of what generates the vcc value here and omit
1701  // the S_AND when it is unnecessary. But it would be better to add a separate
1702  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1703  // catches both cases.
1704  Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1705  CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1706  Cond),
1707  0);
1708  }
1709 
1710  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1711  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1712  N->getOperand(2), // Basic Block
1713  VCC.getValue(0));
1714 }
1715 
1716 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1717  MVT VT = N->getSimpleValueType(0);
1718  bool IsFMA = N->getOpcode() == ISD::FMA;
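 // FMAD can only be selected to v_mad_mix_f32 and FMA to v_fma_mix_f32; a
 // subtarget provides at most one of the two, so fall back to the generic
 // path when the node does not match the available instruction (or when the
 // type is not f32).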
1719  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1720  !Subtarget->hasFmaMixInsts()) ||
1721  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1722  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1723  SelectCode(N);
1724  return;
1725  }
1726 
1727  SDValue Src0 = N->getOperand(0);
1728  SDValue Src1 = N->getOperand(1);
1729  SDValue Src2 = N->getOperand(2);
1730  unsigned Src0Mods, Src1Mods, Src2Mods;
1731 
1732  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1733  // using the conversion from f16.
1734  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1735  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1736  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1737 
1738  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1739  "fmad selected with denormals enabled");
1740  // TODO: We can select this with f32 denormals enabled if all the sources are
1741  // converted from f16 (in which case fmad isn't legal).
1742 
1743  if (Sel0 || Sel1 || Sel2) {
1744  // For dummy operands.
1745  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1746  SDValue Ops[] = {
1747  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1748  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1749  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1750  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1751  Zero, Zero
1752  };
1753 
1754  CurDAG->SelectNodeTo(N,
1755  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1756  MVT::f32, Ops);
1757  } else {
1758  SelectCode(N);
1759  }
1760 }
1761 
1762 // This is here because there isn't a way to use the generated sub0_sub1 as the
1763 // subreg index to EXTRACT_SUBREG in tablegen.
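 // The buffer atomic cmpswap data operand is a {data, compare} register
 // pair; the old memory value is returned in the low half, so the result is
 // extracted from sub0 (sub0_sub1 for the 64-bit variant).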
1764 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1765  MemSDNode *Mem = cast<MemSDNode>(N);
1766  unsigned AS = Mem->getAddressSpace();
1767  if (AS == AMDGPUASI.FLAT_ADDRESS) {
1768  SelectCode(N);
1769  return;
1770  }
1771 
1772  MVT VT = N->getSimpleValueType(0);
1773  bool Is32 = (VT == MVT::i32);
1774  SDLoc SL(N);
1775 
1776  MachineSDNode *CmpSwap = nullptr;
1777  if (Subtarget->hasAddr64()) {
1778  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1779 
1780  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1781  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1782  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1783  SDValue CmpVal = Mem->getOperand(2);
1784 
1785  // XXX - Do we care about glue operands?
1786 
1787  SDValue Ops[] = {
1788  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1789  };
1790 
1791  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1792  }
1793  }
1794 
1795  if (!CmpSwap) {
1796  SDValue SRsrc, SOffset, Offset, SLC;
1797  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1798  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1799  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1800 
1801  SDValue CmpVal = Mem->getOperand(2);
1802  SDValue Ops[] = {
1803  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1804  };
1805 
1806  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1807  }
1808  }
1809 
1810  if (!CmpSwap) {
1811  SelectCode(N);
1812  return;
1813  }
1814 
1815  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1816  *MMOs = Mem->getMemOperand();
1817  CmpSwap->setMemRefs(MMOs, MMOs + 1);
1818 
1819  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1820  SDValue Extract
1821  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1822 
1823  ReplaceUses(SDValue(N, 0), Extract);
1824  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1825  CurDAG->RemoveDeadNode(N);
1826 }
1827 
1828 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1829  unsigned &Mods) const {
1830  Mods = 0;
1831  Src = In;
1832 
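 // For example, (fneg (fabs x)) is matched outside-in: Src becomes x and
 // Mods ends up as NEG | ABS.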
1833  if (Src.getOpcode() == ISD::FNEG) {
1834  Mods |= SISrcMods::NEG;
1835  Src = Src.getOperand(0);
1836  }
1837 
1838  if (Src.getOpcode() == ISD::FABS) {
1839  Mods |= SISrcMods::ABS;
1840  Src = Src.getOperand(0);
1841  }
1842 
1843  return true;
1844 }
1845 
1846 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1847  SDValue &SrcMods) const {
1848  unsigned Mods;
1849  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1850  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1851  return true;
1852  }
1853 
1854  return false;
1855 }
1856 
1857 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1858  SDValue &SrcMods) const {
1859  SelectVOP3Mods(In, Src, SrcMods);
1860  return isNoNanSrc(Src);
1861 }
1862 
1863 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1864  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1865  return false;
1866 
1867  Src = In;
1868  return true;
1869 }
1870 
1871 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1872  SDValue &SrcMods, SDValue &Clamp,
1873  SDValue &Omod) const {
1874  SDLoc DL(In);
1875  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1876  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1877 
1878  return SelectVOP3Mods(In, Src, SrcMods);
1879 }
1880 
1881 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1882  SDValue &SrcMods,
1883  SDValue &Clamp,
1884  SDValue &Omod) const {
1885  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1886  return SelectVOP3Mods(In, Src, SrcMods);
1887 }
1888 
1889 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1890  SDValue &Clamp, SDValue &Omod) const {
1891  Src = In;
1892 
1893  SDLoc DL(In);
1894  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1895  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1896 
1897  return true;
1898 }
1899 
1900 static SDValue stripBitcast(SDValue Val) {
1901  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1902 }
1903 
1904 // Figure out if this is really an extract of the high 16-bits of a dword.
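 // For example, (trunc (srl x, 16)), possibly wrapped in bitcasts, reads
 // exactly the high 16 bits of the 32-bit value x.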
1905 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1906  In = stripBitcast(In);
1907  if (In.getOpcode() != ISD::TRUNCATE)
1908  return false;
1909 
1910  SDValue Srl = In.getOperand(0);
1911  if (Srl.getOpcode() == ISD::SRL) {
1912  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1913  if (ShiftAmt->getZExtValue() == 16) {
1914  Out = stripBitcast(Srl.getOperand(0));
1915  return true;
1916  }
1917  }
1918  }
1919 
1920  return false;
1921 }
1922 
1923 // Look through operations that obscure just looking at the low 16-bits of the
1924 // same register.
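 // For example, a 16-bit truncate of a 32-bit value only uses the low half,
 // so the 32-bit source can be used directly.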
1925 static SDValue stripExtractLoElt(SDValue In) {
1926  if (In.getOpcode() == ISD::TRUNCATE) {
1927  SDValue Src = In.getOperand(0);
1928  if (Src.getValueType().getSizeInBits() == 32)
1929  return stripBitcast(Src);
1930  }
1931 
1932  return In;
1933 }
1934 
1935 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1936  SDValue &SrcMods) const {
1937  unsigned Mods = 0;
1938  Src = In;
1939 
1940  if (Src.getOpcode() == ISD::FNEG) {
1941  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1942  Src = Src.getOperand(0);
1943  }
1944 
1945  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1946  unsigned VecMods = Mods;
1947 
1948  SDValue Lo = stripBitcast(Src.getOperand(0));
1949  SDValue Hi = stripBitcast(Src.getOperand(1));
1950 
1951  if (Lo.getOpcode() == ISD::FNEG) {
1952  Lo = stripBitcast(Lo.getOperand(0));
1953  Mods ^= SISrcMods::NEG;
1954  }
1955 
1956  if (Hi.getOpcode() == ISD::FNEG) {
1957  Hi = stripBitcast(Hi.getOperand(0));
1958  Mods ^= SISrcMods::NEG_HI;
1959  }
1960 
1961  if (isExtractHiElt(Lo, Lo))
1962  Mods |= SISrcMods::OP_SEL_0;
1963 
1964  if (isExtractHiElt(Hi, Hi))
1965  Mods |= SISrcMods::OP_SEL_1;
1966 
1967  Lo = stripExtractLoElt(Lo);
1968  Hi = stripExtractLoElt(Hi);
1969 
1970  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1971  // Really a scalar input. Just select from the low half of the register to
1972  // avoid packing.
1973 
1974  Src = Lo;
1975  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1976  return true;
1977  }
1978 
1979  Mods = VecMods;
1980  }
1981 
1982  // Packed instructions do not have abs modifiers.
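 // OP_SEL_1 here is the op_sel_hi bit, which defaults to set: the high half
 // of the operation reads the high half of the source.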
1983  Mods |= SISrcMods::OP_SEL_1;
1984 
1985  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1986  return true;
1987 }
1988 
1989 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1990  SDValue &SrcMods,
1991  SDValue &Clamp) const {
1992  SDLoc SL(In);
1993 
1994  // FIXME: Handle clamp and op_sel
1995  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1996 
1997  return SelectVOP3PMods(In, Src, SrcMods);
1998 }
1999 
2000 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2001  SDValue &SrcMods) const {
2002  Src = In;
2003  // FIXME: Handle op_sel
2004  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2005  return true;
2006 }
2007 
2008 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2009  SDValue &SrcMods,
2010  SDValue &Clamp) const {
2011  SDLoc SL(In);
2012 
2013  // FIXME: Handle clamp
2014  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2015 
2016  return SelectVOP3OpSel(In, Src, SrcMods);
2017 }
2018 
2019 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2020  SDValue &SrcMods) const {
2021  // FIXME: Handle op_sel
2022  return SelectVOP3Mods(In, Src, SrcMods);
2023 }
2024 
2025 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2026  SDValue &SrcMods,
2027  SDValue &Clamp) const {
2028  SDLoc SL(In);
2029 
2030  // FIXME: Handle clamp
2031  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2032 
2033  return SelectVOP3OpSelMods(In, Src, SrcMods);
2034 }
2035 
2036 // The return value is not whether the match is possible (which it always is),
2037 // but whether or not a conversion is really used.
2038 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2039  unsigned &Mods) const {
2040  Mods = 0;
2041  SelectVOP3ModsImpl(In, Src, Mods);
2042 
2043  if (Src.getOpcode() == ISD::FP_EXTEND) {
2044  Src = Src.getOperand(0);
2045  assert(Src.getValueType() == MVT::f16);
2046  Src = stripBitcast(Src);
2047 
2048  // Be careful about folding modifiers if we already have an abs. fneg is
2049  // applied last, so we don't want to apply an earlier fneg.
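 // For example, in (fabs (fp_extend (fneg x))) the hardware applies neg
 // after abs, so folding the inner fneg's NEG bit would wrongly compute
 // -|x| instead of |x|.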
2050  if ((Mods & SISrcMods::ABS) == 0) {
2051  unsigned ModsTmp;
2052  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2053 
2054  if ((ModsTmp & SISrcMods::NEG) != 0)
2055  Mods ^= SISrcMods::NEG;
2056 
2057  if ((ModsTmp & SISrcMods::ABS) != 0)
2058  Mods |= SISrcMods::ABS;
2059  }
2060 
2061  // op_sel/op_sel_hi decide the source type and source.
2062  // If the source's op_sel_hi is set, it indicates a conversion from fp16.
2063  // If the source's op_sel is set, it picks the high half of the source
2064  // register.
2065 
2066  Mods |= SISrcMods::OP_SEL_1;
2067  if (isExtractHiElt(Src, Src)) {
2068  Mods |= SISrcMods::OP_SEL_0;
2069 
2070  // TODO: Should we try to look for neg/abs here?
2071  }
2072 
2073  return true;
2074  }
2075 
2076  return false;
2077 }
2078 
2079 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2080  SDValue &SrcMods) const {
2081  unsigned Mods = 0;
2082  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2083  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2084  return true;
2085 }
2086 
2087 // TODO: Can we identify things like v_mad_mixhi_f16?
2088 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2089  if (In.isUndef()) {
2090  Src = In;
2091  return true;
2092  }
2093 
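 // An integer constant can be materialized directly in the high half by
 // moving the value shifted left by 16 into a full 32-bit register.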
2094  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2095  SDLoc SL(In);
2096  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2097  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2098  SL, MVT::i32, K);
2099  Src = SDValue(MovK, 0);
2100  return true;
2101  }
2102 
2103  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2104  SDLoc SL(In);
2105  SDValue K = CurDAG->getTargetConstant(
2106  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2107  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2108  SL, MVT::i32, K);
2109  Src = SDValue(MovK, 0);
2110  return true;
2111  }
2112 
2113  return isExtractHiElt(In, Src);
2114 }
2115 
2116 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2117  const AMDGPUTargetLowering& Lowering =
2118  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2119  bool IsModified = false;
2120  do {
2121  IsModified = false;
2122 
2123  // Go over all selected nodes and try to fold them a bit more
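 // Iterate to a fixed point: a successful fold can expose further folding
 // opportunities on neighboring nodes.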
2124  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2125  while (Position != CurDAG->allnodes_end()) {
2126  SDNode *Node = &*Position++;
2127  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2128  if (!MachineNode)
2129  continue;
2130 
2131  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2132  if (ResNode != Node) {
2133  if (ResNode)
2134  ReplaceUses(Node, ResNode);
2135  IsModified = true;
2136  }
2137  }
2138  CurDAG->RemoveDeadNodes();
2139  } while (IsModified);
2140 }
2141 
2142 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2143  Subtarget = &MF.getSubtarget<R600Subtarget>();
2144  return SelectionDAGISel::runOnMachineFunction(MF);
2145 }
2146 
2147 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2148  if (!N->readMem())
2149  return false;
2150  if (CbId == -1)
2151  return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
2152  N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
2153 
2154  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
2155 }
2156 
2157 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2158  SDValue& IntPtr) {
2159  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2160  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2161  true);
2162  return true;
2163  }
2164  return false;
2165 }
2166 
2167 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2168  SDValue& BaseReg, SDValue &Offset) {
2169  if (!isa<ConstantSDNode>(Addr)) {
2170  BaseReg = Addr;
2171  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2172  return true;
2173  }
2174  return false;
2175 }
2176 
2177 void R600DAGToDAGISel::Select(SDNode *N) {
2178  unsigned int Opc = N->getOpcode();
2179  if (N->isMachineOpcode()) {
2180  N->setNodeId(-1);
2181  return; // Already selected.
2182  }
2183 
2184  switch (Opc) {
2185  default: break;
2186  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2187  case ISD::SCALAR_TO_VECTOR:
2188  case ISD::BUILD_VECTOR: {
2189  EVT VT = N->getValueType(0);
2190  unsigned NumVectorElts = VT.getVectorNumElements();
2191  unsigned RegClassID;
2192  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2193  // sequence that adds a 128-bit register copy when going through the
2194  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2195  // possible because they can't be bundled by our scheduler.
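 // (BUILD_VERTICAL_VECTOR keeps the vector in consecutive registers, which
 // is why it selects the vertical register class below.)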
2196  switch(NumVectorElts) {
2197  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2198  case 4:
2199  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2200  RegClassID = R600::R600_Reg128VerticalRegClassID;
2201  else
2202  RegClassID = R600::R600_Reg128RegClassID;
2203  break;
2204  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2205  }
2206  SelectBuildVector(N, RegClassID);
2207  return;
2208  }
2209  }
2210 
2211  SelectCode(N);
2212 }
2213 
2214 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2215  SDValue &Offset) {
2216  ConstantSDNode *C;
2217  SDLoc DL(Addr);
2218 
2219  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2220  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2221  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2222  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2223  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2224  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2225  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2226  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2227  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2228  Base = Addr.getOperand(0);
2229  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2230  } else {
2231  Base = Addr;
2232  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2233  }
2234 
2235  return true;
2236 }
2237 
2238 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2239  SDValue &Offset) {
2240  ConstantSDNode *IMMOffset;
2241 
2242  if (Addr.getOpcode() == ISD::ADD
2243  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2244  && isInt<16>(IMMOffset->getZExtValue())) {
2245 
2246  Base = Addr.getOperand(0);
2247  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2248  MVT::i32);
2249  return true;
2250  // If the pointer address is constant, we can move it to the offset field.
2251  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2252  && isInt<16>(IMMOffset->getZExtValue())) {
2253  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2254  SDLoc(CurDAG->getEntryNode()),
2255  R600::ZERO, MVT::i32);
2256  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2257  MVT::i32);
2258  return true;
2259  }
2260 
2261  // Default case, no offset
2262  Base = Addr;
2263  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2264  return true;
2265 }