//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;
protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation
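  // (m0 acts as a bound on LDS/GDS addressing for subtargets that require it
  // to be initialized; writing all ones keeps the entire window accessible.)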

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
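  // For example, a v4i32 build_vector becomes
  //   REG_SEQUENCE SReg_128, e0, sub0, e1, sub1, e2, sub2, e3, sub3,
  // filling 4 * 2 + 1 = 9 of these operand slots.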
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
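          // e.g. (build_vector 1, 2) : v2i16 folds to s_mov_b32 0x00020001.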
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
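    // e.g. bfe_u32 x, 8, 16 becomes s_bfe_u32 x, (16 << 16) | 8: the packed
    // operand holds the width in bits [22:16] and the offset in bits [5:0].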
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
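// Lower a 64-bit scalar add/sub by splitting it into 32-bit halves, roughly:
//   lo = s_add_u32  lhs.lo, rhs.lo   // carry out in SCC
//   hi = s_addc_u32 lhs.hi, rhs.hi   // consumes the carry
// and recombining the halves with REG_SEQUENCE.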
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

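    // Split the constant into a 4096-byte-aligned base materialized into a
    // VGPR and a 12-bit immediate offset, e.g. 0x12345 -> v_mov_b32 0x12000
    // plus an immediate offset of 0x345.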
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

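    // On subtargets with flat-instruction offsets, the signed variants accept
    // a 13-bit signed immediate and the unsigned ones a 12-bit unsigned
    // immediate, hence the two range checks below.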
    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
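  // For example, Offset = 8 and Width = 4 pack to 0x00040008.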
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32
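  // For example, with b = 8 and c = 24, (a << 8) >> 24 yields bits [23:16]
  // of a, i.e. BFE_U32 a, 16, 8.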

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
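      // For example, (x >> 8) & 0xff becomes BFE_U32 x, 8, 8.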
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND removal,
    // so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                                          CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                                          Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

1649 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1650  MVT VT = N->getSimpleValueType(0);
1651  bool IsFMA = N->getOpcode() == ISD::FMA;
1652  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1653  !Subtarget->hasFmaMixInsts()) ||
1654  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1655  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1656  SelectCode(N);
1657  return;
1658  }
1659 
1660  SDValue Src0 = N->getOperand(0);
1661  SDValue Src1 = N->getOperand(1);
1662  SDValue Src2 = N->getOperand(2);
1663  unsigned Src0Mods, Src1Mods, Src2Mods;
1664 
1665  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1666  // using the conversion from f16.
1667  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1668  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1669  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1670 
1671  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1672  "fmad selected with denormals enabled");
1673  // TODO: We can select this with f32 denormals enabled if all the sources are
1674  // converted from f16 (in which case fmad isn't legal).
1675 
1676  if (Sel0 || Sel1 || Sel2) {
1677  // For dummy operands.
1678  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1679  SDValue Ops[] = {
1680  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1681  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1682  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1683  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1684  Zero, Zero
1685  };
1686 
1687  CurDAG->SelectNodeTo(N,
1688  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1689  MVT::f32, Ops);
1690  } else {
1691  SelectCode(N);
1692  }
1693 }
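
// Editor's sketch: on a subtarget with mad-mix, an f32 node such as
//   (fmad (fp_extend f16:a), (fp_extend f16:b), f32:c)
// is selected above to V_MAD_MIX_F32 (V_FMA_MIX_F32 for FMA), with op_sel_hi
// bits in the source modifiers marking a and b as f16 inputs, avoiding
// separate V_CVT_F32_F16 conversions.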
1694 
1695 // This is here because there isn't a way to use the generated sub0_sub1 as the
1696 // subreg index to EXTRACT_SUBREG in tablegen.
1697 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1698  MemSDNode *Mem = cast<MemSDNode>(N);
1699  unsigned AS = Mem->getAddressSpace();
1700  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1701  SelectCode(N);
1702  return;
1703  }
1704 
1705  MVT VT = N->getSimpleValueType(0);
1706  bool Is32 = (VT == MVT::i32);
1707  SDLoc SL(N);
1708 
1709  MachineSDNode *CmpSwap = nullptr;
1710  if (Subtarget->hasAddr64()) {
1711  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1712 
1713  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1714  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1715  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1716  SDValue CmpVal = Mem->getOperand(2);
1717 
1718  // XXX - Do we care about glue operands?
1719 
1720  SDValue Ops[] = {
1721  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1722  };
1723 
1724  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1725  }
1726  }
1727 
1728  if (!CmpSwap) {
1729  SDValue SRsrc, SOffset, Offset, SLC;
1730  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1731  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1732  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1733 
1734  SDValue CmpVal = Mem->getOperand(2);
1735  SDValue Ops[] = {
1736  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1737  };
1738 
1739  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1740  }
1741  }
1742 
1743  if (!CmpSwap) {
1744  SelectCode(N);
1745  return;
1746  }
1747 
1748  MachineMemOperand *MMO = Mem->getMemOperand();
1749  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1750 
1751  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1752  SDValue Extract
1753  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1754 
1755  ReplaceUses(SDValue(N, 0), Extract);
1756  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1757  CurDAG->RemoveDeadNode(N);
1758 }
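
// Editor's sketch: the *_RTN cmpswap instructions take the {data, compare}
// pair as one wide register operand and return a result of the same width,
// with the loaded old value in the low half. Hence the extract of sub0 (or
// sub0_sub1 for the 64-bit case) above to produce the value the atomic node
// is defined to return.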
1759 
1760 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1761  unsigned &Mods) const {
1762  Mods = 0;
1763  Src = In;
1764 
1765  if (Src.getOpcode() == ISD::FNEG) {
1766  Mods |= SISrcMods::NEG;
1767  Src = Src.getOperand(0);
1768  }
1769 
1770  if (Src.getOpcode() == ISD::FABS) {
1771  Mods |= SISrcMods::ABS;
1772  Src = Src.getOperand(0);
1773  }
1774 
1775  return true;
1776 }
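
// Example (editor's note): for In = (fneg (fabs x)) the two checks above fold
// both modifiers, yielding Src = x and Mods = NEG | ABS, which prints as
// -|x| in the source-modifier assembly syntax.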
1777 
1778 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1779  SDValue &SrcMods) const {
1780  unsigned Mods;
1781  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1782  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1783  return true;
1784  }
1785 
1786  return false;
1787 }
1788 
1789 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1790  SDValue &SrcMods) const {
1791  SelectVOP3Mods(In, Src, SrcMods);
1792  return isNoNanSrc(Src);
1793 }
1794 
1795 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1796  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1797  return false;
1798 
1799  Src = In;
1800  return true;
1801 }
1802 
1803 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1804  SDValue &SrcMods, SDValue &Clamp,
1805  SDValue &Omod) const {
1806  SDLoc DL(In);
1807  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1808  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1809 
1810  return SelectVOP3Mods(In, Src, SrcMods);
1811 }
1812 
1813 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1814  SDValue &SrcMods,
1815  SDValue &Clamp,
1816  SDValue &Omod) const {
1817  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1818  return SelectVOP3Mods(In, Src, SrcMods);
1819 }
1820 
1821 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1822  SDValue &Clamp, SDValue &Omod) const {
1823  Src = In;
1824 
1825  SDLoc DL(In);
1826  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1827  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1828 
1829  return true;
1830 }
1831 
1832 static SDValue stripBitcast(SDValue Val) {
1833  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1834 }
1835 
1836 // Figure out if this is really an extract of the high 16-bits of a dword.
1837 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1838  In = stripBitcast(In);
1839  if (In.getOpcode() != ISD::TRUNCATE)
1840  return false;
1841 
1842  SDValue Srl = In.getOperand(0);
1843  if (Srl.getOpcode() == ISD::SRL) {
1844  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1845  if (ShiftAmt->getZExtValue() == 16) {
1846  Out = stripBitcast(Srl.getOperand(0));
1847  return true;
1848  }
1849  }
1850  }
1851 
1852  return false;
1853 }
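
// Example (editor's note): In = (trunc (srl (bitcast v2f16:v), 16)) matches
// the pattern above, so Out becomes v and the caller can read the high 16
// bits in place via op_sel instead of materializing the shift.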
1854 
1855 // Look through operations that obscure just looking at the low 16 bits of
1856 // the same register.
1857 static SDValue stripExtractLoElt(SDValue In) {
1858  if (In.getOpcode() == ISD::TRUNCATE) {
1859  SDValue Src = In.getOperand(0);
1860  if (Src.getValueType().getSizeInBits() == 32)
1861  return stripBitcast(Src);
1862  }
1863 
1864  return In;
1865 }
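
// Example (editor's note): (trunc i32:x) only reads the low 16 bits of x, so
// the function above returns x itself; together with isExtractHiElt this lets
// SelectVOP3PMods notice that both halves of a build_vector come from one
// 32-bit register.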
1866 
1867 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1868  SDValue &SrcMods) const {
1869  unsigned Mods = 0;
1870  Src = In;
1871 
1872  if (Src.getOpcode() == ISD::FNEG) {
1873  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1874  Src = Src.getOperand(0);
1875  }
1876 
1877  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1878  unsigned VecMods = Mods;
1879 
1880  SDValue Lo = stripBitcast(Src.getOperand(0));
1881  SDValue Hi = stripBitcast(Src.getOperand(1));
1882 
1883  if (Lo.getOpcode() == ISD::FNEG) {
1884  Lo = stripBitcast(Lo.getOperand(0));
1885  Mods ^= SISrcMods::NEG;
1886  }
1887 
1888  if (Hi.getOpcode() == ISD::FNEG) {
1889  Hi = stripBitcast(Hi.getOperand(0));
1890  Mods ^= SISrcMods::NEG_HI;
1891  }
1892 
1893  if (isExtractHiElt(Lo, Lo))
1894  Mods |= SISrcMods::OP_SEL_0;
1895 
1896  if (isExtractHiElt(Hi, Hi))
1897  Mods |= SISrcMods::OP_SEL_1;
1898 
1899  Lo = stripExtractLoElt(Lo);
1900  Hi = stripExtractLoElt(Hi);
1901 
1902  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1903  // Really a scalar input. Just select from the low half of the register to
1904  // avoid packing.
1905 
1906  Src = Lo;
1907  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1908  return true;
1909  }
1910 
1911  Mods = VecMods;
1912  }
1913 
1914  // Packed instructions do not have abs modifiers.
1915  Mods |= SISrcMods::OP_SEL_1;
1916 
1917  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1918  return true;
1919 }
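
// Editor's sketch: for a splat such as Src = (build_vector f16:x, f16:x) the
// scalar-input path above returns Src = x with neither op_sel bit set, so
// both lanes of the packed instruction read the low half of x's register and
// no packing is required. Only genuinely vector inputs keep the default
// OP_SEL_1 (high lane reads the high half).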
1920 
1921 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1922  SDValue &SrcMods,
1923  SDValue &Clamp) const {
1924  SDLoc SL(In);
1925 
1926  // FIXME: Handle clamp and op_sel
1927  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1928 
1929  return SelectVOP3PMods(In, Src, SrcMods);
1930 }
1931 
1932 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1933  SDValue &SrcMods) const {
1934  Src = In;
1935  // FIXME: Handle op_sel
1936  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1937  return true;
1938 }
1939 
1940 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1941  SDValue &SrcMods,
1942  SDValue &Clamp) const {
1943  SDLoc SL(In);
1944 
1945  // FIXME: Handle clamp
1946  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1947 
1948  return SelectVOP3OpSel(In, Src, SrcMods);
1949 }
1950 
1951 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
1952  SDValue &SrcMods) const {
1953  // FIXME: Handle op_sel
1954  return SelectVOP3Mods(In, Src, SrcMods);
1955 }
1956 
1957 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1958  SDValue &SrcMods,
1959  SDValue &Clamp) const {
1960  SDLoc SL(In);
1961 
1962  // FIXME: Handle clamp
1963  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1964 
1965  return SelectVOP3OpSelMods(In, Src, SrcMods);
1966 }
1967 
1968 // The return value is not whether the match is possible (which it always is),
1969 // but whether or not a conversion is actually used.
1970 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
1971  unsigned &Mods) const {
1972  Mods = 0;
1973  SelectVOP3ModsImpl(In, Src, Mods);
1974 
1975  if (Src.getOpcode() == ISD::FP_EXTEND) {
1976  Src = Src.getOperand(0);
1977  assert(Src.getValueType() == MVT::f16);
1978  Src = stripBitcast(Src);
1979 
1980  // Be careful about folding modifiers if we already have an abs. fneg is
1981  // applied last, so we don't want to apply an earlier fneg.
1982  if ((Mods & SISrcMods::ABS) == 0) {
1983  unsigned ModsTmp;
1984  SelectVOP3ModsImpl(Src, Src, ModsTmp);
1985 
1986  if ((ModsTmp & SISrcMods::NEG) != 0)
1987  Mods ^= SISrcMods::NEG;
1988 
1989  if ((ModsTmp & SISrcMods::ABS) != 0)
1990  Mods |= SISrcMods::ABS;
1991  }
1992 
1993  // op_sel/op_sel_hi decide the source type and source.
1994  // If the source's op_sel_hi is set, it indicates a conversion from f16.
1995  // If the source's op_sel is set, it picks the high half of the source
1996  // register.
1997 
1998  Mods |= SISrcMods::OP_SEL_1;
1999  if (isExtractHiElt(Src, Src)) {
2000  Mods |= SISrcMods::OP_SEL_0;
2001 
2002  // TODO: Should we try to look for neg/abs here?
2003  }
2004 
2005  return true;
2006  }
2007 
2008  return false;
2009 }
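
// Editor's sketch: In = (fp_extend (fneg f16:x)) folds above to Src = x with
// Mods = NEG | OP_SEL_1 (op_sel_hi flags an f16 source), and if x is itself
// the high half of a register (isExtractHiElt), OP_SEL_0 is also set so the
// instruction reads that half directly.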
2010 
2011 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2012  SDValue &SrcMods) const {
2013  unsigned Mods = 0;
2014  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2015  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2016  return true;
2017 }
2018 
2019 // TODO: Can we identify things like v_mad_mixhi_f16?
2020 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2021  if (In.isUndef()) {
2022  Src = In;
2023  return true;
2024  }
2025 
2026  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2027  SDLoc SL(In);
2028  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2029  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2030  SL, MVT::i32, K);
2031  Src = SDValue(MovK, 0);
2032  return true;
2033  }
2034 
2035  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2036  SDLoc SL(In);
2037  SDValue K = CurDAG->getTargetConstant(
2038  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2039  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2040  SL, MVT::i32, K);
2041  Src = SDValue(MovK, 0);
2042  return true;
2043  }
2044 
2045  return isExtractHiElt(In, Src);
2046 }
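
// Example (editor's note): a half constant like f16 1.0 (bit pattern 0x3C00)
// is materialized above as V_MOV_B32 0x3C000000, i.e. the 16-bit value
// pre-shifted into the high half of a 32-bit VGPR so it can serve directly as
// a hi16 element.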
2047 
2048 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2049  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2050  return false;
2051  }
2052  const SIRegisterInfo *SIRI =
2053  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2054  const SIInstrInfo * SII =
2055  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2056 
2057  unsigned Limit = 0;
2058  bool AllUsesAcceptSReg = true;
2059  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2060  Limit < 10 && U != E; ++U, ++Limit) {
2061  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2062 
2063  // If the register class is unknown, it could be a register class
2064  // that needs to be an SGPR, e.g. one coming from an inline asm
2065  // constraint.
2066  if (!RC || SIRI->isSGPRClass(RC))
2067  return false;
2068 
2069  if (RC != &AMDGPU::VS_32RegClass) {
2070  AllUsesAcceptSReg = false;
2071  SDNode * User = *U;
2072  if (User->isMachineOpcode()) {
2073  unsigned Opc = User->getMachineOpcode();
2074  MCInstrDesc Desc = SII->get(Opc);
2075  if (Desc.isCommutable()) {
2076  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2077  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2078  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2079  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2080  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2081  if (CommutedRC == &AMDGPU::VS_32RegClass)
2082  AllUsesAcceptSReg = true;
2083  }
2084  }
2085  }
2086  // If AllUsesAcceptSReg is false at this point, we have not succeeded
2087  // in commuting the current user. This means there is at least one use
2088  // that strictly requires a VGPR, so we will not attempt to commute
2089  // other user instructions.
2090  if (!AllUsesAcceptSReg)
2091  break;
2092  }
2093  }
2094  return !AllUsesAcceptSReg && (Limit < 10);
2095 }
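
// Editor's sketch of the heuristic above: an immediate is kept in a VGPR
// (v_mov_b32) roughly when no use requires an SGPR and some use cannot take
// the value from an SGPR even after commuting it into a VS_32 (SGPR-or-VGPR)
// operand slot; otherwise an s_mov_b32 is preferred. The walk gives up after
// 10 uses to bound compile time.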
2096 
2097 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2098  auto Ld = cast<LoadSDNode>(N);
2099 
2100  return Ld->getAlignment() >= 4 &&
2101  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2102  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2103  !N->isDivergent()) ||
2104  (Subtarget->getScalarizeGlobalBehavior() &&
2105  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2106  !Ld->isVolatile() && !N->isDivergent() &&
2107  static_cast<const SITargetLowering *>(
2108  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2120 }
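
// Example (editor's note): a non-divergent load from the constant address
// space with at least 4-byte alignment satisfies the predicate above and can
// be selected to a scalar SMEM load (e.g. S_LOAD_DWORD) instead of a per-lane
// vector load; global loads additionally need the scalarize-global behavior,
// must not be volatile, and must have a provably unclobbered memory operand.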
2121 
2122 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2123  const AMDGPUTargetLowering& Lowering =
2124  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2125  bool IsModified = false;
2126  do {
2127  IsModified = false;
2128 
2129  // Go over all selected nodes and try to fold them a bit more
2130  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2131  while (Position != CurDAG->allnodes_end()) {
2132  SDNode *Node = &*Position++;
2133  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2134  if (!MachineNode)
2135  continue;
2136 
2137  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2138  if (ResNode != Node) {
2139  if (ResNode)
2140  ReplaceUses(Node, ResNode);
2141  IsModified = true;
2142  }
2143  }
2144  CurDAG->RemoveDeadNodes();
2145  } while (IsModified);
2146 }
2147 
2148 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2149  Subtarget = &MF.getSubtarget<R600Subtarget>();
2150  return SelectionDAGISel::runOnMachineFunction(MF);
2151 }
2152 
2153 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2154  if (!N->readMem())
2155  return false;
2156  if (CbId == -1)
2157  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2158  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2159 
2160  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2161 }
2162 
2163 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2164  SDValue& IntPtr) {
2165  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2166  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2167  true);
2168  return true;
2169  }
2170  return false;
2171 }
2172 
2173 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2174  SDValue& BaseReg, SDValue &Offset) {
2175  if (!isa<ConstantSDNode>(Addr)) {
2176  BaseReg = Addr;
2177  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2178  return true;
2179  }
2180  return false;
2181 }
2182 
2183 void R600DAGToDAGISel::Select(SDNode *N) {
2184  unsigned int Opc = N->getOpcode();
2185  if (N->isMachineOpcode()) {
2186  N->setNodeId(-1);
2187  return; // Already selected.
2188  }
2189 
2190  switch (Opc) {
2191  default: break;
2192  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2193  case ISD::SCALAR_TO_VECTOR:
2194  case ISD::BUILD_VECTOR: {
2195  EVT VT = N->getValueType(0);
2196  unsigned NumVectorElts = VT.getVectorNumElements();
2197  unsigned RegClassID;
2198  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
2199  // which adds a 128-bit register copy when going through the
2200  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2201  // possible because they can't be bundled by our scheduler.
2202  switch(NumVectorElts) {
2203  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2204  case 4:
2205  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2206  RegClassID = R600::R600_Reg128VerticalRegClassID;
2207  else
2208  RegClassID = R600::R600_Reg128RegClassID;
2209  break;
2210  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2211  }
2212  SelectBuildVector(N, RegClassID);
2213  return;
2214  }
2215  }
2216 
2217  SelectCode(N);
2218 }
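
// Editor's sketch: a 4-element BUILD_VECTOR on R600 is selected through
// SelectBuildVector into a single REG_SEQUENCE in R600_Reg128 (or the
// vertical register class for BUILD_VERTICAL_VECTOR), avoiding the
// IMPLICIT_DEF + INSERT_SUBREG chain and its unbundleable 128-bit copy.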
2219 
2220 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2221  SDValue &Offset) {
2222  ConstantSDNode *C;
2223  SDLoc DL(Addr);
2224 
2225  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2226  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2227  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2228  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2229  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2230  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2231  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2232  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2233  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2234  Base = Addr.getOperand(0);
2235  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2236  } else {
2237  Base = Addr;
2238  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2239  }
2240 
2241  return true;
2242 }
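
// Example (editor's note): Addr = (add p, 12) matches the third case above,
// giving Base = p and Offset = 12, while a bare constant address selects
// INDIRECT_BASE_ADDR as the base with the constant in the offset field.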
2243 
2244 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2245  SDValue &Offset) {
2246  ConstantSDNode *IMMOffset;
2247 
2248  if (Addr.getOpcode() == ISD::ADD
2249  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2250  && isInt<16>(IMMOffset->getZExtValue())) {
2251 
2252  Base = Addr.getOperand(0);
2253  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2254  MVT::i32);
2255  return true;
2256  // If the pointer address is constant, we can move it to the offset field.
2257  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2258  && isInt<16>(IMMOffset->getZExtValue())) {
2259  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2260  SDLoc(CurDAG->getEntryNode()),
2261  R600::ZERO, MVT::i32);
2262  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2263  MVT::i32);
2264  return true;
2265  }
2266 
2267  // Default case, no offset
2268  Base = Addr;
2269  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2270  return true;
2271 }
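
// Example (editor's note): a VTX read from (add p, 8) fits the 16-bit
// immediate check above, giving Base = p and Offset = 8; a purely constant
// address instead uses the ZERO register as base so the whole address lives
// in the offset field.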