LLVM  4.0.0
AMDGPUISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUISelLowering.h" // For AMDGPUISD
19 #include "AMDGPUSubtarget.h"
20 #include "SIDefines.h"
21 #include "SIInstrInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "SIISelLowering.h"
24 #include "SIMachineFunctionInfo.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/MC/MCInstrDesc.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CodeGen.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <new>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 namespace llvm {
53 
// Forward declaration only: the selector class declared below names
// R600InstrInfo solely through `const R600InstrInfo *` parameters, so the
// complete type is not needed at this point.
54 class R600InstrInfo;
55 
56 } // end namespace llvm
57 
58 //===----------------------------------------------------------------------===//
59 // Instruction Selector Implementation
60 //===----------------------------------------------------------------------===//
61 
62 namespace {
63 
64 /// AMDGPU specific code to select AMDGPU machine instructions for
65 /// SelectionDAG operations.
66 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
67  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
68  // make the right decision when generating code for different targets.
69  const AMDGPUSubtarget *Subtarget;
70 
71 public:
72  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
73  : SelectionDAGISel(TM, OptLevel) {}
74  ~AMDGPUDAGToDAGISel() override = default;
75 
76  bool runOnMachineFunction(MachineFunction &MF) override;
77  void Select(SDNode *N) override;
78  StringRef getPassName() const override;
79  void PostprocessISelDAG() override;
80 
81 private:
82  SDValue foldFrameIndex(SDValue N) const;
83  bool isInlineImmediate(const SDNode *N) const;
84  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
85  const R600InstrInfo *TII);
86  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
87  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
88 
89  bool isConstantLoad(const MemSDNode *N, int cbID) const;
90  bool isUniformBr(const SDNode *N) const;
91 
92  SDNode *glueCopyToM0(SDNode *N) const;
93 
94  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
95  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
96  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
97  SDValue& Offset);
98  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
99  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
100  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
101  unsigned OffsetBits) const;
102  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
103  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
104  SDValue &Offset1) const;
105  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
106  SDValue &SOffset, SDValue &Offset, SDValue &Offen,
107  SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
108  SDValue &TFE) const;
109  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
110  SDValue &SOffset, SDValue &Offset, SDValue &GLC,
111  SDValue &SLC, SDValue &TFE) const;
112  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
113  SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
114  SDValue &SLC) const;
115  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
116  SDValue &SOffset, SDValue &ImmOffset) const;
117  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
118  SDValue &Offset, SDValue &GLC, SDValue &SLC,
119  SDValue &TFE) const;
120  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
121  SDValue &Offset, SDValue &SLC) const;
122  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
123  SDValue &Offset) const;
124  bool SelectMUBUFConstant(SDValue Constant,
125  SDValue &SOffset,
126  SDValue &ImmOffset) const;
127  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
128  SDValue &ImmOffset) const;
129  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
130  SDValue &ImmOffset, SDValue &VOffset) const;
131 
132  bool SelectFlat(SDValue Addr, SDValue &VAddr,
133  SDValue &SLC, SDValue &TFE) const;
134 
135  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
136  bool &Imm) const;
137  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
138  bool &Imm) const;
139  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
140  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
141  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
142  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
143  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
144  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
145  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
146  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
147  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
148  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
149  SDValue &Clamp, SDValue &Omod) const;
150  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
151  SDValue &Clamp, SDValue &Omod) const;
152 
153  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
154  SDValue &Omod) const;
155  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
156  SDValue &Clamp,
157  SDValue &Omod) const;
158 
159  void SelectADD_SUB_I64(SDNode *N);
160  void SelectDIV_SCALE(SDNode *N);
161  void SelectFMA_W_CHAIN(SDNode *N);
162  void SelectFMUL_W_CHAIN(SDNode *N);
163 
164  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
165  uint32_t Offset, uint32_t Width);
166  void SelectS_BFEFromShifts(SDNode *N);
167  void SelectS_BFE(SDNode *N);
168  bool isCBranchSCC(const SDNode *N) const;
169  void SelectBRCOND(SDNode *N);
170  void SelectATOMIC_CMP_SWAP(SDNode *N);
171 
172  // Include the pieces autogenerated from the target description.
173 #include "AMDGPUGenDAGISel.inc"
174 };
175 
176 } // end anonymous namespace
177 
178 /// \brief This pass converts a legalized DAG into an AMDGPU-specific
179 /// DAG, ready for instruction scheduling.
// NOTE(review): listing line 180 is missing from this copy of the file. It
// should carry the start of this function's signature (return type, function
// name and the `TM` parameter used below); restore it from the upstream source.
181  CodeGenOpt::Level OptLevel) {
 // Allocates a new selector with `new` and returns the raw pointer to the
 // caller.
182  return new AMDGPUDAGToDAGISel(TM, OptLevel);
183 }
184 
185 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
186  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
188 }
189 
190 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
191  const SIInstrInfo *TII
192  = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
193 
194  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
195  return TII->isInlineConstant(C->getAPIntValue());
196 
197  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
198  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
199 
200  return false;
201 }
202 
203 /// \brief Determine the register class for \p OpNo
204 /// \returns The register class of the virtual register that will be used for
205 /// the given operand number \OpNo or NULL if the register class cannot be
206 /// determined.
207 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
208  unsigned OpNo) const {
209  if (!N->isMachineOpcode()) {
210  if (N->getOpcode() == ISD::CopyToReg) {
211  unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
213  MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
214  return MRI.getRegClass(Reg);
215  }
216 
217  const SIRegisterInfo *TRI
218  = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
219  return TRI->getPhysRegClass(Reg);
220  }
221 
222  return nullptr;
223  }
224 
225  switch (N->getMachineOpcode()) {
226  default: {
227  const MCInstrDesc &Desc =
228  Subtarget->getInstrInfo()->get(N->getMachineOpcode());
229  unsigned OpIdx = Desc.getNumDefs() + OpNo;
230  if (OpIdx >= Desc.getNumOperands())
231  return nullptr;
232  int RegClass = Desc.OpInfo[OpIdx].RegClass;
233  if (RegClass == -1)
234  return nullptr;
235 
236  return Subtarget->getRegisterInfo()->getRegClass(RegClass);
237  }
238  case AMDGPU::REG_SEQUENCE: {
239  unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
240  const TargetRegisterClass *SuperRC =
241  Subtarget->getRegisterInfo()->getRegClass(RCID);
242 
243  SDValue SubRegOp = N->getOperand(OpNo + 1);
244  unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
245  return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
246  SubRegIdx);
247  }
248  }
249 }
250 
251 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
252  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
253  cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
254  return N;
255 
256  const SITargetLowering& Lowering =
257  *static_cast<const SITargetLowering*>(getTargetLowering());
258 
259  // Write max value to m0 before each load operation
260 
261  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
262  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
263 
264  SDValue Glue = M0.getValue(1);
265 
267  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
268  Ops.push_back(N->getOperand(i));
269  }
270  Ops.push_back(Glue);
271  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
272 
273  return N;
274 }
275 
276 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
277  switch (NumVectorElts) {
278  case 1:
279  return AMDGPU::SReg_32_XM0RegClassID;
280  case 2:
281  return AMDGPU::SReg_64RegClassID;
282  case 4:
283  return AMDGPU::SReg_128RegClassID;
284  case 8:
285  return AMDGPU::SReg_256RegClassID;
286  case 16:
287  return AMDGPU::SReg_512RegClassID;
288  }
289 
290  llvm_unreachable("invalid vector size");
291 }
292 
// Per-node selection hook. Nodes that already carry a machine opcode are only
// marked with node id -1. Atomics, loads and stores are first passed through
// glueCopyToM0(). The opcodes listed in the switch below are selected by hand
// and return directly; every path that leaves the switch with `break` ends in
// SelectCode(N).
// NOTE(review): listing line 293, which should hold this function's signature,
// is missing from this copy of the file (the embedded numbering jumps from 292
// to 294). Restore it from the upstream source.
294  unsigned int Opc = N->getOpcode();
295  if (N->isMachineOpcode()) {
296  N->setNodeId(-1);
297  return; // Already selected.
298  }
299 
 // Atomic nodes get the M0 copy glued on before selection; glueCopyToM0
 // itself returns the node unchanged when no copy is needed.
300  if (isa<AtomicSDNode>(N) ||
301  (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
302  N = glueCopyToM0(N);
303 
304  switch (Opc) {
305  default: break;
306  // We are selecting i64 ADD here instead of custom lower it during
307  // DAG legalization, so we can fold some i64 ADDs used for address
308  // calculation into the LOAD and STORE instructions.
309  case ISD::ADD:
310  case ISD::ADDC:
311  case ISD::ADDE:
312  case ISD::SUB:
313  case ISD::SUBC:
314  case ISD::SUBE: {
 // Only 64-bit add/sub on Southern Islands or newer is expanded by hand.
315  if (N->getValueType(0) != MVT::i64 ||
316  Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
317  break;
318 
319  SelectADD_SUB_I64(N);
320  return;
321  }
 // NOTE(review): listing line 322 is missing here; presumably it is the `case`
 // label (with opening brace) for the node handled by SelectFMUL_W_CHAIN --
 // confirm against upstream.
323  SelectFMUL_W_CHAIN(N);
324  return;
325  }
326  case AMDGPUISD::FMA_W_CHAIN: {
327  SelectFMA_W_CHAIN(N);
328  return;
329  }
330 
 // NOTE(review): listing lines 331-332 are missing here; presumably further
 // `case` labels sharing this body (the assert on listing line 392 expects
 // ISD::SCALAR_TO_VECTOR to reach it) -- confirm against upstream.
333  case ISD::BUILD_VECTOR: {
334  unsigned RegClassID;
335  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
336  EVT VT = N->getValueType(0);
337  unsigned NumVectorElts = VT.getVectorNumElements();
338  EVT EltVT = VT.getVectorElementType();
339  assert(EltVT.bitsEq(MVT::i32));
340  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
341  RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
342  } else {
343  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
344  // that adds a 128 bits reg copy when going through TwoAddressInstructions
345  // pass. We want to avoid 128 bits copies as much as possible because they
346  // can't be bundled by our scheduler.
347  switch(NumVectorElts) {
348  case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
349  case 4:
 // NOTE(review): listing line 350 is missing here; it must be the `if`
 // condition that chooses the vertical register class over the plain one
 // below -- restore it from upstream.
351  RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
352  else
353  RegClassID = AMDGPU::R600_Reg128RegClassID;
354  break;
355  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
356  }
357  }
358 
359  SDLoc DL(N);
360  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
361 
 // A single-element vector is just a copy into the chosen register class.
362  if (NumVectorElts == 1) {
363  CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
364  RegClass);
365  return;
366  }
367 
368  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
369  "supported yet");
370  // 16 = Max Num Vector Elements
371  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
372  // 1 = Vector Register Class
373  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
374 
375  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
376  bool IsRegSeq = true;
377  unsigned NOps = N->getNumOperands();
 // Fill (value, sub-register index) pairs; a RegisterSDNode operand aborts
 // the hand selection and leaves the node to SelectCode().
378  for (unsigned i = 0; i < NOps; i++) {
379  // XXX: Why is this here?
380  if (isa<RegisterSDNode>(N->getOperand(i))) {
381  IsRegSeq = false;
382  break;
383  }
384  RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
385  RegSeqArgs[1 + (2 * i) + 1] =
386  CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
387  MVT::i32);
388  }
389 
390  if (NOps != NumVectorElts) {
391  // Fill in the missing undef elements if this was a scalar_to_vector.
392  assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
393 
394  MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
395  DL, EltVT);
396  for (unsigned i = NOps; i < NumVectorElts; ++i) {
397  RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
398  RegSeqArgs[1 + (2 * i) + 1] =
399  CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
400  }
401  }
402 
403  if (!IsRegSeq)
404  break;
405  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
406  return;
407  }
408  case ISD::BUILD_PAIR: {
409  SDValue RC, SubReg0, SubReg1;
 // Generations up to and including Northern Islands use SelectCode().
410  if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
411  break;
412  }
413  SDLoc DL(N);
414  if (N->getValueType(0) == MVT::i128) {
415  RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
416  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
417  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
418  } else if (N->getValueType(0) == MVT::i64) {
419  RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
420  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
421  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
422  } else {
423  llvm_unreachable("Unhandled value type for BUILD_PAIR");
424  }
425  const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
426  N->getOperand(1), SubReg1 };
427  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
428  N->getValueType(0), Ops));
429  return;
430  }
431 
432  case ISD::Constant:
433  case ISD::ConstantFP: {
 // Only 64-bit constants that are not inline immediates, on Southern Islands
 // or newer, are split into two 32-bit moves here.
434  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
435  N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
436  break;
437 
438  uint64_t Imm;
439  if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
440  Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
441  else {
442  ConstantSDNode *C = cast<ConstantSDNode>(N);
443  Imm = C->getZExtValue();
444  }
445 
446  SDLoc DL(N);
 // Low and high 32-bit halves are materialised separately and then joined
 // with a REG_SEQUENCE.
447  SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
448  CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
449  MVT::i32));
450  SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
451  CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
452  const SDValue Ops[] = {
453  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
454  SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
455  SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
456  };
457 
458  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
459  N->getValueType(0), Ops));
460  return;
461  }
462  case ISD::LOAD:
463  case ISD::STORE: {
 // Loads and stores only get the M0 copy glued on; the selection itself is
 // left to SelectCode().
464  N = glueCopyToM0(N);
465  break;
466  }
467 
468  case AMDGPUISD::BFE_I32:
469  case AMDGPUISD::BFE_U32: {
470  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
471  break;
472 
473  // There is a scalar version available, but unlike the vector version which
474  // has a separate operand for the offset and width, the scalar version packs
475  // the width and offset into a single operand. Try to move to the scalar
476  // version if the offsets are constant, so that we can try to keep extended
477  // loads of kernel arguments in SGPRs.
478 
479  // TODO: Technically we could try to pattern match scalar bitshifts of
480  // dynamic values, but it's probably not useful.
 // NOTE(review): listing line 481 is missing here; it must declare `Offset`,
 // which is tested for null below and read through getZExtValue() -- restore
 // it from upstream.
482  if (!Offset)
483  break;
484 
 // NOTE(review): listing line 485 is missing here; it must declare `Width`,
 // used the same way as `Offset` -- restore it from upstream.
486  if (!Width)
487  break;
488 
489  bool Signed = Opc == AMDGPUISD::BFE_I32;
490 
491  uint32_t OffsetVal = Offset->getZExtValue();
492  uint32_t WidthVal = Width->getZExtValue();
493 
494  ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
495  SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
496  return;
497  }
498  case AMDGPUISD::DIV_SCALE: {
499  SelectDIV_SCALE(N);
500  return;
501  }
502  case ISD::CopyToReg: {
503  const SITargetLowering& Lowering =
504  *static_cast<const SITargetLowering*>(getTargetLowering());
505  Lowering.legalizeTargetIndependentNode(N, *CurDAG);
506  break;
507  }
508  case ISD::AND:
509  case ISD::SRL:
510  case ISD::SRA:
 // NOTE(review): listing line 511 is missing here; presumably one more `case`
 // label sharing this body -- confirm against upstream.
512  if (N->getValueType(0) != MVT::i32 ||
513  Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
514  break;
515 
516  SelectS_BFE(N);
517  return;
518  case ISD::BRCOND:
519  SelectBRCOND(N);
520  return;
521 
 // NOTE(review): listing line 522 is missing here; presumably the `case` label
 // for the node handled by SelectATOMIC_CMP_SWAP -- confirm against upstream.
523  SelectATOMIC_CMP_SWAP(N);
524  return;
525  }
526 
 // Everything that was not selected by hand above goes to SelectCode().
527  SelectCode(N);
528 }
529 
// Tests whether memory node N reads from a constant address space. For a
// non-negative CbId the node's address space must equal
// AMDGPUAS::CONSTANT_BUFFER_0 + CbId; CbId == -1 is handled separately.
530 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
 // A node that does not read memory is never a constant load.
531  if (!N->readMem())
532  return false;
533  if (CbId == -1)
 // NOTE(review): listing line 534 is missing from this copy of the file, so
 // the statement guarded by the `CbId == -1` test is not visible. As printed,
 // that test would instead guard the return on listing line 536. Restore the
 // missing statement from the upstream source.
535 
536  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
537 }
538 
539 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
540  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
541  const Instruction *Term = BB->getTerminator();
542  return Term->getMetadata("amdgpu.uniform") ||
543  Term->getMetadata("structurizecfg.uniform");
544 }
545 
546 StringRef AMDGPUDAGToDAGISel::getPassName() const {
547  return "AMDGPU DAG->DAG Pattern Instruction Selection";
548 }
549 
550 //===----------------------------------------------------------------------===//
551 // Complex Patterns
552 //===----------------------------------------------------------------------===//
553 
554 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
555  SDValue& IntPtr) {
556  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
557  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
558  true);
559  return true;
560  }
561  return false;
562 }
563 
564 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
565  SDValue& BaseReg, SDValue &Offset) {
566  if (!isa<ConstantSDNode>(Addr)) {
567  BaseReg = Addr;
568  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
569  return true;
570  }
571  return false;
572 }
573 
574 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
575  SDValue &Offset) {
576  ConstantSDNode *IMMOffset;
577 
578  if (Addr.getOpcode() == ISD::ADD
579  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
580  && isInt<16>(IMMOffset->getZExtValue())) {
581 
582  Base = Addr.getOperand(0);
583  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
584  MVT::i32);
585  return true;
586  // If the pointer address is constant, we can move it to the offset field.
587  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
588  && isInt<16>(IMMOffset->getZExtValue())) {
589  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
590  SDLoc(CurDAG->getEntryNode()),
591  AMDGPU::ZERO, MVT::i32);
592  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
593  MVT::i32);
594  return true;
595  }
596 
597  // Default case, no offset
598  Base = Addr;
599  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
600  return true;
601 }
602 
603 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
604  SDValue &Offset) {
605  ConstantSDNode *C;
606  SDLoc DL(Addr);
607 
608  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
609  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
610  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
611  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
612  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
613  Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
614  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
615  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
616  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
617  Base = Addr.getOperand(0);
618  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
619  } else {
620  Base = Addr;
621  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
622  }
623 
624  return true;
625 }
626 
627 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
628  SDLoc DL(N);
629  SDValue LHS = N->getOperand(0);
630  SDValue RHS = N->getOperand(1);
631 
632  unsigned Opcode = N->getOpcode();
633  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
634  bool ProduceCarry =
635  ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
636  bool IsAdd =
637  (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);
638 
639  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
640  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
641 
642  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
643  DL, MVT::i32, LHS, Sub0);
644  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
645  DL, MVT::i32, LHS, Sub1);
646 
647  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
648  DL, MVT::i32, RHS, Sub0);
649  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
650  DL, MVT::i32, RHS, Sub1);
651 
652  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
653 
654  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
655  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
656 
657  SDNode *AddLo;
658  if (!ConsumeCarry) {
659  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
660  AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
661  } else {
662  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
663  AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
664  }
665  SDValue AddHiArgs[] = {
666  SDValue(Hi0, 0),
667  SDValue(Hi1, 0),
668  SDValue(AddLo, 1)
669  };
670  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
671 
672  SDValue RegSequenceArgs[] = {
673  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
674  SDValue(AddLo,0),
675  Sub0,
676  SDValue(AddHi,0),
677  Sub1,
678  };
679  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
680  MVT::i64, RegSequenceArgs);
681 
682  if (ProduceCarry) {
683  // Replace the carry-use
684  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
685  }
686 
687  // Replace the remaining uses.
688  CurDAG->ReplaceAllUsesWith(N, RegSequence);
689  CurDAG->RemoveDeadNode(N);
690 }
691 
692 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
693  SDLoc SL(N);
694  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
695  SDValue Ops[10];
696 
697  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
698  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
699  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
700  Ops[8] = N->getOperand(0);
701  Ops[9] = N->getOperand(4);
702 
703  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
704 }
705 
706 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
707  SDLoc SL(N);
708  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
709  SDValue Ops[8];
710 
711  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
712  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
713  Ops[6] = N->getOperand(0);
714  Ops[7] = N->getOperand(3);
715 
716  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
717 }
718 
719 // We need to handle this here because tablegen doesn't support matching
720 // instructions with multiple outputs.
721 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
722  SDLoc SL(N);
723  EVT VT = N->getValueType(0);
724 
725  assert(VT == MVT::f32 || VT == MVT::f64);
726 
727  unsigned Opc
728  = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
729 
730  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
731  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
732 }
733 
734 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
735  unsigned OffsetBits) const {
736  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
737  (OffsetBits == 8 && !isUInt<8>(Offset)))
738  return false;
739 
740  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
741  Subtarget->unsafeDSOffsetFoldingEnabled())
742  return true;
743 
744  // On Southern Islands instruction with a negative base value and an offset
745  // don't seem to work.
746  return CurDAG->SignBitIsZero(Base);
747 }
748 
749 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
750  SDValue &Offset) const {
751  SDLoc DL(Addr);
752  if (CurDAG->isBaseWithConstantOffset(Addr)) {
753  SDValue N0 = Addr.getOperand(0);
754  SDValue N1 = Addr.getOperand(1);
755  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
756  if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
757  // (add n0, c0)
758  Base = N0;
759  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
760  return true;
761  }
762  } else if (Addr.getOpcode() == ISD::SUB) {
763  // sub C, x -> add (sub 0, x), C
764  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
765  int64_t ByteOffset = C->getSExtValue();
766  if (isUInt<16>(ByteOffset)) {
767  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
768 
769  // XXX - This is kind of hacky. Create a dummy sub node so we can check
770  // the known bits in isDSOffsetLegal. We need to emit the selected node
771  // here, so this is thrown away.
772  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
773  Zero, Addr.getOperand(1));
774 
775  if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
776  MachineSDNode *MachineSub
777  = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
778  Zero, Addr.getOperand(1));
779 
780  Base = SDValue(MachineSub, 0);
781  Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
782  return true;
783  }
784  }
785  }
786  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
787  // If we have a constant address, prefer to put the constant into the
788  // offset. This can save moves to load the constant address since multiple
789  // operations can share the zero base address register, and enables merging
790  // into read2 / write2 instructions.
791 
792  SDLoc DL(Addr);
793 
794  if (isUInt<16>(CAddr->getZExtValue())) {
795  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
796  MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
797  DL, MVT::i32, Zero);
798  Base = SDValue(MovZero, 0);
799  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
800  return true;
801  }
802  }
803 
804  // default case
805  Base = Addr;
806  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
807  return true;
808 }
809 
810 // TODO: If offset is too big, put low 16-bit into offset.
811 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
812  SDValue &Offset0,
813  SDValue &Offset1) const {
814  SDLoc DL(Addr);
815 
816  if (CurDAG->isBaseWithConstantOffset(Addr)) {
817  SDValue N0 = Addr.getOperand(0);
818  SDValue N1 = Addr.getOperand(1);
819  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
820  unsigned DWordOffset0 = C1->getZExtValue() / 4;
821  unsigned DWordOffset1 = DWordOffset0 + 1;
822  // (add n0, c0)
823  if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
824  Base = N0;
825  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
826  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
827  return true;
828  }
829  } else if (Addr.getOpcode() == ISD::SUB) {
830  // sub C, x -> add (sub 0, x), C
831  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
832  unsigned DWordOffset0 = C->getZExtValue() / 4;
833  unsigned DWordOffset1 = DWordOffset0 + 1;
834 
835  if (isUInt<8>(DWordOffset0)) {
836  SDLoc DL(Addr);
837  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
838 
839  // XXX - This is kind of hacky. Create a dummy sub node so we can check
840  // the known bits in isDSOffsetLegal. We need to emit the selected node
841  // here, so this is thrown away.
842  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
843  Zero, Addr.getOperand(1));
844 
845  if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
846  MachineSDNode *MachineSub
847  = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
848  Zero, Addr.getOperand(1));
849 
850  Base = SDValue(MachineSub, 0);
851  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
852  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
853  return true;
854  }
855  }
856  }
857  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
858  unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
859  unsigned DWordOffset1 = DWordOffset0 + 1;
860  assert(4 * DWordOffset0 == CAddr->getZExtValue());
861 
862  if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
863  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
864  MachineSDNode *MovZero
865  = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
866  DL, MVT::i32, Zero);
867  Base = SDValue(MovZero, 0);
868  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
869  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
870  return true;
871  }
872  }
873 
874  // default case
875 
876  // FIXME: This is broken on SI where we still need to check if the base
877  // pointer is positive here.
878  Base = Addr;
879  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
880  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
881  return true;
882 }
883 
884 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
885  return isUInt<12>(Imm->getZExtValue());
886 }
887 
888 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
889  SDValue &VAddr, SDValue &SOffset,
890  SDValue &Offset, SDValue &Offen,
891  SDValue &Idxen, SDValue &Addr64,
892  SDValue &GLC, SDValue &SLC,
893  SDValue &TFE) const {
894  // Subtarget prefers to use flat instruction
895  if (Subtarget->useFlatForGlobal())
896  return false;
897 
898  SDLoc DL(Addr);
899 
900  if (!GLC.getNode())
901  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
902  if (!SLC.getNode())
903  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
904  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
905 
906  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
907  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
908  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
909  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
910 
911  if (CurDAG->isBaseWithConstantOffset(Addr)) {
912  SDValue N0 = Addr.getOperand(0);
913  SDValue N1 = Addr.getOperand(1);
914  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
915 
916  if (N0.getOpcode() == ISD::ADD) {
917  // (add (add N2, N3), C1) -> addr64
918  SDValue N2 = N0.getOperand(0);
919  SDValue N3 = N0.getOperand(1);
920  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
921  Ptr = N2;
922  VAddr = N3;
923  } else {
924  // (add N0, C1) -> offset
925  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
926  Ptr = N0;
927  }
928 
929  if (isLegalMUBUFImmOffset(C1)) {
930  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
931  return true;
932  }
933 
934  if (isUInt<32>(C1->getZExtValue())) {
935  // Illegal offset, store it in soffset.
936  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
937  SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
938  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
939  0);
940  return true;
941  }
942  }
943 
944  if (Addr.getOpcode() == ISD::ADD) {
945  // (add N0, N1) -> addr64
946  SDValue N0 = Addr.getOperand(0);
947  SDValue N1 = Addr.getOperand(1);
948  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
949  Ptr = N0;
950  VAddr = N1;
951  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
952  return true;
953  }
954 
955  // default case -> offset
956  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
957  Ptr = Addr;
958  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
959 
960  return true;
961 }
962 
963 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
964  SDValue &VAddr, SDValue &SOffset,
965  SDValue &Offset, SDValue &GLC,
966  SDValue &SLC, SDValue &TFE) const {
967  SDValue Ptr, Offen, Idxen, Addr64;
968 
969  // addr64 bit was removed for volcanic islands.
970  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
971  return false;
972 
973  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
974  GLC, SLC, TFE))
975  return false;
976 
977  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
978  if (C->getSExtValue()) {
979  SDLoc DL(Addr);
980 
981  const SITargetLowering& Lowering =
982  *static_cast<const SITargetLowering*>(getTargetLowering());
983 
984  SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
985  return true;
986  }
987 
988  return false;
989 }
990 
991 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
992  SDValue &VAddr, SDValue &SOffset,
993  SDValue &Offset,
994  SDValue &SLC) const {
995  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
996  SDValue GLC, TFE;
997 
998  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
999 }
1000 
1001 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1002  if (auto FI = dyn_cast<FrameIndexSDNode>(N))
1003  return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1004  return N;
1005 }
1006 
1007 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1008  SDValue &VAddr, SDValue &SOffset,
1009  SDValue &ImmOffset) const {
1010 
1011  SDLoc DL(Addr);
1012  MachineFunction &MF = CurDAG->getMachineFunction();
1014 
1015  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1016  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1017 
1018  // (add n0, c1)
1019  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1020  SDValue N0 = Addr.getOperand(0);
1021  SDValue N1 = Addr.getOperand(1);
1022 
1023  // Offsets in vaddr must be positive.
1024  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1025  if (isLegalMUBUFImmOffset(C1)) {
1026  VAddr = foldFrameIndex(N0);
1027  ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1028  return true;
1029  }
1030  }
1031 
1032  // (node)
1033  VAddr = foldFrameIndex(Addr);
1034  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1035  return true;
1036 }
1037 
1038 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1039  SDValue &SOffset, SDValue &Offset,
1040  SDValue &GLC, SDValue &SLC,
1041  SDValue &TFE) const {
1042  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1043  const SIInstrInfo *TII =
1044  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1045 
1046  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1047  GLC, SLC, TFE))
1048  return false;
1049 
1050  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1051  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1052  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1053  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1054  APInt::getAllOnesValue(32).getZExtValue(); // Size
1055  SDLoc DL(Addr);
1056 
1057  const SITargetLowering& Lowering =
1058  *static_cast<const SITargetLowering*>(getTargetLowering());
1059 
1060  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1061  return true;
1062  }
1063  return false;
1064 }
1065 
1066 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1067  SDValue &Soffset, SDValue &Offset
1068  ) const {
1069  SDValue GLC, SLC, TFE;
1070 
1071  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1072 }
1073 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1074  SDValue &Soffset, SDValue &Offset,
1075  SDValue &SLC) const {
1076  SDValue GLC, TFE;
1077 
1078  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1079 }
1080 
1081 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1082  SDValue &SOffset,
1083  SDValue &ImmOffset) const {
1084  SDLoc DL(Constant);
1085  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1086  uint32_t Overflow = 0;
1087 
1088  if (Imm >= 4096) {
1089  if (Imm <= 4095 + 64) {
1090  // Use an SOffset inline constant for 1..64
1091  Overflow = Imm - 4095;
1092  Imm = 4095;
1093  } else {
1094  // Try to keep the same value in SOffset for adjacent loads, so that
1095  // the corresponding register contents can be re-used.
1096  //
1097  // Load values with all low-bits set into SOffset, so that a larger
1098  // range of values can be covered using s_movk_i32
1099  uint32_t High = (Imm + 1) & ~4095;
1100  uint32_t Low = (Imm + 1) & 4095;
1101  Imm = Low;
1102  Overflow = High - 1;
1103  }
1104  }
1105 
1106  // There is a hardware bug in SI and CI which prevents address clamping in
1107  // MUBUF instructions from working correctly with SOffsets. The immediate
1108  // offset is unaffected.
1109  if (Overflow > 0 &&
1110  Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1111  return false;
1112 
1113  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1114 
1115  if (Overflow <= 64)
1116  SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1117  else
1118  SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1119  CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1120  0);
1121 
1122  return true;
1123 }
1124 
1125 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1126  SDValue &SOffset,
1127  SDValue &ImmOffset) const {
1128  SDLoc DL(Offset);
1129 
1130  if (!isa<ConstantSDNode>(Offset))
1131  return false;
1132 
1133  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1134 }
1135 
1136 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1137  SDValue &SOffset,
1138  SDValue &ImmOffset,
1139  SDValue &VOffset) const {
1140  SDLoc DL(Offset);
1141 
1142  // Don't generate an unnecessary voffset for constant offsets.
1143  if (isa<ConstantSDNode>(Offset)) {
1144  SDValue Tmp1, Tmp2;
1145 
1146  // When necessary, use a voffset in <= CI anyway to work around a hardware
1147  // bug.
1148  if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
1149  SelectMUBUFConstant(Offset, Tmp1, Tmp2))
1150  return false;
1151  }
1152 
1153  if (CurDAG->isBaseWithConstantOffset(Offset)) {
1154  SDValue N0 = Offset.getOperand(0);
1155  SDValue N1 = Offset.getOperand(1);
1156  if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
1157  SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
1158  VOffset = N0;
1159  return true;
1160  }
1161  }
1162 
1163  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1164  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1165  VOffset = Offset;
1166 
1167  return true;
1168 }
1169 
1170 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1171  SDValue &VAddr,
1172  SDValue &SLC,
1173  SDValue &TFE) const {
1174  VAddr = Addr;
1175  TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1176  return true;
1177 }
1178 
1179 ///
1180 /// \param EncodedOffset This is the immediate value that will be encoded
1181 /// directly into the instruction. On SI/CI the \p EncodedOffset
1182 /// will be in units of dwords and on VI+ it will be units of bytes.
1184  int64_t EncodedOffset) {
1186  isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1187 }
1188 
1189 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1190  SDValue &Offset, bool &Imm) const {
1191 
1192  // FIXME: Handle non-constant offsets.
1193  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1194  if (!C)
1195  return false;
1196 
1197  SDLoc SL(ByteOffsetNode);
1198  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1199  int64_t ByteOffset = C->getSExtValue();
1200  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1201  ByteOffset >> 2 : ByteOffset;
1202 
1203  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1204  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1205  Imm = true;
1206  return true;
1207  }
1208 
1209  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1210  return false;
1211 
1212  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1213  // 32-bit Immediates are supported on Sea Islands.
1214  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1215  } else {
1216  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1217  Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1218  C32Bit), 0);
1219  }
1220  Imm = false;
1221  return true;
1222 }
1223 
1224 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1225  SDValue &Offset, bool &Imm) const {
1226  SDLoc SL(Addr);
1227  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1228  SDValue N0 = Addr.getOperand(0);
1229  SDValue N1 = Addr.getOperand(1);
1230 
1231  if (SelectSMRDOffset(N1, Offset, Imm)) {
1232  SBase = N0;
1233  return true;
1234  }
1235  }
1236  SBase = Addr;
1237  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1238  Imm = true;
1239  return true;
1240 }
1241 
1242 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1243  SDValue &Offset) const {
1244  bool Imm;
1245  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1246 }
1247 
1248 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1249  SDValue &Offset) const {
1250 
1251  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1252  return false;
1253 
1254  bool Imm;
1255  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1256  return false;
1257 
1258  return !Imm && isa<ConstantSDNode>(Offset);
1259 }
1260 
1261 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1262  SDValue &Offset) const {
1263  bool Imm;
1264  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1265  !isa<ConstantSDNode>(Offset);
1266 }
1267 
1268 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1269  SDValue &Offset) const {
1270  bool Imm;
1271  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1272 }
1273 
1274 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1275  SDValue &Offset) const {
1276  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1277  return false;
1278 
1279  bool Imm;
1280  if (!SelectSMRDOffset(Addr, Offset, Imm))
1281  return false;
1282 
1283  return !Imm && isa<ConstantSDNode>(Offset);
1284 }
1285 
1286 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1287  SDValue &Offset) const {
1288  bool Imm;
1289  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1290  !isa<ConstantSDNode>(Offset);
1291 }
1292 
1293 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1294  SDValue &Base,
1295  SDValue &Offset) const {
1296  SDLoc DL(Index);
1297 
1298  if (CurDAG->isBaseWithConstantOffset(Index)) {
1299  SDValue N0 = Index.getOperand(0);
1300  SDValue N1 = Index.getOperand(1);
1301  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1302 
1303  // (add n0, c0)
1304  Base = N0;
1305  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1306  return true;
1307  }
1308 
1309  if (isa<ConstantSDNode>(Index))
1310  return false;
1311 
1312  Base = Index;
1313  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1314  return true;
1315 }
1316 
1317 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1318  SDValue Val, uint32_t Offset,
1319  uint32_t Width) {
1320  // Transformation function, pack the offset and width of a BFE into
1321  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1322  // source, bits [5:0] contain the offset and bits [22:16] the width.
1323  uint32_t PackedVal = Offset | (Width << 16);
1324  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1325 
1326  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1327 }
1328 
1329 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1330  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1331  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1332  // Predicate: 0 < b <= c < 32
1333 
1334  const SDValue &Shl = N->getOperand(0);
1337 
1338  if (B && C) {
1339  uint32_t BVal = B->getZExtValue();
1340  uint32_t CVal = C->getZExtValue();
1341 
1342  if (0 < BVal && BVal <= CVal && CVal < 32) {
1343  bool Signed = N->getOpcode() == ISD::SRA;
1344  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1345 
1346  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1347  32 - CVal));
1348  return;
1349  }
1350  }
1351  SelectCode(N);
1352 }
1353 
1354 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1355  switch (N->getOpcode()) {
1356  case ISD::AND:
1357  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1358  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1359  // Predicate: isMask(mask)
1360  const SDValue &Srl = N->getOperand(0);
1363 
1364  if (Shift && Mask) {
1365  uint32_t ShiftVal = Shift->getZExtValue();
1366  uint32_t MaskVal = Mask->getZExtValue();
1367 
1368  if (isMask_32(MaskVal)) {
1369  uint32_t WidthVal = countPopulation(MaskVal);
1370 
1371  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1372  Srl.getOperand(0), ShiftVal, WidthVal));
1373  return;
1374  }
1375  }
1376  }
1377  break;
1378  case ISD::SRL:
1379  if (N->getOperand(0).getOpcode() == ISD::AND) {
1380  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1381  // Predicate: isMask(mask >> b)
1382  const SDValue &And = N->getOperand(0);
1385 
1386  if (Shift && Mask) {
1387  uint32_t ShiftVal = Shift->getZExtValue();
1388  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1389 
1390  if (isMask_32(MaskVal)) {
1391  uint32_t WidthVal = countPopulation(MaskVal);
1392 
1393  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1394  And.getOperand(0), ShiftVal, WidthVal));
1395  return;
1396  }
1397  }
1398  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1399  SelectS_BFEFromShifts(N);
1400  return;
1401  }
1402  break;
1403  case ISD::SRA:
1404  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1405  SelectS_BFEFromShifts(N);
1406  return;
1407  }
1408  break;
1409 
1410  case ISD::SIGN_EXTEND_INREG: {
1411  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1412  SDValue Src = N->getOperand(0);
1413  if (Src.getOpcode() != ISD::SRL)
1414  break;
1415 
1416  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1417  if (!Amt)
1418  break;
1419 
1420  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1421  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1422  Amt->getZExtValue(), Width));
1423  return;
1424  }
1425  }
1426 
1427  SelectCode(N);
1428 }
1429 
1430 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1431  assert(N->getOpcode() == ISD::BRCOND);
1432  if (!N->hasOneUse())
1433  return false;
1434 
1435  SDValue Cond = N->getOperand(1);
1436  if (Cond.getOpcode() == ISD::CopyToReg)
1437  Cond = Cond.getOperand(2);
1438 
1439  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1440  return false;
1441 
1442  MVT VT = Cond.getOperand(0).getSimpleValueType();
1443  if (VT == MVT::i32)
1444  return true;
1445 
1446  if (VT == MVT::i64) {
1447  auto ST = static_cast<const SISubtarget *>(Subtarget);
1448 
1449  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1450  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1451  }
1452 
1453  return false;
1454 }
1455 
1456 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1457  SDValue Cond = N->getOperand(1);
1458 
1459  if (Cond.isUndef()) {
1460  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1461  N->getOperand(2), N->getOperand(0));
1462  return;
1463  }
1464 
1465  if (isCBranchSCC(N)) {
1466  // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1467  SelectCode(N);
1468  return;
1469  }
1470 
1471  SDLoc SL(N);
1472 
1473  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
1474  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1475  N->getOperand(2), // Basic Block
1476  VCC.getValue(0));
1477 }
1478 
1479 // This is here because there isn't a way to use the generated sub0_sub1 as the
1480 // subreg index to EXTRACT_SUBREG in tablegen.
1481 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1482  MemSDNode *Mem = cast<MemSDNode>(N);
1483  unsigned AS = Mem->getAddressSpace();
1484  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1485  SelectCode(N);
1486  return;
1487  }
1488 
1489  MVT VT = N->getSimpleValueType(0);
1490  bool Is32 = (VT == MVT::i32);
1491  SDLoc SL(N);
1492 
1493  MachineSDNode *CmpSwap = nullptr;
1494  if (Subtarget->hasAddr64()) {
1495  SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
1496 
1497  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1498  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
1499  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
1500  SDValue CmpVal = Mem->getOperand(2);
1501 
1502  // XXX - Do we care about glue operands?
1503 
1504  SDValue Ops[] = {
1505  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1506  };
1507 
1508  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1509  }
1510  }
1511 
1512  if (!CmpSwap) {
1513  SDValue SRsrc, SOffset, Offset, SLC;
1514  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1515  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
1516  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
1517 
1518  SDValue CmpVal = Mem->getOperand(2);
1519  SDValue Ops[] = {
1520  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1521  };
1522 
1523  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1524  }
1525  }
1526 
1527  if (!CmpSwap) {
1528  SelectCode(N);
1529  return;
1530  }
1531 
1533  *MMOs = Mem->getMemOperand();
1534  CmpSwap->setMemRefs(MMOs, MMOs + 1);
1535 
1536  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1537  SDValue Extract
1538  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1539 
1540  ReplaceUses(SDValue(N, 0), Extract);
1541  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1542  CurDAG->RemoveDeadNode(N);
1543 }
1544 
1545 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1546  SDValue &SrcMods) const {
1547  unsigned Mods = 0;
1548 
1549  Src = In;
1550 
1551  if (Src.getOpcode() == ISD::FNEG) {
1552  Mods |= SISrcMods::NEG;
1553  Src = Src.getOperand(0);
1554  }
1555 
1556  if (Src.getOpcode() == ISD::FABS) {
1557  Mods |= SISrcMods::ABS;
1558  Src = Src.getOperand(0);
1559  }
1560 
1561  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1562 
1563  return true;
1564 }
1565 
1566 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1567  SDValue &SrcMods) const {
1568  bool Res = SelectVOP3Mods(In, Src, SrcMods);
1569  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1570 }
1571 
1572 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1573  SDValue &SrcMods, SDValue &Clamp,
1574  SDValue &Omod) const {
1575  SDLoc DL(In);
1576  // FIXME: Handle Clamp and Omod
1577  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1578  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1579 
1580  return SelectVOP3Mods(In, Src, SrcMods);
1581 }
1582 
1583 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1584  SDValue &SrcMods, SDValue &Clamp,
1585  SDValue &Omod) const {
1586  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1587 
1588  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1589  cast<ConstantSDNode>(Clamp)->isNullValue() &&
1590  cast<ConstantSDNode>(Omod)->isNullValue();
1591 }
1592 
1593 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1594  SDValue &SrcMods,
1595  SDValue &Omod) const {
1596  // FIXME: Handle Omod
1597  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1598 
1599  return SelectVOP3Mods(In, Src, SrcMods);
1600 }
1601 
1602 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1603  SDValue &SrcMods,
1604  SDValue &Clamp,
1605  SDValue &Omod) const {
1606  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1607  return SelectVOP3Mods(In, Src, SrcMods);
1608 }
1609 
1610 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1611  const AMDGPUTargetLowering& Lowering =
1612  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1613  bool IsModified = false;
1614  do {
1615  IsModified = false;
1616  // Go over all selected nodes and try to fold them a bit more
1617  for (SDNode &Node : CurDAG->allnodes()) {
1618  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1619  if (!MachineNode)
1620  continue;
1621 
1622  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1623  if (ResNode != &Node) {
1624  ReplaceUses(&Node, ResNode);
1625  IsModified = true;
1626  }
1627  }
1628  CurDAG->RemoveDeadNodes();
1629  } while (IsModified);
1630 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:315
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
Interface definition for SIRegisterInfo.
SDValue getValue(unsigned R) const
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:458
AMDGPU specific subclass of TargetSubtarget.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
size_t i
void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes's memory reference descriptor list.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDVTList getVTList() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:216
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
MachineInstrBuilder MachineInstrBuilder &DefMI const MCInstrDesc & Desc
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getScratchWaveOffsetReg() const
bool readMem() const
unsigned getNumOperands() const
Return the number of values used by this operation.
virtual SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const =0
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
const SDValue & getOperand(unsigned Num) const
Address space for local memory.
Definition: AMDGPU.h:141
void setNodeId(int Id)
Set unique node id.
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
uint64_t High
constexpr bool isMask_32(uint32_t Value)
isMask_32 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:373
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, int64_t EncodedOffset)
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:344
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
unsigned SubReg
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
Reg
All possible values of the reg field in the ModR/M byte.
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
TargetRegisterInfo interface that is implemented by all hw codegen targets.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
Generation getGeneration() const
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm)
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getBasePtr() const
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:309
unsigned const MachineRegisterInfo * MRI
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const SDValue & getOperand(unsigned i) const
Address space for constant memory (VTX2)
Definition: AMDGPU.h:140
This is an important base class in LLVM.
Definition: Constant.h:42
unsigned getSubRegFromChannel(unsigned Channel) const
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
SI DAG Lowering interface definition.
uint32_t Offset
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
uint64_t getDefaultRsrcDataFormat() const
Address space for flat memory.
Definition: AMDGPU.h:142
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool bitsEq(EVT VT) const
bitsEq - Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:194
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts)
bool isInlineConstant(const APInt &Imm) const
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:494
shadow stack gc Shadow Stack GC Lowering
Interface definition of the TargetLowering class that is common to all AMD GPUs.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:175
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
int64_t getSExtValue() const
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool isUndef() const
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:76
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
#define N
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static volatile int Zero
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:312
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
Primary interface to the complete machine description for the target machine.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:47
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
int * Ptr
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
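Return a resource descriptor with the 'Add TID' bit enabled. The TID (Thread ID) is multiplied by the stride value (bits [61:48] of the resource descriptor) to create an offset, which is added to the resource pointer.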
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the resulting vector type.
Definition: ISDOpcodes.h:321
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
uint64_t getZExtValue() const
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248