//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
      : AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
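
// For example, isExtractHiElt matches (i16 (trunc (srl %x, 16))) for a
// 32-bit %x and sets Out = %x; bitcasts around the 16-bit value and around
// the shift source are stripped, and any constant shift amount other than 16
// is rejected.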

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}
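
// For example, stripExtractLoElt looks through (i16 (trunc (i32 %x))) to %x
// itself, since a 16-bit user of the truncate only reads the low half of %x.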

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;
  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
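
// As a sketch, buildSMovImm64 with Imm = 0x1122334455667788 emits:
//   %lo  = S_MOV_B32 0x55667788
//   %hi  = S_MOV_B32 0x11223344
//   %val = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1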

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
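
// Each vector element occupies one 32-bit SGPR, so the class is simply the
// scalar register class of NumVectorElts * 32 bits; e.g. 4 elements select
// SReg_128 and 16 elements select SReg_512.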

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
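
// A sketch of the expansion for a plain 64-bit add (no carry-in consumed):
//   %lo  = S_ADD_U32  %lhs.sub0, %rhs.sub0   ; defines SCC
//   %hi  = S_ADDC_U32 %lhs.sub1, %rhs.sub1   ; consumes SCC via the glue edge
//   %res = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
// When a carry-out is produced, its users are rewired to the second result
// of the high-half instruction.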

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 suffix. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     AMDGPU::VCC, CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
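
// For example, with OffsetBits == 16 (single-address DS operations), byte
// offsets 0..65535 are encodable; on Southern Islands an otherwise-encodable
// offset is still rejected unless the base is provably non-negative.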

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
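
// For example, (add %ptr, 1024) selects to Base = %ptr, Offset = 1024,
// folding the constant into the DS instruction's 16-bit offset field instead
// of emitting a separate VALU add.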

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
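
// For example, a read2/write2 at (add %ptr, 8) selects to Base = %ptr,
// Offset0 = 2, Offset1 = 3: the encoded offsets count dwords, not bytes, and
// address two consecutive dwords.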

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
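
// A sketch of the decomposition (assuming divergence analysis has run):
//   (add %uniform, %divergent) -> Ptr = %uniform, VAddr = %divergent, addr64
//   (add (add %u, %d), 4092)   -> as above, with the legal immediate 4092 in
//                                 the offset field
//   %uniform                   -> Ptr = %uniform, VAddr = 0 (offset mode)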

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return static_cast<const SITargetLowering *>(getTargetLowering())->
      SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
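
// Note that the encoded value need not equal the byte offset: on subtargets
// whose SMRD offset field counts dwords, getSMRDEncodedOffset returns
// ByteOffset / 4, so e.g. a byte offset of 16 encodes as 4.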

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}
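
// A sketch of the expansion for a 32-bit address %a:
//   %hi  = S_MOV_B32 <Info->get32BitAddressHighBits()>
//   %ptr = REG_SEQUENCE SReg_64_XEXEC, %a, sub0, %hi, sub1
// producing a 64-bit address whose high half comes from the function info.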

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}
1664 
1665 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1666  SDValue &Base,
1667  SDValue &Offset) const {
1668  SDLoc DL(Index);
1669 
1670  if (CurDAG->isBaseWithConstantOffset(Index)) {
1671  SDValue N0 = Index.getOperand(0);
1672  SDValue N1 = Index.getOperand(1);
1673  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1674 
1675  // (add n0, c0)
1676  // Don't peel off the offset (c0) if doing so could possibly lead
1677  // the base (n0) to be negative.
1678  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1679  Base = N0;
1680  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1681  return true;
1682  }
1683  }
1684 
1685  if (isa<ConstantSDNode>(Index))
1686  return false;
1687 
1688  Base = Index;
1689  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1690  return true;
1691 }
1692 
1693 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1694  SDValue Val, uint32_t Offset,
1695  uint32_t Width) {
1696  // Transformation function, pack the offset and width of a BFE into
1697  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1698  // source, bits [5:0] contain the offset and bits [22:16] the width.
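 // A quick worked example of the packing (values chosen for
 // illustration): Offset = 16, Width = 8 gives
 //   PackedVal = 16 | (8 << 16) = 0x00080010.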
1699  uint32_t PackedVal = Offset | (Width << 16);
1700  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1701 
1702  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1703 }
1704 
1705 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1706  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1707  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1708  // Predicate: 0 < b <= c < 32
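 // Worked example (illustrative values): for ((a << 8) srl 24), b = 8
 // and c = 24 satisfy the predicate, so this becomes BFE_U32 a, 16, 8,
 // i.e. extract bits [23:16] of a.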
1709 
1710  const SDValue &Shl = N->getOperand(0);
1711  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1712  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1713 
1714  if (B && C) {
1715  uint32_t BVal = B->getZExtValue();
1716  uint32_t CVal = C->getZExtValue();
1717 
1718  if (0 < BVal && BVal <= CVal && CVal < 32) {
1719  bool Signed = N->getOpcode() == ISD::SRA;
1720  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1721 
1722  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1723  32 - CVal));
1724  return;
1725  }
1726  }
1727  SelectCode(N);
1728 }
1729 
1730 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1731  switch (N->getOpcode()) {
1732  case ISD::AND:
1733  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1734  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1735  // Predicate: isMask(mask)
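 // Worked example (illustrative values): (a srl 4) & 0xff becomes
 // BFE_U32 a, 4, 8, since popcount(0xff) = 8.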
1736  const SDValue &Srl = N->getOperand(0);
1737  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1738  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1739 
1740  if (Shift && Mask) {
1741  uint32_t ShiftVal = Shift->getZExtValue();
1742  uint32_t MaskVal = Mask->getZExtValue();
1743 
1744  if (isMask_32(MaskVal)) {
1745  uint32_t WidthVal = countPopulation(MaskVal);
1746 
1747  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1748  Srl.getOperand(0), ShiftVal, WidthVal));
1749  return;
1750  }
1751  }
1752  }
1753  break;
1754  case ISD::SRL:
1755  if (N->getOperand(0).getOpcode() == ISD::AND) {
1756  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1757  // Predicate: isMask(mask >> b)
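 // Worked example (illustrative values): ((a & 0xff0) srl 4) becomes
 // BFE_U32 a, 4, 8, since 0xff0 >> 4 = 0xff is a mask with popcount 8.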
1758  const SDValue &And = N->getOperand(0);
1759  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1760  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1761 
1762  if (Shift && Mask) {
1763  uint32_t ShiftVal = Shift->getZExtValue();
1764  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1765 
1766  if (isMask_32(MaskVal)) {
1767  uint32_t WidthVal = countPopulation(MaskVal);
1768 
1769  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1770  And.getOperand(0), ShiftVal, WidthVal));
1771  return;
1772  }
1773  }
1774  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1775  SelectS_BFEFromShifts(N);
1776  return;
1777  }
1778  break;
1779  case ISD::SRA:
1780  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1781  SelectS_BFEFromShifts(N);
1782  return;
1783  }
1784  break;
1785 
1786  case ISD::SIGN_EXTEND_INREG: {
1787  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1788  SDValue Src = N->getOperand(0);
1789  if (Src.getOpcode() != ISD::SRL)
1790  break;
1791 
1792  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1793  if (!Amt)
1794  break;
1795 
1796  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1797  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1798  Amt->getZExtValue(), Width));
1799  return;
1800  }
1801  }
1802 
1803  SelectCode(N);
1804 }
1805 
1806 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1807  assert(N->getOpcode() == ISD::BRCOND);
1808  if (!N->hasOneUse())
1809  return false;
1810 
1811  SDValue Cond = N->getOperand(1);
1812  if (Cond.getOpcode() == ISD::CopyToReg)
1813  Cond = Cond.getOperand(2);
1814 
1815  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1816  return false;
1817 
1818  MVT VT = Cond.getOperand(0).getSimpleValueType();
1819  if (VT == MVT::i32)
1820  return true;
1821 
1822  if (VT == MVT::i64) {
1823  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1824 
1825  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1826  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1827  }
1828 
1829  return false;
1830 }
1831 
1832 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1833  SDValue Cond = N->getOperand(1);
1834 
1835  if (Cond.isUndef()) {
1836  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1837  N->getOperand(2), N->getOperand(0));
1838  return;
1839  }
1840 
1841  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1842  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1843  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1844  SDLoc SL(N);
1845 
1846  if (!UseSCCBr) {
1847  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1848  // analyzed what generates the vcc value, so we do not know whether vcc
1849  // bits for disabled lanes are 0. Thus we need to mask out bits for
1850  // disabled lanes.
1851  //
1852  // For the case that we select S_CBRANCH_SCC1 and it gets
1853  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1854  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1855  //
1856  // We could add an analysis of what generates the vcc value here and omit
1857  // the S_AND when it is unnecessary. But it would be better to add a separate
1858  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1859  // catches both cases.
1860  Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1861  CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1862  Cond),
1863  0);
1864  }
1865 
1866  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1867  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1868  N->getOperand(2), // Basic Block
1869  VCC.getValue(0));
1870 }
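// When the SCC path is not usable, the sequence selected above is, in
// effect (a sketch; virtual register names are illustrative):
//   %mask = S_AND_B64 $exec, %cond
//   $vcc  = COPY %mask
//   S_CBRANCH_VCCNZ %target_bb
// so condition bits belonging to inactive lanes cannot trigger the branch.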
1871 
1872 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1873  MVT VT = N->getSimpleValueType(0);
1874  bool IsFMA = N->getOpcode() == ISD::FMA;
1875  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1876  !Subtarget->hasFmaMixInsts()) ||
1877  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1878  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1879  SelectCode(N);
1880  return;
1881  }
1882 
1883  SDValue Src0 = N->getOperand(0);
1884  SDValue Src1 = N->getOperand(1);
1885  SDValue Src2 = N->getOperand(2);
1886  unsigned Src0Mods, Src1Mods, Src2Mods;
1887 
1888  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1889  // using the conversion from f16.
1890  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1891  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1892  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1893 
1894  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1895  "fmad selected with denormals enabled");
1896  // TODO: We can select this with f32 denormals enabled if all the sources are
1897  // converted from f16 (in which case fmad isn't legal).
1898 
1899  if (Sel0 || Sel1 || Sel2) {
1900  // For dummy operands.
1901  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1902  SDValue Ops[] = {
1903  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1904  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1905  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1906  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1907  Zero, Zero
1908  };
1909 
1910  CurDAG->SelectNodeTo(N,
1911  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1912  MVT::f32, Ops);
1913  } else {
1914  SelectCode(N);
1915  }
1916 }
1917 
1918 // This is here because there isn't a way to use the generated sub0_sub1 as the
1919 // subreg index to EXTRACT_SUBREG in tablegen.
1920 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1921  MemSDNode *Mem = cast<MemSDNode>(N);
1922  unsigned AS = Mem->getAddressSpace();
1923  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1924  SelectCode(N);
1925  return;
1926  }
1927 
1928  MVT VT = N->getSimpleValueType(0);
1929  bool Is32 = (VT == MVT::i32);
1930  SDLoc SL(N);
1931 
1932  MachineSDNode *CmpSwap = nullptr;
1933  if (Subtarget->hasAddr64()) {
1934  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1935 
1936  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1937  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1938  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1939  SDValue CmpVal = Mem->getOperand(2);
1940 
1941  // XXX - Do we care about glue operands?
1942 
1943  SDValue Ops[] = {
1944  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1945  };
1946 
1947  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1948  }
1949  }
1950 
1951  if (!CmpSwap) {
1952  SDValue SRsrc, SOffset, Offset, SLC;
1953  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1954  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1955  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1956 
1957  SDValue CmpVal = Mem->getOperand(2);
1958  SDValue Ops[] = {
1959  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1960  };
1961 
1962  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1963  }
1964  }
1965 
1966  if (!CmpSwap) {
1967  SelectCode(N);
1968  return;
1969  }
1970 
1971  MachineMemOperand *MMO = Mem->getMemOperand();
1972  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1973 
1974  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1975  SDValue Extract
1976  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1977 
1978  ReplaceUses(SDValue(N, 0), Extract);
1979  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1980  CurDAG->RemoveDeadNode(N);
1981 }
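// Rationale (hedged): the _RTN cmpswap variants operate on the
// (data, cmp) register pair and return the original memory value in the
// low half, so only sub0 (i32) or sub0_sub1 (i64) of the result is
// extracted above.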
1982 
1983 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
1984  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1985  if ((IntrID != Intrinsic::amdgcn_ds_append &&
1986  IntrID != Intrinsic::amdgcn_ds_consume) ||
1987  N->getValueType(0) != MVT::i32) {
1988  SelectCode(N);
1989  return;
1990  }
1991 
1992  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1993  // be copied to an SGPR with readfirstlane.
1994  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1995  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1996 
1997  SDValue Chain = N->getOperand(0);
1998  SDValue Ptr = N->getOperand(2);
1999  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2000  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2001 
2002  SDValue Offset;
2003  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2004  SDValue PtrBase = Ptr.getOperand(0);
2005  SDValue PtrOffset = Ptr.getOperand(1);
2006 
2007  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2008  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2009  N = glueCopyToM0(N, PtrBase);
2010  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2011  }
2012  }
2013 
2014  if (!Offset) {
2015  N = glueCopyToM0(N, Ptr);
2016  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2017  }
2018 
2019  SDValue Ops[] = {
2020  Offset,
2021  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2022  Chain,
2023  N->getOperand(N->getNumOperands() - 1) // New glue
2024  };
2025 
2026  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2027 }
2028 
2029 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2030  unsigned &Mods) const {
2031  Mods = 0;
2032  Src = In;
2033 
2034  if (Src.getOpcode() == ISD::FNEG) {
2035  Mods |= SISrcMods::NEG;
2036  Src = Src.getOperand(0);
2037  }
2038 
2039  if (Src.getOpcode() == ISD::FABS) {
2040  Mods |= SISrcMods::ABS;
2041  Src = Src.getOperand(0);
2042  }
2043 
2044  return true;
2045 }
2046 
2047 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2048  SDValue &SrcMods) const {
2049  unsigned Mods;
2050  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2051  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2052  return true;
2053  }
2054 
2055  return false;
2056 }
2057 
2058 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2059  SDValue &SrcMods) const {
2060  SelectVOP3Mods(In, Src, SrcMods);
2061  return isNoNanSrc(Src);
2062 }
2063 
2064 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2065  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2066  return false;
2067 
2068  Src = In;
2069  return true;
2070 }
2071 
2072 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2073  SDValue &SrcMods, SDValue &Clamp,
2074  SDValue &Omod) const {
2075  SDLoc DL(In);
2076  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2077  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2078 
2079  return SelectVOP3Mods(In, Src, SrcMods);
2080 }
2081 
2082 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2083  SDValue &SrcMods,
2084  SDValue &Clamp,
2085  SDValue &Omod) const {
2086  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2087  return SelectVOP3Mods(In, Src, SrcMods);
2088 }
2089 
2090 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2091  SDValue &Clamp, SDValue &Omod) const {
2092  Src = In;
2093 
2094  SDLoc DL(In);
2095  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2096  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2097 
2098  return true;
2099 }
2100 
2101 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2102  SDValue &SrcMods) const {
2103  unsigned Mods = 0;
2104  Src = In;
2105 
2106  if (Src.getOpcode() == ISD::FNEG) {
2107  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2108  Src = Src.getOperand(0);
2109  }
2110 
2111  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2112  unsigned VecMods = Mods;
2113 
2114  SDValue Lo = stripBitcast(Src.getOperand(0));
2115  SDValue Hi = stripBitcast(Src.getOperand(1));
2116 
2117  if (Lo.getOpcode() == ISD::FNEG) {
2118  Lo = stripBitcast(Lo.getOperand(0));
2119  Mods ^= SISrcMods::NEG;
2120  }
2121 
2122  if (Hi.getOpcode() == ISD::FNEG) {
2123  Hi = stripBitcast(Hi.getOperand(0));
2124  Mods ^= SISrcMods::NEG_HI;
2125  }
2126 
2127  if (isExtractHiElt(Lo, Lo))
2128  Mods |= SISrcMods::OP_SEL_0;
2129 
2130  if (isExtractHiElt(Hi, Hi))
2131  Mods |= SISrcMods::OP_SEL_1;
2132 
2133  Lo = stripExtractLoElt(Lo);
2134  Hi = stripExtractLoElt(Hi);
2135 
2136  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2137  // Really a scalar input. Just select from the low half of the register to
2138  // avoid packing.
2139 
2140  Src = Lo;
2141  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2142  return true;
2143  }
2144 
2145  Mods = VecMods;
2146  }
2147 
2148  // Packed instructions do not have abs modifiers.
2149  Mods |= SISrcMods::OP_SEL_1;
2150 
2151  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2152  return true;
2153 }
2154 
2155 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2156  SDValue &SrcMods,
2157  SDValue &Clamp) const {
2158  SDLoc SL(In);
2159 
2160  // FIXME: Handle clamp and op_sel
2161  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2162 
2163  return SelectVOP3PMods(In, Src, SrcMods);
2164 }
2165 
2166 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2167  SDValue &SrcMods) const {
2168  Src = In;
2169  // FIXME: Handle op_sel
2170  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2171  return true;
2172 }
2173 
2174 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2175  SDValue &SrcMods,
2176  SDValue &Clamp) const {
2177  SDLoc SL(In);
2178 
2179  // FIXME: Handle clamp
2180  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2181 
2182  return SelectVOP3OpSel(In, Src, SrcMods);
2183 }
2184 
2185 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2186  SDValue &SrcMods) const {
2187  // FIXME: Handle op_sel
2188  return SelectVOP3Mods(In, Src, SrcMods);
2189 }
2190 
2191 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2192  SDValue &SrcMods,
2193  SDValue &Clamp) const {
2194  SDLoc SL(In);
2195 
2196  // FIXME: Handle clamp
2197  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2198 
2199  return SelectVOP3OpSelMods(In, Src, SrcMods);
2200 }
2201 
2202 // The return value is not whether the match is possible (which it always is),
2203 // but whether or not a conversion is really used.
2204 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2205  unsigned &Mods) const {
2206  Mods = 0;
2207  SelectVOP3ModsImpl(In, Src, Mods);
2208 
2209  if (Src.getOpcode() == ISD::FP_EXTEND) {
2210  Src = Src.getOperand(0);
2211  assert(Src.getValueType() == MVT::f16);
2212  Src = stripBitcast(Src);
2213 
2214  // Be careful about folding modifiers if we already have an abs. fneg is
2215  // applied last, so we don't want to apply an earlier fneg.
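 // e.g. fabs(fneg(x)) == fabs(x): since NEG is applied after ABS in the
 // modifier encoding, folding the inner fneg into NEG here would
 // incorrectly yield -|x|.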
2216  if ((Mods & SISrcMods::ABS) == 0) {
2217  unsigned ModsTmp;
2218  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2219 
2220  if ((ModsTmp & SISrcMods::NEG) != 0)
2221  Mods ^= SISrcMods::NEG;
2222 
2223  if ((ModsTmp & SISrcMods::ABS) != 0)
2224  Mods |= SISrcMods::ABS;
2225  }
2226 
2227  // op_sel/op_sel_hi decide the source type and source.
2228  // If the source's op_sel_hi is set, it indicates a conversion from f16.
2229  // If the source's op_sel is set, it picks the high half of the source
2230  // register.
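 // e.g. OP_SEL_1 alone converts the f16 value in the low half of the
 // register; OP_SEL_0 | OP_SEL_1 converts the f16 value in the high half.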
2231 
2232  Mods |= SISrcMods::OP_SEL_1;
2233  if (isExtractHiElt(Src, Src)) {
2234  Mods |= SISrcMods::OP_SEL_0;
2235 
2236  // TODO: Should we try to look for neg/abs here?
2237  }
2238 
2239  return true;
2240  }
2241 
2242  return false;
2243 }
2244 
2245 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2246  SDValue &SrcMods) const {
2247  unsigned Mods = 0;
2248  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2249  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2250  return true;
2251 }
2252 
2253 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2254  if (In.isUndef())
2255  return CurDAG->getUNDEF(MVT::i32);
2256 
2257  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2258  SDLoc SL(In);
2259  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2260  }
2261 
2262  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2263  SDLoc SL(In);
2264  return CurDAG->getConstant(
2265  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2266  }
2267 
2268  SDValue Src;
2269  if (isExtractHiElt(In, Src))
2270  return Src;
2271 
2272  return SDValue();
2273 }
2274 
2275 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2276  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2277  return false;
2278  }
2279  const SIRegisterInfo *SIRI =
2280  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2281  const SIInstrInfo * SII =
2282  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2283 
2284  unsigned Limit = 0;
2285  bool AllUsesAcceptSReg = true;
2286  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2287  Limit < 10 && U != E; ++U, ++Limit) {
2288  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2289 
2290  // If the register class is unknown, it could be an unknown
2291  // register class that needs to be an SGPR, e.g. an inline asm
2292  // constraint.
2293  if (!RC || SIRI->isSGPRClass(RC))
2294  return false;
2295 
2296  if (RC != &AMDGPU::VS_32RegClass) {
2297  AllUsesAcceptSReg = false;
2298  SDNode * User = *U;
2299  if (User->isMachineOpcode()) {
2300  unsigned Opc = User->getMachineOpcode();
2301  MCInstrDesc Desc = SII->get(Opc);
2302  if (Desc.isCommutable()) {
2303  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2304  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2305  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2306  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2307  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2308  if (CommutedRC == &AMDGPU::VS_32RegClass)
2309  AllUsesAcceptSReg = true;
2310  }
2311  }
2312  }
2313  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
2314  // commuting the current user. This means we have at least one use
2315  // that strictly requires a VGPR, so we will not attempt to commute
2316  // other user instructions.
2317  if (!AllUsesAcceptSReg)
2318  break;
2319  }
2320  }
2321  return !AllUsesAcceptSReg && (Limit < 10);
2322 }
2323 
2324 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2325  auto Ld = cast<LoadSDNode>(N);
2326 
2327  return Ld->getAlignment() >= 4 &&
2328  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2329  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2330  !N->isDivergent()) ||
2331  (Subtarget->getScalarizeGlobalBehavior() &&
2332  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2333  !Ld->isVolatile() && !N->isDivergent() &&
2334  static_cast<const SITargetLowering *>(
2335  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2347 }
2348 
2349 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2350  const AMDGPUTargetLowering& Lowering =
2351  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2352  bool IsModified = false;
2353  do {
2354  IsModified = false;
2355 
2356  // Go over all selected nodes and try to fold them a bit more
2357  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2358  while (Position != CurDAG->allnodes_end()) {
2359  SDNode *Node = &*Position++;
2360  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2361  if (!MachineNode)
2362  continue;
2363 
2364  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2365  if (ResNode != Node) {
2366  if (ResNode)
2367  ReplaceUses(Node, ResNode);
2368  IsModified = true;
2369  }
2370  }
2371  CurDAG->RemoveDeadNodes();
2372  } while (IsModified);
2373 }
2374 
2375 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2376  Subtarget = &MF.getSubtarget<R600Subtarget>();
2377  return SelectionDAGISel::runOnMachineFunction(MF);
2378 }
2379 
2380 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2381  if (!N->readMem())
2382  return false;
2383  if (CbId == -1)
2384  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2385  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2386 
2387  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2388 }
2389 
2390 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2391  SDValue& IntPtr) {
2392  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2393  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2394  true);
2395  return true;
2396  }
2397  return false;
2398 }
2399 
2400 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2401  SDValue& BaseReg, SDValue &Offset) {
2402  if (!isa<ConstantSDNode>(Addr)) {
2403  BaseReg = Addr;
2404  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2405  return true;
2406  }
2407  return false;
2408 }
2409 
2410 void R600DAGToDAGISel::Select(SDNode *N) {
2411  unsigned int Opc = N->getOpcode();
2412  if (N->isMachineOpcode()) {
2413  N->setNodeId(-1);
2414  return; // Already selected.
2415  }
2416 
2417  switch (Opc) {
2418  default: break;
2419  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2420  case ISD::SCALAR_TO_VECTOR:
2421  case ISD::BUILD_VECTOR: {
2422  EVT VT = N->getValueType(0);
2423  unsigned NumVectorElts = VT.getVectorNumElements();
2424  unsigned RegClassID;
2425  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2426  // that adds a 128-bit register copy when going through the
2427  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
2428  // as possible because they can't be bundled by our scheduler.
2429  switch(NumVectorElts) {
2430  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2431  case 4:
2432  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2433  RegClassID = R600::R600_Reg128VerticalRegClassID;
2434  else
2435  RegClassID = R600::R600_Reg128RegClassID;
2436  break;
2437  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2438  }
2439  SelectBuildVector(N, RegClassID);
2440  return;
2441  }
2442  }
2443 
2444  SelectCode(N);
2445 }
2446 
2447 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2448  SDValue &Offset) {
2449  ConstantSDNode *C;
2450  SDLoc DL(Addr);
2451 
2452  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2453  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2454  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2455  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2456  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2457  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2458  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2459  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2460  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2461  Base = Addr.getOperand(0);
2462  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2463  } else {
2464  Base = Addr;
2465  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2466  }
2467 
2468  return true;
2469 }
2470 
2471 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2472  SDValue &Offset) {
2473  ConstantSDNode *IMMOffset;
2474 
2475  if (Addr.getOpcode() == ISD::ADD
2476  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2477  && isInt<16>(IMMOffset->getZExtValue())) {
2478 
2479  Base = Addr.getOperand(0);
2480  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2481  MVT::i32);
2482  return true;
2483  // If the pointer address is constant, we can move it to the offset field.
2484  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2485  && isInt<16>(IMMOffset->getZExtValue())) {
2486  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2487  SDLoc(CurDAG->getEntryNode()),
2488  R600::ZERO, MVT::i32);
2489  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2490  MVT::i32);
2491  return true;
2492  }
2493 
2494  // Default case, no offset
2495  Base = Addr;
2496  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2497  return true;
2498 }