//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
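// For example, (i16 (trunc (srl (i32 x), 16))) extracts the high half of x;
// in that case Out is set to x (with any bitcasts stripped).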
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                    Ops, LdHi->getMemoryVT(),
                                    LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                    Ops, LdLo->getMemoryVT(),
                                    LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;
  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
}

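// Materialize a 64-bit scalar immediate: each 32-bit half is emitted as an
// S_MOV_B32 and the halves are combined into an SReg_64 with a REG_SEQUENCE,
// e.g. 0x1122334455667788 becomes sub0 = 0x55667788 and sub1 = 0x11223344.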
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
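// A 64-bit scalar add/sub is split into 32-bit halves: the low half uses
// S_ADD_U32 / S_SUB_U32 (producing a carry in SCC), the high half consumes it
// via S_ADDC_U32 / S_SUBB_U32, and the halves are rejoined with REG_SEQUENCE.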
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

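// DS instructions take unsigned immediate offsets: 16 bits for the
// single-offset forms and two 8-bit offsets for the read2/write2 pairs,
// which isDSOffsetLegal below validates via its OffsetBits parameter.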
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

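// Match a MUBUF address: split Addr into a 64-bit resource pointer (Ptr),
// an optional VGPR address (VAddr), and SGPR/immediate offsets. Divergence of
// the operands decides which piece becomes the addr64 VGPR address; when the
// base is divergent, the resource is built from a zero pointer instead.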
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

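// SMRD loads take a 64-bit SGPR base plus an offset that is either an
// immediate or an SGPR; 32-bit bases are first widened by Expand32BitAddress.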
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    // Don't peel off the offset (c0) if doing so could possibly lead
    // the base (n0) to be negative.
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
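  // For example, Offset = 8 and Width = 4 pack to 8 | (4 << 16) = 0x00040008.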
1660  uint32_t PackedVal = Offset | (Width << 16);
1661  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1662 
1663  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1664 }
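// Illustrative aside (standalone, values hypothetical): recomputing the
// packed S_BFE operand built above. Offset 16 with width 8 packs to
// 0x00080010, with the offset in bits [5:0] and the width in bits [22:16].
static_assert((16u | (8u << 16)) == 0x00080010u,
              "offset 16, width 8, packed as described above");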
1665 
1666 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1667  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1668  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1669  // Predicate: 0 < b <= c < 32
1670 
1671  const SDValue &Shl = N->getOperand(0);
1672  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
1673  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1674 
1675  if (B && C) {
1676  uint32_t BVal = B->getZExtValue();
1677  uint32_t CVal = C->getZExtValue();
1678 
1679  if (0 < BVal && BVal <= CVal && CVal < 32) {
1680  bool Signed = N->getOpcode() == ISD::SRA;
1681  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1682 
1683  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1684  32 - CVal));
1685  return;
1686  }
1687  }
1688  SelectCode(N);
1689 }
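// Illustrative aside (standalone, values hypothetical): checking the
// shift-pair algebra above for b = 8, c = 16. "(a << 8) srl 16" extracts
// 32 - c = 16 bits starting at bit c - b = 8, i.e. BFE_U32 a, 8, 16.
static_assert(((0xDEADBEEFu << 8) >> 16) == ((0xDEADBEEFu >> 8) & 0xFFFFu),
              "(a << b) srl c == BFE_U32 a, c-b, 32-c for b=8, c=16");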
1690 
1691 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1692  switch (N->getOpcode()) {
1693  case ISD::AND:
1694  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1695  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1696  // Predicate: isMask(mask)
1697  const SDValue &Srl = N->getOperand(0);
1698  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1699  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1700 
1701  if (Shift && Mask) {
1702  uint32_t ShiftVal = Shift->getZExtValue();
1703  uint32_t MaskVal = Mask->getZExtValue();
1704 
1705  if (isMask_32(MaskVal)) {
1706  uint32_t WidthVal = countPopulation(MaskVal);
1707 
1708  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1709  Srl.getOperand(0), ShiftVal, WidthVal));
1710  return;
1711  }
1712  }
1713  }
1714  break;
1715  case ISD::SRL:
1716  if (N->getOperand(0).getOpcode() == ISD::AND) {
1717  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1718  // Predicate: isMask(mask >> b)
1719  const SDValue &And = N->getOperand(0);
1720  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1721  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And.getOperand(1));
1722 
1723  if (Shift && Mask) {
1724  uint32_t ShiftVal = Shift->getZExtValue();
1725  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1726 
1727  if (isMask_32(MaskVal)) {
1728  uint32_t WidthVal = countPopulation(MaskVal);
1729 
1730  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1731  And.getOperand(0), ShiftVal, WidthVal));
1732  return;
1733  }
1734  }
1735  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1736  SelectS_BFEFromShifts(N);
1737  return;
1738  }
1739  break;
1740  case ISD::SRA:
1741  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1742  SelectS_BFEFromShifts(N);
1743  return;
1744  }
1745  break;
1746 
1747  case ISD::SIGN_EXTEND_INREG: {
1748  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1749  SDValue Src = N->getOperand(0);
1750  if (Src.getOpcode() != ISD::SRL)
1751  break;
1752 
1753  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1754  if (!Amt)
1755  break;
1756 
1757  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1758  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1759  Amt->getZExtValue(), Width));
1760  return;
1761  }
1762  }
1763 
1764  SelectCode(N);
1765 }
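// Illustrative aside (standalone, values hypothetical): checking the
// and-of-shift pattern above. "(a srl 8) & 0xFF" reads 8 bits at offset 8,
// which is exactly BFE_U32 a, 8, popcount(0xFF) = 8.
static_assert(((0x12345678u >> 8) & 0xFFu) == 0x56u,
              "(a srl b) & mask extracts the byte at offset 8");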
1766 
1767 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1768  assert(N->getOpcode() == ISD::BRCOND);
1769  if (!N->hasOneUse())
1770  return false;
1771 
1772  SDValue Cond = N->getOperand(1);
1773  if (Cond.getOpcode() == ISD::CopyToReg)
1774  Cond = Cond.getOperand(2);
1775 
1776  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1777  return false;
1778 
1779  MVT VT = Cond.getOperand(0).getSimpleValueType();
1780  if (VT == MVT::i32)
1781  return true;
1782 
1783  if (VT == MVT::i64) {
1784  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1785 
1786  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1787  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1788  }
1789 
1790  return false;
1791 }
1792 
1793 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1794  SDValue Cond = N->getOperand(1);
1795 
1796  if (Cond.isUndef()) {
1797  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1798  N->getOperand(2), N->getOperand(0));
1799  return;
1800  }
1801 
1802  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1803  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1804  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1805  SDLoc SL(N);
1806 
1807  if (!UseSCCBr) {
1808  // This is the case where we are selecting to S_CBRANCH_VCCNZ. We have not
1809  // analyzed what generates the vcc value, so we do not know whether vcc
1810  // bits for disabled lanes are 0. Thus we need to mask out bits for
1811  // disabled lanes.
1812  //
1813  // For the case where we select S_CBRANCH_SCC1 and it gets changed to
1814  // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1815  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1816  //
1817  // We could add an analysis here of what generates the vcc value and omit
1818  // the S_AND when it is unnecessary. But it would be better to add a
1819  // separate pass after SIFixSGPRCopies that removes the unnecessary S_AND,
1820  // so that it catches both cases.
1821  Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1822  CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1823  Cond),
1824  0);
1825  }
1826 
1827  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1828  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1829  N->getOperand(2), // Basic Block
1830  VCC.getValue(0));
1831 }
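// Illustrative aside (standalone, lane masks hypothetical): the S_AND with
// EXEC above keeps garbage condition bits in disabled lanes from making
// S_CBRANCH_VCCNZ take the branch.
static_assert((0x0000000000000003ull /* EXEC: lanes 0-1 active */
               & 0xFFFFFFFFFFFFFFFCull /* cond: set only in disabled lanes */)
                  == 0,
              "no active lane is true, so the masked branch is not taken");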
1832 
1833 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1834  MVT VT = N->getSimpleValueType(0);
1835  bool IsFMA = N->getOpcode() == ISD::FMA;
1836  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1837  !Subtarget->hasFmaMixInsts()) ||
1838  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1839  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1840  SelectCode(N);
1841  return;
1842  }
1843 
1844  SDValue Src0 = N->getOperand(0);
1845  SDValue Src1 = N->getOperand(1);
1846  SDValue Src2 = N->getOperand(2);
1847  unsigned Src0Mods, Src1Mods, Src2Mods;
1848 
1849  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1850  // using the conversion from f16.
1851  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1852  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1853  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1854 
1855  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1856  "fmad selected with denormals enabled");
1857  // TODO: We can select this with f32 denormals enabled if all the sources are
1858  // converted from f16 (in which case fmad isn't legal).
1859 
1860  if (Sel0 || Sel1 || Sel2) {
1861  // For dummy operands.
1862  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1863  SDValue Ops[] = {
1864  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1865  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1866  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1867  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1868  Zero, Zero
1869  };
1870 
1871  CurDAG->SelectNodeTo(N,
1872  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1873  MVT::f32, Ops);
1874  } else {
1875  SelectCode(N);
1876  }
1877 }
1878 
1879 // This is here because there isn't a way to use the generated sub0_sub1 as the
1880 // subreg index to EXTRACT_SUBREG in tablegen.
1881 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1882  MemSDNode *Mem = cast<MemSDNode>(N);
1883  unsigned AS = Mem->getAddressSpace();
1884  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1885  SelectCode(N);
1886  return;
1887  }
1888 
1889  MVT VT = N->getSimpleValueType(0);
1890  bool Is32 = (VT == MVT::i32);
1891  SDLoc SL(N);
1892 
1893  MachineSDNode *CmpSwap = nullptr;
1894  if (Subtarget->hasAddr64()) {
1895  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1896 
1897  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1898  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1899  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1900  SDValue CmpVal = Mem->getOperand(2);
1901 
1902  // XXX - Do we care about glue operands?
1903 
1904  SDValue Ops[] = {
1905  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1906  };
1907 
1908  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1909  }
1910  }
1911 
1912  if (!CmpSwap) {
1913  SDValue SRsrc, SOffset, Offset, SLC;
1914  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1915  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1916  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1917 
1918  SDValue CmpVal = Mem->getOperand(2);
1919  SDValue Ops[] = {
1920  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1921  };
1922 
1923  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1924  }
1925  }
1926 
1927  if (!CmpSwap) {
1928  SelectCode(N);
1929  return;
1930  }
1931 
1932  MachineMemOperand *MMO = Mem->getMemOperand();
1933  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1934 
1935  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1936  SDValue Extract
1937  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1938 
1939  ReplaceUses(SDValue(N, 0), Extract);
1940  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1941  CurDAG->RemoveDeadNode(N);
1942 }
1943 
1944 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
1945  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1946  if ((IntrID != Intrinsic::amdgcn_ds_append &&
1947  IntrID != Intrinsic::amdgcn_ds_consume) ||
1948  N->getValueType(0) != MVT::i32) {
1949  SelectCode(N);
1950  return;
1951  }
1952 
1953  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1954  // be copied to an SGPR with readfirstlane.
1955  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1956  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1957 
1958  SDValue Chain = N->getOperand(0);
1959  SDValue Ptr = N->getOperand(2);
1960  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1961  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
1962 
1963  SDValue Offset;
1964  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
1965  SDValue PtrBase = Ptr.getOperand(0);
1966  SDValue PtrOffset = Ptr.getOperand(1);
1967 
1968  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
1969  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
1970  N = glueCopyToM0(N, PtrBase);
1971  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1972  }
1973  }
1974 
1975  if (!Offset) {
1976  N = glueCopyToM0(N, Ptr);
1977  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1978  }
1979 
1980  SDValue Ops[] = {
1981  Offset,
1982  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
1983  Chain,
1984  N->getOperand(N->getNumOperands() - 1) // New glue
1985  };
1986 
1987  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1988 }
1989 
1990 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1991  unsigned &Mods) const {
1992  Mods = 0;
1993  Src = In;
1994 
1995  if (Src.getOpcode() == ISD::FNEG) {
1996  Mods |= SISrcMods::NEG;
1997  Src = Src.getOperand(0);
1998  }
1999 
2000  if (Src.getOpcode() == ISD::FABS) {
2001  Mods |= SISrcMods::ABS;
2002  Src = Src.getOperand(0);
2003  }
2004 
2005  return true;
2006 }
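// Illustrative aside (standalone; applyMods is a hypothetical helper, not an
// LLVM API): what the folded modifiers compute. Folding fneg(fabs(x)) sets
// both NEG and ABS, and the hardware applies abs first and neg last, so the
// source is read as -|x|.
constexpr float applyMods(float X, bool Neg, bool Abs) {
  return (Neg ? -1.0f : 1.0f) * (Abs ? (X < 0.0f ? -X : X) : X);
}
static_assert(applyMods(-2.0f, /*Neg=*/true, /*Abs=*/true) == -2.0f,
              "fneg(fabs(x)) for x = -2.0 reads as -|x| = -2.0");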
2007 
2008 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2009  SDValue &SrcMods) const {
2010  unsigned Mods;
2011  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2012  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2013  return true;
2014  }
2015 
2016  return false;
2017 }
2018 
2019 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2020  SDValue &SrcMods) const {
2021  SelectVOP3Mods(In, Src, SrcMods);
2022  return isNoNanSrc(Src);
2023 }
2024 
2025 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2026  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2027  return false;
2028 
2029  Src = In;
2030  return true;
2031 }
2032 
2033 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2034  SDValue &SrcMods, SDValue &Clamp,
2035  SDValue &Omod) const {
2036  SDLoc DL(In);
2037  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2038  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2039 
2040  return SelectVOP3Mods(In, Src, SrcMods);
2041 }
2042 
2043 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2044  SDValue &SrcMods,
2045  SDValue &Clamp,
2046  SDValue &Omod) const {
2047  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2048  return SelectVOP3Mods(In, Src, SrcMods);
2049 }
2050 
2051 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2052  SDValue &Clamp, SDValue &Omod) const {
2053  Src = In;
2054 
2055  SDLoc DL(In);
2056  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2057  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2058 
2059  return true;
2060 }
2061 
2062 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2063  SDValue &SrcMods) const {
2064  unsigned Mods = 0;
2065  Src = In;
2066 
2067  if (Src.getOpcode() == ISD::FNEG) {
2068  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2069  Src = Src.getOperand(0);
2070  }
2071 
2072  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2073  unsigned VecMods = Mods;
2074 
2075  SDValue Lo = stripBitcast(Src.getOperand(0));
2076  SDValue Hi = stripBitcast(Src.getOperand(1));
2077 
2078  if (Lo.getOpcode() == ISD::FNEG) {
2079  Lo = stripBitcast(Lo.getOperand(0));
2080  Mods ^= SISrcMods::NEG;
2081  }
2082 
2083  if (Hi.getOpcode() == ISD::FNEG) {
2084  Hi = stripBitcast(Hi.getOperand(0));
2085  Mods ^= SISrcMods::NEG_HI;
2086  }
2087 
2088  if (isExtractHiElt(Lo, Lo))
2089  Mods |= SISrcMods::OP_SEL_0;
2090 
2091  if (isExtractHiElt(Hi, Hi))
2092  Mods |= SISrcMods::OP_SEL_1;
2093 
2094  Lo = stripExtractLoElt(Lo);
2095  Hi = stripExtractLoElt(Hi);
2096 
2097  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2098  // Really a scalar input. Just select from the low half of the register to
2099  // avoid packing.
2100 
2101  Src = Lo;
2102  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2103  return true;
2104  }
2105 
2106  Mods = VecMods;
2107  }
2108 
2109  // Packed instructions do not have abs modifiers.
2110  Mods |= SISrcMods::OP_SEL_1;
2111 
2112  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2113  return true;
2114 }
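// Illustrative aside (standalone, bit positions hypothetical; the real ones
// live in SIDefines.h): the XOR with (NEG | NEG_HI) above toggles the negate
// bit for both halves of the packed v2f16 source at once, so a second
// whole-vector fneg cancels the first.
static_assert(((0u ^ 3u) ^ 3u) == 0u,
              "two whole-vector fnegs cancel under XOR");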
2115 
2116 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2117  SDValue &SrcMods,
2118  SDValue &Clamp) const {
2119  SDLoc SL(In);
2120 
2121  // FIXME: Handle clamp and op_sel
2122  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2123 
2124  return SelectVOP3PMods(In, Src, SrcMods);
2125 }
2126 
2127 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2128  SDValue &SrcMods) const {
2129  Src = In;
2130  // FIXME: Handle op_sel
2131  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2132  return true;
2133 }
2134 
2135 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2136  SDValue &SrcMods,
2137  SDValue &Clamp) const {
2138  SDLoc SL(In);
2139 
2140  // FIXME: Handle clamp
2141  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2142 
2143  return SelectVOP3OpSel(In, Src, SrcMods);
2144 }
2145 
2146 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2147  SDValue &SrcMods) const {
2148  // FIXME: Handle op_sel
2149  return SelectVOP3Mods(In, Src, SrcMods);
2150 }
2151 
2152 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2153  SDValue &SrcMods,
2154  SDValue &Clamp) const {
2155  SDLoc SL(In);
2156 
2157  // FIXME: Handle clamp
2158  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2159 
2160  return SelectVOP3OpSelMods(In, Src, SrcMods);
2161 }
2162 
2163 // The return value is not whether the match is possible (which it always is),
2164 // but whether a conversion from f16 is actually folded.
2165 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2166  unsigned &Mods) const {
2167  Mods = 0;
2168  SelectVOP3ModsImpl(In, Src, Mods);
2169 
2170  if (Src.getOpcode() == ISD::FP_EXTEND) {
2171  Src = Src.getOperand(0);
2172  assert(Src.getValueType() == MVT::f16);
2173  Src = stripBitcast(Src);
2174 
2175  // Be careful about folding modifiers if we already have an abs. fneg is
2176  // applied last, so we don't want to apply an earlier fneg.
2177  if ((Mods & SISrcMods::ABS) == 0) {
2178  unsigned ModsTmp;
2179  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2180 
2181  if ((ModsTmp & SISrcMods::NEG) != 0)
2182  Mods ^= SISrcMods::NEG;
2183 
2184  if ((ModsTmp & SISrcMods::ABS) != 0)
2185  Mods |= SISrcMods::ABS;
2186  }
2187 
2188  // op_sel/op_sel_hi decide the source type and which half is read.
2189  // If the source's op_sel_hi is set, it indicates a conversion from f16.
2190  // If the source's op_sel is set, it picks the high half of the source
2191  // register.
2192 
2193  Mods |= SISrcMods::OP_SEL_1;
2194  if (isExtractHiElt(Src, Src)) {
2195  Mods |= SISrcMods::OP_SEL_0;
2196 
2197  // TODO: Should we try to look for neg/abs here?
2198  }
2199 
2200  return true;
2201  }
2202 
2203  return false;
2204 }
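// Illustrative aside (standalone, bit value hypothetical): the "Mods ^= NEG"
// above cancels a negate folded outside the fp_extend against one folded
// inside it, matching -(-x) == x.
static_assert((1u ^ 1u) == 0u, "two folded fnegs cancel");
static_assert(-(-(1.5f)) == 1.5f,
              "the float identity the cancellation relies on");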
2205 
2206 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2207  SDValue &SrcMods) const {
2208  unsigned Mods = 0;
2209  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2210  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2211  return true;
2212 }
2213 
2214 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2215  if (In.isUndef())
2216  return CurDAG->getUNDEF(MVT::i32);
2217 
2218  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2219  SDLoc SL(In);
2220  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2221  }
2222 
2223  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2224  SDLoc SL(In);
2225  return CurDAG->getConstant(
2226  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2227  }
2228 
2229  SDValue Src;
2230  if (isExtractHiElt(In, Src))
2231  return Src;
2232 
2233  return SDValue();
2234 }
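// Illustrative aside (standalone, constant hypothetical): the hi-half
// packing above. The f16 value 1.0 has bit pattern 0x3C00; shifted left by
// 16 it lands in bits [31:16] of the i32.
static_assert((0x3C00u << 16) == 0x3C000000u,
              "f16 1.0 (0x3C00) materialized in the high half");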
2235 
2236 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2237  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2238  return false;
2239  }
2240  const SIRegisterInfo *SIRI =
2241  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2242  const SIInstrInfo * SII =
2243  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2244 
2245  unsigned Limit = 0;
2246  bool AllUsesAcceptSReg = true;
2247  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2248  Limit < 10 && U != E; ++U, ++Limit) {
2249  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2250 
2251  // If the register class is unknown, it may be a class that must
2252  // be an SGPR, e.g. for an inline asm constraint, so be
2253  // conservative.
2254  if (!RC || SIRI->isSGPRClass(RC))
2255  return false;
2256 
2257  if (RC != &AMDGPU::VS_32RegClass) {
2258  AllUsesAcceptSReg = false;
2259  SDNode * User = *U;
2260  if (User->isMachineOpcode()) {
2261  unsigned Opc = User->getMachineOpcode();
2262  MCInstrDesc Desc = SII->get(Opc);
2263  if (Desc.isCommutable()) {
2264  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2265  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2266  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2267  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2268  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2269  if (CommutedRC == &AMDGPU::VS_32RegClass)
2270  AllUsesAcceptSReg = true;
2271  }
2272  }
2273  }
2274  // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2275  // commuting current user. This means have at least one use
2276  // that strictly require VGPR. Thus, we will not attempt to commute
2277  // other user instructions.
2278  if (!AllUsesAcceptSReg)
2279  break;
2280  }
2281  }
2282  return !AllUsesAcceptSReg && (Limit < 10);
2283 }
2284 
2285 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2286  auto Ld = cast<LoadSDNode>(N);
2287 
2288  return Ld->getAlignment() >= 4 &&
2289  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2290  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2291  !N->isDivergent()) ||
2292  (Subtarget->getScalarizeGlobalBehavior() &&
2293  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2294  !Ld->isVolatile() &&
2295  !N->isDivergent() &&
2296  static_cast<const SITargetLowering *>(
2297  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2308 }
2309 
2310 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2311  const AMDGPUTargetLowering& Lowering =
2312  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2313  bool IsModified = false;
2314  do {
2315  IsModified = false;
2316 
2317  // Go over all selected nodes and try to fold them a bit more
2318  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2319  while (Position != CurDAG->allnodes_end()) {
2320  SDNode *Node = &*Position++;
2321  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2322  if (!MachineNode)
2323  continue;
2324 
2325  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2326  if (ResNode != Node) {
2327  if (ResNode)
2328  ReplaceUses(Node, ResNode);
2329  IsModified = true;
2330  }
2331  }
2332  CurDAG->RemoveDeadNodes();
2333  } while (IsModified);
2334 }
2335 
2336 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2337  Subtarget = &MF.getSubtarget<R600Subtarget>();
2338  return SelectionDAGISel::runOnMachineFunction(MF);
2339 }
2340 
2341 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2342  if (!N->readMem())
2343  return false;
2344  if (CbId == -1)
2345  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2346  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2347 
2347 
2348  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2349 }
2350 
2351 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2352  SDValue& IntPtr) {
2353  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2354  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2355  true);
2356  return true;
2357  }
2358  return false;
2359 }
2360 
2361 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2362  SDValue& BaseReg, SDValue &Offset) {
2363  if (!isa<ConstantSDNode>(Addr)) {
2364  BaseReg = Addr;
2365  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2366  return true;
2367  }
2368  return false;
2369 }
2370 
2371 void R600DAGToDAGISel::Select(SDNode *N) {
2372  unsigned int Opc = N->getOpcode();
2373  if (N->isMachineOpcode()) {
2374  N->setNodeId(-1);
2375  return; // Already selected.
2376  }
2377 
2378  switch (Opc) {
2379  default: break;
2380  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2381  case ISD::SCALAR_TO_VECTOR:
2382  case ISD::BUILD_VECTOR: {
2383  EVT VT = N->getValueType(0);
2384  unsigned NumVectorElts = VT.getVectorNumElements();
2385  unsigned RegClassID;
2386  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
2387  // which adds a 128-bit register copy when going through the
2388  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2389  // possible because they can't be bundled by our scheduler.
2390  switch(NumVectorElts) {
2391  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2392  case 4:
2393  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2394  RegClassID = R600::R600_Reg128VerticalRegClassID;
2395  else
2396  RegClassID = R600::R600_Reg128RegClassID;
2397  break;
2398  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2399  }
2400  SelectBuildVector(N, RegClassID);
2401  return;
2402  }
2403  }
2404 
2405  SelectCode(N);
2406 }
2407 
2408 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2409  SDValue &Offset) {
2410  ConstantSDNode *C;
2411  SDLoc DL(Addr);
2412 
2413  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2414  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2415  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2416  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2417  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2418  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2419  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2420  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2421  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2422  Base = Addr.getOperand(0);
2423  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2424  } else {
2425  Base = Addr;
2426  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2427  }
2428 
2429  return true;
2430 }
2431 
2432 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2433  SDValue &Offset) {
2434  ConstantSDNode *IMMOffset;
2435 
2436  if (Addr.getOpcode() == ISD::ADD
2437  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2438  && isInt<16>(IMMOffset->getZExtValue())) {
2439 
2440  Base = Addr.getOperand(0);
2441  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2442  MVT::i32);
2443  return true;
2444  // If the pointer address is constant, we can move it to the offset field.
2445  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2446  && isInt<16>(IMMOffset->getZExtValue())) {
2447  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2448  SDLoc(CurDAG->getEntryNode()),
2449  R600::ZERO, MVT::i32);
2450  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2451  MVT::i32);
2452  return true;
2453  }
2454 
2455  // Default case, no offset
2456  Base = Addr;
2457  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2458  return true;
2459 }
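// Illustrative aside (standalone, values hypothetical): the offsets accepted
// above must fit the signed 16-bit immediate field checked with isInt<16>.
static_assert(32767 <= INT16_MAX && 32768 > INT16_MAX,
              "32767 is the largest offset that still satisfies isInt<16>");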