LLVM  9.0.0svn
AMDGPUISelDAGToDAG.cpp
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
      : AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
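
// For illustration: given In = (i16 (trunc (srl (i32 %x), 16))), this returns
// true and sets Out = %x, so the caller can refer to the high half of %x
// directly.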

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}
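
// For illustration: (i16 (trunc (i32 %x))) reads only the low 16 bits of %x,
// so this returns %x itself (looking through a bitcast if one is present).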

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  // Write max value to m0 before each load operation

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;
  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
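
// For illustration, a 64-bit scalar immediate such as 0x1234567800000042 is
// materialized as two 32-bit moves that are then tied back together:
//   %lo  = S_MOV_B32 0x00000042
//   %hi  = S_MOV_B32 0x12345678
//   %val = REG_SEQUENCE %lo, sub0, %hi, sub1   ; 64-bit SGPR pair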

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

563 
564 static bool getConstantValue(SDValue N, uint32_t &Out) {
565  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
566  Out = C->getAPIntValue().getZExtValue();
567  return true;
568  }
569 
570  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
571  Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
572  return true;
573  }
574 
575  return false;
576 }

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
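
// For illustration, a v4i32 build_vector %a, %b, %c, %d becomes a single
// REG_SEQUENCE over the 128-bit SGPR class:
//   %vec = REG_SEQUENCE %a, sub0, %b, sub1, %c, sub2, %d, sub3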

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
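
// For illustration, an i64 add of %a and %b is split into 32-bit halves and
// rebuilt from the scalar add-with-carry pair:
//   %lo  = S_ADD_U32  %a.sub0, %b.sub0   ; defines SCC (carry out)
//   %hi  = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes SCC
//   %sum = REG_SEQUENCE %lo, sub0, %hi, sub1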

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 suffix. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     AMDGPU::VCC, CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
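
// For illustration: for a DS access at (add %ptr, 100), this produces
// base = %ptr with the constant 100 folded into the 16-bit immediate offset
// field of the ds_read / ds_write encoding, so no extra VALU add is needed.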

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
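
// For illustration: a 64-bit LDS access at (add %ptr, 8) yields base = %ptr
// with dword-scaled offsets offset0 = 2 and offset1 = 3, matching the two
// 32-bit slots accessed by ds_read2_b32 / ds_write2_b32.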

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE,
                           DLC);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return static_cast<const SITargetLowering *>(getTargetLowering())->
      SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}
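
// For illustration: a 32-bit address %a is widened to 64 bits as
//   %hi  = S_MOV_B32 <high bits from get32BitAddressHighBits()>
//   %a64 = REG_SEQUENCE %a, sub0, %hi, sub1
// so SMRD patterns that expect a 64-bit base can use it unchanged.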

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}
1668 
1669 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1670  SDValue &Base,
1671  SDValue &Offset) const {
1672  SDLoc DL(Index);
1673 
1674  if (CurDAG->isBaseWithConstantOffset(Index)) {
1675  SDValue N0 = Index.getOperand(0);
1676  SDValue N1 = Index.getOperand(1);
1677  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1678 
1679  // (add n0, c0)
1680  // Don't peel off the offset (c0) if doing so could possibly lead
1681  // the base (n0) to be negative.
1682  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1683  Base = N0;
1684  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1685  return true;
1686  }
1687  }
1688 
1689  if (isa<ConstantSDNode>(Index))
1690  return false;
1691 
1692  Base = Index;
1693  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1694  return true;
1695 }
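// Examples for the index splitting above:
//
//   (add n0, -4)                        --> Base = n0, Offset = -4
//                                           (c0 <= 0 cannot push n0 negative)
//   (add n0, 7), n0 known non-negative  --> Base = n0, Offset = 7
//   (add n0, 7), n0 sign unknown        --> Base = (add n0, 7), Offset = 0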
1696 
1697 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1698  SDValue Val, uint32_t Offset,
1699  uint32_t Width) {
1700  // Transformation function, pack the offset and width of a BFE into
1701  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1702  // source, bits [5:0] contain the offset and bits [22:16] the width.
1703  uint32_t PackedVal = Offset | (Width << 16);
1704  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1705 
1706  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1707 }
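// e.g. Offset = 8, Width = 5 packs to 8 | (5 << 16) = 0x50008, so
// S_BFE_U32 dst, val, 0x50008 extracts bits [12:8] of val.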
1708 
1709 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1710  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1711  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1712  // Predicate: 0 < b <= c < 32
1713 
1714  const SDValue &Shl = N->getOperand(0);
1715  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1716  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1717 
1718  if (B && C) {
1719  uint32_t BVal = B->getZExtValue();
1720  uint32_t CVal = C->getZExtValue();
1721 
1722  if (0 < BVal && BVal <= CVal && CVal < 32) {
1723  bool Signed = N->getOpcode() == ISD::SRA;
1724  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1725 
1726  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1727  32 - CVal));
1728  return;
1729  }
1730  }
1731  SelectCode(N);
1732 }
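// Worked example: (srl (shl x, 8), 12) has b = 8 and c = 12, so it selects
// to S_BFE_U32 x, offset = c - b = 4, width = 32 - c = 20, i.e. bits [23:4].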
1733 
1734 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1735  switch (N->getOpcode()) {
1736  case ISD::AND:
1737  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1738  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1739  // Predicate: isMask(mask)
1740  const SDValue &Srl = N->getOperand(0);
1741  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1742  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1743 
1744  if (Shift && Mask) {
1745  uint32_t ShiftVal = Shift->getZExtValue();
1746  uint32_t MaskVal = Mask->getZExtValue();
1747 
1748  if (isMask_32(MaskVal)) {
1749  uint32_t WidthVal = countPopulation(MaskVal);
1750 
1751  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1752  Srl.getOperand(0), ShiftVal, WidthVal));
1753  return;
1754  }
1755  }
1756  }
1757  break;
1758  case ISD::SRL:
1759  if (N->getOperand(0).getOpcode() == ISD::AND) {
1760  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1761  // Predicate: isMask(mask >> b)
1762  const SDValue &And = N->getOperand(0);
1763  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1764  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1765 
1766  if (Shift && Mask) {
1767  uint32_t ShiftVal = Shift->getZExtValue();
1768  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1769 
1770  if (isMask_32(MaskVal)) {
1771  uint32_t WidthVal = countPopulation(MaskVal);
1772 
1773  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1774  And.getOperand(0), ShiftVal, WidthVal));
1775  return;
1776  }
1777  }
1778  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1779  SelectS_BFEFromShifts(N);
1780  return;
1781  }
1782  break;
1783  case ISD::SRA:
1784  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1785  SelectS_BFEFromShifts(N);
1786  return;
1787  }
1788  break;
1789 
1790  case ISD::SIGN_EXTEND_INREG: {
1791  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1792  SDValue Src = N->getOperand(0);
1793  if (Src.getOpcode() != ISD::SRL)
1794  break;
1795 
1796  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1797  if (!Amt)
1798  break;
1799 
1800  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1801  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1802  Amt->getZExtValue(), Width));
1803  return;
1804  }
1805  }
1806 
1807  SelectCode(N);
1808 }
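// Concrete instances of the patterns above:
//
//   (and (srl x, 3), 0x1f)       --> S_BFE_U32 x, 3, 5   (popcount(0x1f) = 5)
//   (srl (and x, 0xff0), 4)      --> S_BFE_U32 x, 4, 8   (0xff0 >> 4 = 0xff)
//   (sext_inreg (srl x, 16), i8) --> S_BFE_I32 x, 16, 8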
1809 
1810 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1811  assert(N->getOpcode() == ISD::BRCOND);
1812  if (!N->hasOneUse())
1813  return false;
1814 
1815  SDValue Cond = N->getOperand(1);
1816  if (Cond.getOpcode() == ISD::CopyToReg)
1817  Cond = Cond.getOperand(2);
1818 
1819  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1820  return false;
1821 
1822  MVT VT = Cond.getOperand(0).getSimpleValueType();
1823  if (VT == MVT::i32)
1824  return true;
1825 
1826  if (VT == MVT::i64) {
1827  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1828 
1829  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1830  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1831  }
1832 
1833  return false;
1834 }
1835 
1836 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1837  SDValue Cond = N->getOperand(1);
1838 
1839  if (Cond.isUndef()) {
1840  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1841  N->getOperand(2), N->getOperand(0));
1842  return;
1843  }
1844 
1845  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1846  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1847  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1848  SDLoc SL(N);
1849 
1850  if (!UseSCCBr) {
1851  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1852  // analyzed what generates the vcc value, so we do not know whether vcc
1853  // bits for disabled lanes are 0. Thus we need to mask out bits for
1854  // disabled lanes.
1855  //
1856  // For the case that we select S_CBRANCH_SCC1 and it gets
1857  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1858  // SIInstrInfo::moveToVALU which inserts the S_AND.
1859  //
1860  // We could add an analysis of what generates the vcc value here and omit
1861  // the S_AND when it is unnecessary. But it would be better to add a
1862  // separate pass after SIFixSGPRCopies that removes unnecessary S_ANDs, so
1863  // it catches both cases.
1864  Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1865  CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1866  Cond),
1867  0);
1868  }
1869 
1870  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1871  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1872  N->getOperand(2), // Basic Block
1873  VCC.getValue(0));
1874 }
1875 
1876 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1877  MVT VT = N->getSimpleValueType(0);
1878  bool IsFMA = N->getOpcode() == ISD::FMA;
1879  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1880  !Subtarget->hasFmaMixInsts()) ||
1881  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1882  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1883  SelectCode(N);
1884  return;
1885  }
1886 
1887  SDValue Src0 = N->getOperand(0);
1888  SDValue Src1 = N->getOperand(1);
1889  SDValue Src2 = N->getOperand(2);
1890  unsigned Src0Mods, Src1Mods, Src2Mods;
1891 
1892  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1893  // using the conversion from f16.
1894  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1895  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1896  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1897 
1898  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1899  "fmad selected with denormals enabled");
1900  // TODO: We can select this with f32 denormals enabled if all the sources are
1901  // converted from f16 (in which case fmad isn't legal).
1902 
1903  if (Sel0 || Sel1 || Sel2) {
1904  // For dummy operands.
1905  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1906  SDValue Ops[] = {
1907  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1908  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1909  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1910  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1911  Zero, Zero
1912  };
1913 
1914  CurDAG->SelectNodeTo(N,
1915  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1916  MVT::f32, Ops);
1917  } else {
1918  SelectCode(N);
1919  }
1920 }
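// For illustration, on a subtarget with v_fma_mix_f32:
//
//   (fma (fp_extend f16:a), f32:b, f32:c)
//     --> V_FMA_MIX_F32 a, b, c with op_sel_hi set on src0, so the
//         f16-to-f32 conversion of a happens inside the instruction.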
1921 
1922 // This is here because there isn't a way to use the generated sub0_sub1 as the
1923 // subreg index to EXTRACT_SUBREG in tablegen.
1924 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1925  MemSDNode *Mem = cast<MemSDNode>(N);
1926  unsigned AS = Mem->getAddressSpace();
1927  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1928  SelectCode(N);
1929  return;
1930  }
1931 
1932  MVT VT = N->getSimpleValueType(0);
1933  bool Is32 = (VT == MVT::i32);
1934  SDLoc SL(N);
1935 
1936  MachineSDNode *CmpSwap = nullptr;
1937  if (Subtarget->hasAddr64()) {
1938  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1939 
1940  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1941  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1942  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1943  SDValue CmpVal = Mem->getOperand(2);
1944 
1945  // XXX - Do we care about glue operands?
1946 
1947  SDValue Ops[] = {
1948  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1949  };
1950 
1951  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1952  }
1953  }
1954 
1955  if (!CmpSwap) {
1956  SDValue SRsrc, SOffset, Offset, SLC;
1957  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1958  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1959  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1960 
1961  SDValue CmpVal = Mem->getOperand(2);
1962  SDValue Ops[] = {
1963  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1964  };
1965 
1966  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1967  }
1968  }
1969 
1970  if (!CmpSwap) {
1971  SelectCode(N);
1972  return;
1973  }
1974 
1975  MachineMemOperand *MMO = Mem->getMemOperand();
1976  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1977 
1978  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1979  SDValue Extract
1980  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1981 
1982  ReplaceUses(SDValue(N, 0), Extract);
1983  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1984  CurDAG->RemoveDeadNode(N);
1985 }
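// The _RTN buffer atomics return the loaded (pre-swap) value in the low
// half of the wide data operand, so the scalar result is recovered with an
// EXTRACT_SUBREG of sub0 (32-bit) or sub0_sub1 (64-bit).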
1986 
1987 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
1988  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1989  // be copied to an SGPR with readfirstlane.
1990  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1991  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1992 
1993  SDValue Chain = N->getOperand(0);
1994  SDValue Ptr = N->getOperand(2);
1995  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1996  MachineMemOperand *MMO = M->getMemOperand();
1997  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
1998 
1999  SDValue Offset;
2000  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2001  SDValue PtrBase = Ptr.getOperand(0);
2002  SDValue PtrOffset = Ptr.getOperand(1);
2003 
2004  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2005  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2006  N = glueCopyToM0(N, PtrBase);
2007  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2008  }
2009  }
2010 
2011  if (!Offset) {
2012  N = glueCopyToM0(N, Ptr);
2013  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2014  }
2015 
2016  SDValue Ops[] = {
2017  Offset,
2018  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2019  Chain,
2020  N->getOperand(N->getNumOperands() - 1) // New glue
2021  };
2022 
2023  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2024  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2025 }
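// Example: ds_append on a pointer (add base, 8), where 8 is a legal 16-bit
// DS offset, copies base to m0 via glueCopyToM0 and emits DS_APPEND with
// offset:8; otherwise the whole pointer goes to m0 and the offset is 0.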
2026 
2027 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2028  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2029  switch (IntrID) {
2030  case Intrinsic::amdgcn_ds_append:
2031  case Intrinsic::amdgcn_ds_consume: {
2032  if (N->getValueType(0) != MVT::i32)
2033  break;
2034  SelectDSAppendConsume(N, IntrID);
2035  return;
2036  }
2037  default:
2038  break;
2039  }
2040 
2041  SelectCode(N);
2042 }
2043 
2044 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2045  unsigned &Mods) const {
2046  Mods = 0;
2047  Src = In;
2048 
2049  if (Src.getOpcode() == ISD::FNEG) {
2050  Mods |= SISrcMods::NEG;
2051  Src = Src.getOperand(0);
2052  }
2053 
2054  if (Src.getOpcode() == ISD::FABS) {
2055  Mods |= SISrcMods::ABS;
2056  Src = Src.getOperand(0);
2057  }
2058 
2059  return true;
2060 }
2061 
2062 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2063  SDValue &SrcMods) const {
2064  unsigned Mods;
2065  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2066  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2067  return true;
2068  }
2069 
2070  return false;
2071 }
2072 
2073 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2074  SDValue &SrcMods) const {
2075  SelectVOP3Mods(In, Src, SrcMods);
2076  return isNoNanSrc(Src);
2077 }
2078 
2079 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2080  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2081  return false;
2082 
2083  Src = In;
2084  return true;
2085 }
2086 
2087 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2088  SDValue &SrcMods, SDValue &Clamp,
2089  SDValue &Omod) const {
2090  SDLoc DL(In);
2091  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2092  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2093 
2094  return SelectVOP3Mods(In, Src, SrcMods);
2095 }
2096 
2097 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2098  SDValue &SrcMods,
2099  SDValue &Clamp,
2100  SDValue &Omod) const {
2101  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2102  return SelectVOP3Mods(In, Src, SrcMods);
2103 }
2104 
2105 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2106  SDValue &Clamp, SDValue &Omod) const {
2107  Src = In;
2108 
2109  SDLoc DL(In);
2110  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2111  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2112 
2113  return true;
2114 }
2115 
2116 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2117  SDValue &SrcMods) const {
2118  unsigned Mods = 0;
2119  Src = In;
2120 
2121  if (Src.getOpcode() == ISD::FNEG) {
2122  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2123  Src = Src.getOperand(0);
2124  }
2125 
2126  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2127  unsigned VecMods = Mods;
2128 
2129  SDValue Lo = stripBitcast(Src.getOperand(0));
2130  SDValue Hi = stripBitcast(Src.getOperand(1));
2131 
2132  if (Lo.getOpcode() == ISD::FNEG) {
2133  Lo = stripBitcast(Lo.getOperand(0));
2134  Mods ^= SISrcMods::NEG;
2135  }
2136 
2137  if (Hi.getOpcode() == ISD::FNEG) {
2138  Hi = stripBitcast(Hi.getOperand(0));
2139  Mods ^= SISrcMods::NEG_HI;
2140  }
2141 
2142  if (isExtractHiElt(Lo, Lo))
2143  Mods |= SISrcMods::OP_SEL_0;
2144 
2145  if (isExtractHiElt(Hi, Hi))
2146  Mods |= SISrcMods::OP_SEL_1;
2147 
2148  Lo = stripExtractLoElt(Lo);
2149  Hi = stripExtractLoElt(Hi);
2150 
2151  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2152  // Really a scalar input. Just select from the low half of the register to
2153  // avoid packing.
2154 
2155  Src = Lo;
2156  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2157  return true;
2158  }
2159 
2160  Mods = VecMods;
2161  }
2162 
2163  // Packed instructions do not have abs modifiers.
2164  Mods |= SISrcMods::OP_SEL_1;
2165 
2166  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2167  return true;
2168 }
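// Example: a v2f16 build_vector whose two halves resolve (after stripping
// bitcasts, fneg and extract_vector_elt) to the same 32-bit register is
// treated as a scalar: only the low half is read and the OP_SEL bits say
// which 16-bit half feeds each lane, so no pack is needed.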
2169 
2170 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2171  SDValue &SrcMods,
2172  SDValue &Clamp) const {
2173  SDLoc SL(In);
2174 
2175  // FIXME: Handle clamp and op_sel
2176  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2177 
2178  return SelectVOP3PMods(In, Src, SrcMods);
2179 }
2180 
2181 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2182  SDValue &SrcMods) const {
2183  Src = In;
2184  // FIXME: Handle op_sel
2185  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2186  return true;
2187 }
2188 
2189 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2190  SDValue &SrcMods,
2191  SDValue &Clamp) const {
2192  SDLoc SL(In);
2193 
2194  // FIXME: Handle clamp
2195  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2196 
2197  return SelectVOP3OpSel(In, Src, SrcMods);
2198 }
2199 
2200 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2201  SDValue &SrcMods) const {
2202  // FIXME: Handle op_sel
2203  return SelectVOP3Mods(In, Src, SrcMods);
2204 }
2205 
2206 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2207  SDValue &SrcMods,
2208  SDValue &Clamp) const {
2209  SDLoc SL(In);
2210 
2211  // FIXME: Handle clamp
2212  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2213 
2214  return SelectVOP3OpSelMods(In, Src, SrcMods);
2215 }
2216 
2217 // The return value is not whether the match is possible (which it always is),
2218 // but whether or not a conversion is really used.
2219 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2220  unsigned &Mods) const {
2221  Mods = 0;
2222  SelectVOP3ModsImpl(In, Src, Mods);
2223 
2224  if (Src.getOpcode() == ISD::FP_EXTEND) {
2225  Src = Src.getOperand(0);
2226  assert(Src.getValueType() == MVT::f16);
2227  Src = stripBitcast(Src);
2228 
2229  // Be careful about folding modifiers if we already have an abs. fneg is
2230  // applied last, so we don't want to apply an earlier fneg.
2231  if ((Mods & SISrcMods::ABS) == 0) {
2232  unsigned ModsTmp;
2233  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2234 
2235  if ((ModsTmp & SISrcMods::NEG) != 0)
2236  Mods ^= SISrcMods::NEG;
2237 
2238  if ((ModsTmp & SISrcMods::ABS) != 0)
2239  Mods |= SISrcMods::ABS;
2240  }
2241 
2242  // op_sel/op_sel_hi decide the source type and source.
2243  // If the source's op_sel_hi is set, it indicates to do a conversion from
2244  // fp16. If the source's op_sel is set, it picks the high half of the
2245  // source register.
2246 
2247  Mods |= SISrcMods::OP_SEL_1;
2248  if (isExtractHiElt(Src, Src)) {
2249  Mods |= SISrcMods::OP_SEL_0;
2250 
2251  // TODO: Should we try to look for neg/abs here?
2252  }
2253 
2254  return true;
2255  }
2256 
2257  return false;
2258 }
2259 
2260 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2261  SDValue &SrcMods) const {
2262  unsigned Mods = 0;
2263  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2264  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2265  return true;
2266 }
2267 
2268 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2269  if (In.isUndef())
2270  return CurDAG->getUNDEF(MVT::i32);
2271 
2272  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2273  SDLoc SL(In);
2274  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2275  }
2276 
2277  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2278  SDLoc SL(In);
2279  return CurDAG->getConstant(
2280  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2281  }
2282 
2283  SDValue Src;
2284  if (isExtractHiElt(In, Src))
2285  return Src;
2286 
2287  return SDValue();
2288 }
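// e.g. the f16 constant 1.0 (bit pattern 0x3C00) is returned as the i32
// constant 0x3C000000, i.e. the value already positioned in the high 16 bits.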
2289 
2290 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2291  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2292  return false;
2293  }
2294  const SIRegisterInfo *SIRI =
2295  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2296  const SIInstrInfo * SII =
2297  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2298 
2299  unsigned Limit = 0;
2300  bool AllUsesAcceptSReg = true;
2301  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2302  Limit < 10 && U != E; ++U, ++Limit) {
2303  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2304 
2305  // If the register class is unknown, it could be one that must be an
2306  // SGPR, e.g. due to an inline asm constraint, so be conservative and
2307  // bail out.
2308  if (!RC || SIRI->isSGPRClass(RC))
2309  return false;
2310 
2311  if (RC != &AMDGPU::VS_32RegClass) {
2312  AllUsesAcceptSReg = false;
2313  SDNode * User = *U;
2314  if (User->isMachineOpcode()) {
2315  unsigned Opc = User->getMachineOpcode();
2316  MCInstrDesc Desc = SII->get(Opc);
2317  if (Desc.isCommutable()) {
2318  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2319  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2320  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2321  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2322  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2323  if (CommutedRC == &AMDGPU::VS_32RegClass)
2324  AllUsesAcceptSReg = true;
2325  }
2326  }
2327  }
2328  // If AllUsesAcceptSReg is still false at this point, we have not
2329  // succeeded in commuting the current user, which means at least one use
2330  // strictly requires a VGPR. Thus, we will not attempt to commute
2331  // other user instructions.
2332  if (!AllUsesAcceptSReg)
2333  break;
2334  }
2335  }
2336  return !AllUsesAcceptSReg && (Limit < 10);
2337 }
2338 
2339 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
2340  auto Ld = cast<LoadSDNode>(N);
2341 
2342  return Ld->getAlignment() >= 4 &&
2343  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2344  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2345  !N->isDivergent()) ||
2346  (Subtarget->getScalarizeGlobalBehavior() &&
2347  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2348  !Ld->isVolatile() && !N->isDivergent() &&
2349  static_cast<const SITargetLowering *>(
2350  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2351 }
2363 
2364 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2365  const AMDGPUTargetLowering& Lowering =
2366  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2367  bool IsModified = false;
2368  do {
2369  IsModified = false;
2370 
2371  // Go over all selected nodes and try to fold them a bit more
2372  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2373  while (Position != CurDAG->allnodes_end()) {
2374  SDNode *Node = &*Position++;
2375  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2376  if (!MachineNode)
2377  continue;
2378 
2379  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2380  if (ResNode != Node) {
2381  if (ResNode)
2382  ReplaceUses(Node, ResNode);
2383  IsModified = true;
2384  }
2385  }
2386  CurDAG->RemoveDeadNodes();
2387  } while (IsModified);
2388 }
2389 
2390 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2391  Subtarget = &MF.getSubtarget<R600Subtarget>();
2392  return SelectionDAGISel::runOnMachineFunction(MF);
2393 }
2394 
2395 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2396  if (!N->readMem())
2397  return false;
2398  if (CbId == -1)
2399  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2400  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2401 
2402  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2403 }
2404 
2405 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2406  SDValue& IntPtr) {
2407  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2408  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2409  true);
2410  return true;
2411  }
2412  return false;
2413 }
2414 
2415 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2416  SDValue& BaseReg, SDValue &Offset) {
2417  if (!isa<ConstantSDNode>(Addr)) {
2418  BaseReg = Addr;
2419  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2420  return true;
2421  }
2422  return false;
2423 }
2424 
2425 void R600DAGToDAGISel::Select(SDNode *N) {
2426  unsigned int Opc = N->getOpcode();
2427  if (N->isMachineOpcode()) {
2428  N->setNodeId(-1);
2429  return; // Already selected.
2430  }
2431 
2432  switch (Opc) {
2433  default: break;
2434  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2435  case ISD::SCALAR_TO_VECTOR:
2436  case ISD::BUILD_VECTOR: {
2437  EVT VT = N->getValueType(0);
2438  unsigned NumVectorElts = VT.getVectorNumElements();
2439  unsigned RegClassID;
2440  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2441  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2442  // pass. We want to avoid 128 bits copies as much as possible because they
2443  // can't be bundled by our scheduler.
2444  switch(NumVectorElts) {
2445  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2446  case 4:
2447  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2448  RegClassID = R600::R600_Reg128VerticalRegClassID;
2449  else
2450  RegClassID = R600::R600_Reg128RegClassID;
2451  break;
2452  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2453  }
2454  SelectBuildVector(N, RegClassID);
2455  return;
2456  }
2457  }
2458 
2459  SelectCode(N);
2460 }
2461 
2462 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2463  SDValue &Offset) {
2464  ConstantSDNode *C;
2465  SDLoc DL(Addr);
2466 
2467  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2468  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2469  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2470  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2471  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2472  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2473  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2474  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2475  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2476  Base = Addr.getOperand(0);
2477  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2478  } else {
2479  Base = Addr;
2480  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2481  }
2482 
2483  return true;
2484 }
2485 
2486 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2487  SDValue &Offset) {
2488  ConstantSDNode *IMMOffset;
2489 
2490  if (Addr.getOpcode() == ISD::ADD
2491  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2492  && isInt<16>(IMMOffset->getZExtValue())) {
2493 
2494  Base = Addr.getOperand(0);
2495  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2496  MVT::i32);
2497  return true;
2498  // If the pointer address is constant, we can move it to the offset field.
2499  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2500  && isInt<16>(IMMOffset->getZExtValue())) {
2501  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2502  SDLoc(CurDAG->getEntryNode()),
2503  R600::ZERO, MVT::i32);
2504  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2505  MVT::i32);
2506  return true;
2507  }
2508 
2509  // Default case, no offset
2510  Base = Addr;
2511  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2512  return true;
2513 }