LLVM 3.7.0
SIISelLowering.cpp
1 //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Custom DAG lowering for SI
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifdef _MSC_VER
16 // Provide M_PI.
17 #define _USE_MATH_DEFINES
18 #include <cmath>
19 #endif
20 
21 #include "SIISelLowering.h"
22 #include "AMDGPU.h"
23 #include "AMDGPUIntrinsicInfo.h"
24 #include "AMDGPUSubtarget.h"
25 #include "SIInstrInfo.h"
26 #include "SIMachineFunctionInfo.h"
27 #include "SIRegisterInfo.h"
28 #include "llvm/ADT/BitVector.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/ADT/SmallString.h"
35 
36 using namespace llvm;
37 
38 SITargetLowering::SITargetLowering(TargetMachine &TM,
39  const AMDGPUSubtarget &STI)
40  : AMDGPUTargetLowering(TM, STI) {
41  addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
42  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
43 
44  addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
45  addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
46 
47  addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
48  addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
49 
50  addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
51  addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
52  addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
53 
54  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
55  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
56 
57  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
58  addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
59 
60  addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
61  addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
62 
64 
69 
75 
78 
81 
82  // We need to custom lower vector stores from local memory
86 
89 
92 
96 
101 
104 
106 
110 
114 
118 
121 
126 
129 
130  for (MVT VT : MVT::integer_valuetypes()) {
131  if (VT == MVT::i64)
132  continue;
133 
138 
143 
148  }
149 
150  for (MVT VT : MVT::integer_vector_valuetypes()) {
153  }
154 
155  for (MVT VT : MVT::fp_valuetypes())
157 
161 
163 
167 
168  // These should use UDIVREM, so set them to expand
171 
174 
175  // We only support LOAD/STORE and vector manipulation ops for vectors
176  // with > 4 elements.
178  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
179  switch(Op) {
180  case ISD::LOAD:
181  case ISD::STORE:
182  case ISD::BUILD_VECTOR:
183  case ISD::BITCAST:
188  break;
189  case ISD::CONCAT_VECTORS:
190  setOperationAction(Op, VT, Custom);
191  break;
192  default:
193  setOperationAction(Op, VT, Expand);
194  break;
195  }
196  }
197  }
198 
203  }
204 
208 
222 
223  // All memory operations. Some folding on the pointer operand is done to help
224  // matching the constant offsets in the addressing modes.
242 
244 }
245 
246 //===----------------------------------------------------------------------===//
247 // TargetLowering queries
248 //===----------------------------------------------------------------------===//
249 
251  EVT) const {
252  // SI has some legal vector types, but no legal vector operations. Say no
253  // shuffles are legal in order to prefer scalarizing some vector operations.
254  return false;
255 }
256 
257 bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
258  // Flat instructions do not have offsets, and only have the register
259  // address.
260  return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
261 }
262 
264  const AddrMode &AM, Type *Ty,
265  unsigned AS) const {
266  // No global is ever allowed as a base.
267  if (AM.BaseGV)
268  return false;
269 
270  switch (AS) {
273  // Assume we will use FLAT for all global memory accesses
274  // on VI.
275  // FIXME: This assumption is currently wrong. On VI we still use
276  // MUBUF instructions for the r + i addressing mode. As currently
277  // implemented, the MUBUF instructions only work on buffer < 4GB.
278  // It may be possible to support > 4GB buffers with MUBUF instructions,
279  // by setting the stride value in the resource descriptor which would
280  // increase the size limit to (stride * 4GB). However, this is risky,
281  // because it has never been validated.
282  return isLegalFlatAddressingMode(AM);
283  }
284  // fall-through
286  case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
288  // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
289  // additionally can do r + r + i with addr64. 32-bit has more addressing
290  // mode options. Depending on the resource constant, it can also do
291  // (i64 r0) + (i32 r1) * (i14 i).
292  //
293  // SMRD instructions have an 8-bit, dword offset.
294  //
295  // Assume nonuniform access, since the address space isn't enough to know
296  // what instruction we will use, and since we don't know if this is a load
297  // or store and scalar stores are only available on VI.
298  //
299  // We also know if we are doing an extload, we can't do a scalar load.
300  //
301  // Private arrays end up using a scratch buffer most of the time, so also
302  // assume those use MUBUF instructions. Scratch loads / stores are currently
303  // implemented as mubuf instructions with offen bit set, so slightly
304  // different from the normal addr64.
305  if (!isUInt<12>(AM.BaseOffs))
306  return false;
307 
308  // FIXME: Since we can split immediate into soffset and immediate offset,
309  // would it make sense to allow any immediate?
310 
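 // For illustration, under the checks below: "base + 8" (Scale == 0),
 // "base + index + 8" (Scale == 1), and "2*index + 8" (treated as
 // index + index + 8) are all accepted, while any other multiple of a
 // register is rejected.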
311  switch (AM.Scale) {
312  case 0: // r + i or just i, depending on HasBaseReg.
313  return true;
314  case 1:
315  return true; // We have r + r or r + i.
316  case 2:
317  if (AM.HasBaseReg) {
318  // Reject 2 * r + r.
319  return false;
320  }
321 
322  // Allow 2 * r as r + r
323  // Or 2 * r + i is allowed as r + r + i.
324  return true;
325  default: // Don't allow n * r
326  return false;
327  }
328  }
331  // Basic, single offset DS instructions allow a 16-bit unsigned immediate
332  // field.
333  // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
334  // an 8-bit dword offset but we don't know the alignment here.
335  if (!isUInt<16>(AM.BaseOffs))
336  return false;
337 
338  if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
339  return true;
340 
341  if (AM.Scale == 1 && AM.HasBaseReg)
342  return true;
343 
344  return false;
345  }
347  return isLegalFlatAddressingMode(AM);
348 
349  default:
350  llvm_unreachable("unhandled address space");
351  }
352 }
353 
355  unsigned AddrSpace,
356  unsigned Align,
357  bool *IsFast) const {
358  if (IsFast)
359  *IsFast = false;
360 
361  // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
362  // which isn't a simple VT.
363  if (!VT.isSimple() || VT == MVT::Other)
364  return false;
365 
366  // TODO - CI+ supports unaligned memory accesses, but this requires driver
367  // support.
368 
369  // XXX - The only mention I see of this in the ISA manual is for LDS direct
370  // reads the "byte address and must be dword aligned". Is it also true for the
371  // normal loads and stores?
372  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
373  // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
374  // aligned, 8 byte access in a single operation using ds_read2/write2_b32
375  // with adjacent offsets.
376  return Align % 4 == 0;
377  }
378 
379  // Values smaller than a dword must be aligned.
380  // FIXME: This should be allowed on CI+
381  if (VT.bitsLT(MVT::i32))
382  return false;
383 
384  // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
385  // byte-address are ignored, thus forcing Dword alignment.
386  // This applies to private, global, and constant memory.
387  if (IsFast)
388  *IsFast = true;
389 
390  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
391 }
392 
393 EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
394  unsigned SrcAlign, bool IsMemset,
395  bool ZeroMemset,
396  bool MemcpyStrSrc,
397  MachineFunction &MF) const {
398  // FIXME: Should account for address space here.
399 
400  // The default fallback uses the private pointer size as a guess for a type to
401  // use. Make sure we switch these to 64-bit accesses.
402 
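 // For example, a copy of 16 or more bytes to a 4-byte aligned destination is
 // expanded with v4i32 (dwordx4) accesses, and an 8-byte copy with v2i32.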
403  if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
404  return MVT::v4i32;
405 
406  if (Size >= 8 && DstAlign >= 4)
407  return MVT::v2i32;
408 
409  // Use the default.
410  return MVT::Other;
411 }
412 
415  if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
416  return TypeSplitVector;
417 
419 }
420 
422  Type *Ty) const {
423  const SIInstrInfo *TII =
424  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
425  return TII->isInlineConstant(Imm);
426 }
427 
428 static EVT toIntegerVT(EVT VT) {
429  if (VT.isVector())
431  return MVT::getIntegerVT(VT.getSizeInBits());
432 }
433 
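 // Kernel arguments live in constant memory at a fixed offset from the
 // preloaded input (kernarg) pointer, so each parameter below is materialized
 // as an invariant extending load from that pointer plus Offset.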
434 SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
435  SDLoc SL, SDValue Chain,
436  unsigned Offset, bool Signed) const {
437  const DataLayout &DL = DAG.getDataLayout();
439  const SIRegisterInfo *TRI =
440  static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
441  unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
442 
443  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
444 
448  SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
449  MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
450  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
451  DAG.getConstant(Offset, SL, PtrVT));
452  SDValue PtrOffset = DAG.getUNDEF(PtrVT);
453  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
454 
455  unsigned Align = DL.getABITypeAlignment(Ty);
456 
457  if (VT != MemVT && VT.isFloatingPoint()) {
458  // Do an integer load and convert.
459  // FIXME: This is mostly because load legalization after type legalization
460  // doesn't handle FP extloads.
461  assert(VT.getScalarType() == MVT::f32 &&
462  MemVT.getScalarType() == MVT::f16);
463 
464  EVT IVT = toIntegerVT(VT);
465  EVT MemIVT = toIntegerVT(MemVT);
467  IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
468  false, // isVolatile
469  true, // isNonTemporal
470  true, // isInvariant
471  Align); // Alignment
472  SDValue Ops[] = {
473  DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
474  Load.getValue(1)
475  };
476 
477  return DAG.getMergeValues(Ops, SL);
478  }
479 
480  ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
481  return DAG.getLoad(ISD::UNINDEXED, ExtTy,
482  VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
483  false, // isVolatile
484  true, // isNonTemporal
485  true, // isInvariant
486  Align); // Alignment
487 }
488 
490  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
492  SmallVectorImpl<SDValue> &InVals) const {
493  const SIRegisterInfo *TRI =
494  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
495 
497  FunctionType *FType = MF.getFunction()->getFunctionType();
499 
500  assert(CallConv == CallingConv::C);
501 
503  BitVector Skipped(Ins.size());
504 
505  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
506  const ISD::InputArg &Arg = Ins[i];
507 
508  // First check if it's a PS input addr
509  if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
510  !Arg.Flags.isByVal()) {
511 
512  assert((PSInputNum <= 15) && "Too many PS inputs!");
513 
514  if (!Arg.Used) {
515  // We can safely skip PS inputs
516  Skipped.set(i);
517  ++PSInputNum;
518  continue;
519  }
520 
521  Info->PSInputAddr |= 1 << PSInputNum++;
522  }
523 
524  // Second, split vertices into their elements
525  if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
526  ISD::InputArg NewArg = Arg;
527  NewArg.Flags.setSplit();
528  NewArg.VT = Arg.VT.getVectorElementType();
529 
530  // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
531  // three or five element vertex only needs three or five registers,
532  // NOT four or eight.
533  Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
534  unsigned NumElements = ParamType->getVectorNumElements();
535 
536  for (unsigned j = 0; j != NumElements; ++j) {
537  Splits.push_back(NewArg);
538  NewArg.PartOffset += NewArg.VT.getStoreSize();
539  }
540 
541  } else if (Info->getShaderType() != ShaderType::COMPUTE) {
542  Splits.push_back(Arg);
543  }
544  }
545 
547  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
548  *DAG.getContext());
549 
550  // At least one interpolation mode must be enabled or else the GPU will hang.
551  if (Info->getShaderType() == ShaderType::PIXEL &&
552  (Info->PSInputAddr & 0x7F) == 0) {
553  Info->PSInputAddr |= 1;
554  CCInfo.AllocateReg(AMDGPU::VGPR0);
555  CCInfo.AllocateReg(AMDGPU::VGPR1);
556  }
557 
558  // The pointer to the list of arguments is stored in SGPR0, SGPR1
559  // The pointer to the scratch buffer is stored in SGPR2, SGPR3
560  if (Info->getShaderType() == ShaderType::COMPUTE) {
561  if (Subtarget->isAmdHsaOS())
562  Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
563  else
564  Info->NumUserSGPRs = 4;
565 
566  unsigned InputPtrReg =
568  unsigned InputPtrRegLo =
569  TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
570  unsigned InputPtrRegHi =
571  TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
572 
573  unsigned ScratchPtrReg =
575  unsigned ScratchPtrRegLo =
576  TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
577  unsigned ScratchPtrRegHi =
578  TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
579 
580  CCInfo.AllocateReg(InputPtrRegLo);
581  CCInfo.AllocateReg(InputPtrRegHi);
582  CCInfo.AllocateReg(ScratchPtrRegLo);
583  CCInfo.AllocateReg(ScratchPtrRegHi);
584  MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
585  MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
586  }
587 
588  if (Info->getShaderType() == ShaderType::COMPUTE) {
590  Splits);
591  }
592 
593  AnalyzeFormalArguments(CCInfo, Splits);
594 
596 
597  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
598 
599  const ISD::InputArg &Arg = Ins[i];
600  if (Skipped[i]) {
601  InVals.push_back(DAG.getUNDEF(Arg.VT));
602  continue;
603  }
604 
605  CCValAssign &VA = ArgLocs[ArgIdx++];
606  MVT VT = VA.getLocVT();
607 
608  if (VA.isMemLoc()) {
609  VT = Ins[i].VT;
610  EVT MemVT = Splits[i].VT;
611  const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
612  VA.getLocMemOffset();
613  // The first 36 bytes of the input buffer contain information about
614  // thread group and global sizes.
615  SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
616  Offset, Ins[i].Flags.isSExt());
617  Chains.push_back(Arg.getValue(1));
618 
619  const PointerType *ParamTy =
620  dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
622  ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
623  // On SI local pointers are just offsets into LDS, so they are always
624  // less than 16-bits. On CI and newer they could potentially be
625  // real pointers, so we can't guarantee their size.
626  Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
627  DAG.getValueType(MVT::i16));
628  }
629 
630  InVals.push_back(Arg);
631  Info->ABIArgOffset = Offset + MemVT.getStoreSize();
632  continue;
633  }
634  assert(VA.isRegLoc() && "Parameter must be in a register!");
635 
636  unsigned Reg = VA.getLocReg();
637 
638  if (VT == MVT::i64) {
639  // For now assume it is a pointer
640  Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
641  &AMDGPU::SReg_64RegClass);
642  Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
643  SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
644  InVals.push_back(Copy);
645  continue;
646  }
647 
648  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
649 
650  Reg = MF.addLiveIn(Reg, RC);
651  SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
652 
653  if (Arg.VT.isVector()) {
654 
655  // Build a vector from the registers
656  Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
657  unsigned NumElements = ParamType->getVectorNumElements();
658 
660  Regs.push_back(Val);
661  for (unsigned j = 1; j != NumElements; ++j) {
662  Reg = ArgLocs[ArgIdx++].getLocReg();
663  Reg = MF.addLiveIn(Reg, RC);
664 
665  SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
666  Regs.push_back(Copy);
667  }
668 
669  // Fill up the missing vector elements
670  NumElements = Arg.VT.getVectorNumElements() - NumElements;
671  Regs.append(NumElements, DAG.getUNDEF(VT));
672 
673  InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
674  continue;
675  }
676 
677  InVals.push_back(Val);
678  }
679 
680  if (Info->getShaderType() != ShaderType::COMPUTE) {
681  unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
682  AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
683  Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
684  }
685 
686  if (Chains.empty())
687  return Chain;
688 
689  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
690 }
691 
693  MachineInstr * MI, MachineBasicBlock * BB) const {
694 
696  const SIInstrInfo *TII =
697  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
698 
699  switch (MI->getOpcode()) {
700  default:
702  case AMDGPU::BRANCH:
703  return BB;
704  case AMDGPU::SI_RegisterStorePseudo: {
705  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
706  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
707  MachineInstrBuilder MIB =
708  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
709  Reg);
710  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
711  MIB.addOperand(MI->getOperand(i));
712 
713  MI->eraseFromParent();
714  break;
715  }
716  }
717  return BB;
718 }
719 
721  // This currently forces unfolding various combinations of fsub into fma with
722  // free fneg'd operands. As long as we have fast FMA (controlled by
723  // isFMAFasterThanFMulAndFAdd), we should perform these.
724 
725  // When fma is quarter rate, for f64 where add / sub are at best half rate,
726  // most of these combines appear to be cycle neutral but save on instruction
727  // count / code size.
728  return true;
729 }
730 
732  EVT VT) const {
733  if (!VT.isVector()) {
734  return MVT::i1;
735  }
736  return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
737 }
738 
740  return MVT::i32;
741 }
742 
743 // Answering this is somewhat tricky and depends on the specific device, which
744 // may have different rates for fma or for all f64 operations.
745 //
746 // v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
747 // regardless of which device (although the number of cycles differs between
748 // devices), so it is always profitable for f64.
749 //
750 // v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
751 // only on full rate devices. Normally, we should prefer selecting v_mad_f32
752 // which we can always do even without fused FP ops since it returns the same
753 // result as the separate operations and since it is always full
754 // rate. Therefore, we lie and report that it is not faster for f32. v_mad_f32
755 // however does not support denormals, so we do report fma as faster if we have
756 // a fast fma device and require denormals.
757 //
759  VT = VT.getScalarType();
760 
761  if (!VT.isSimple())
762  return false;
763 
764  switch (VT.getSimpleVT().SimpleTy) {
765  case MVT::f32:
766  // This is as fast on some subtargets. However, we always have full rate f32
767  // mad available which returns the same result as the separate operations
768  // which we should prefer over fma. We can't use this if we want to support
769  // denormals, so only report this in these cases.
771  case MVT::f64:
772  return true;
773  default:
774  break;
775  }
776 
777  return false;
778 }
779 
780 //===----------------------------------------------------------------------===//
781 // Custom DAG Lowering Operations
782 //===----------------------------------------------------------------------===//
783 
785  switch (Op.getOpcode()) {
786  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
787  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
788  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
789  case ISD::LOAD: {
790  SDValue Result = LowerLOAD(Op, DAG);
791  assert((!Result.getNode() ||
792  Result.getNode()->getNumValues() == 2) &&
793  "Load should return a value and a chain");
794  return Result;
795  }
796 
797  case ISD::FSIN:
798  case ISD::FCOS:
799  return LowerTrig(Op, DAG);
800  case ISD::SELECT: return LowerSELECT(Op, DAG);
801  case ISD::FDIV: return LowerFDIV(Op, DAG);
802  case ISD::STORE: return LowerSTORE(Op, DAG);
803  case ISD::GlobalAddress: {
806  return LowerGlobalAddress(MFI, Op, DAG);
807  }
808  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
809  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
810  }
811  return SDValue();
812 }
813 
814 /// \brief Helper function for LowerBRCOND
815 static SDNode *findUser(SDValue Value, unsigned Opcode) {
816 
817  SDNode *Parent = Value.getNode();
818  for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
819  I != E; ++I) {
820 
821  if (I.getUse().get() != Value)
822  continue;
823 
824  if (I->getOpcode() == Opcode)
825  return *I;
826  }
827  return nullptr;
828 }
829 
830 SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
831 
832  SDLoc SL(Op);
833  FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
834  unsigned FrameIndex = FINode->getIndex();
835 
836  // A FrameIndex node represents a 32-bit offset into scratch memory. If
837  // the high bit of a frame index offset were to be set, this would mean
838  // that it represented an offset of ~2GB * 64 = ~128GB from the start of the
839  // scratch buffer, with 64 being the number of threads per wave.
840  //
841  // If we know the machine uses less than 128GB of scratch, then we can
842  // mark the high bit of the FrameIndex node as known zero,
843  // which is important, because it means in most situations we can
844  // prove that values derived from FrameIndex nodes are non-negative.
845  // This enables us to take advantage of more addressing modes when
846  // accessing scratch buffers, since for scratch reads/writes, the register
847  // offset must always be positive.
848 
849  SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
851  return TFI;
852 
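 // The AssertZext with a 31-bit integer type records that bit 31 is known to
 // be zero, so the offset can later be treated as non-negative.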
853  return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
854  DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
855 }
856 
857 /// This transforms the control flow intrinsics so that the branch destination is
858 /// the last parameter; it also switches the branch target with BR if the need arises.
859 SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
860  SelectionDAG &DAG) const {
861 
862  SDLoc DL(BRCOND);
863 
864  SDNode *Intr = BRCOND.getOperand(1).getNode();
865  SDValue Target = BRCOND.getOperand(2);
866  SDNode *BR = nullptr;
867 
868  if (Intr->getOpcode() == ISD::SETCC) {
869  // As long as we negate the condition everything is fine
870  SDNode *SetCC = Intr;
871  assert(SetCC->getConstantOperandVal(1) == 1);
872  assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
873  ISD::SETNE);
874  Intr = SetCC->getOperand(0).getNode();
875 
876  } else {
877  // Get the target from BR if we don't negate the condition
878  BR = findUser(BRCOND, ISD::BR);
879  Target = BR->getOperand(1);
880  }
881 
882  assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
883 
884  // Build the result and
885  ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
886 
887  // operands of the new intrinsic call
889  Ops.push_back(BRCOND.getOperand(0));
890  Ops.append(Intr->op_begin() + 1, Intr->op_end());
891  Ops.push_back(Target);
892 
893  // build the new intrinsic call
894  SDNode *Result = DAG.getNode(
895  Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
896  DAG.getVTList(Res), Ops).getNode();
897 
898  if (BR) {
899  // Give the branch instruction our target
900  SDValue Ops[] = {
901  BR->getOperand(0),
902  BRCOND.getOperand(2)
903  };
904  SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
905  DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
906  BR = NewBR.getNode();
907  }
908 
909  SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
910 
911  // Copy the intrinsic results to registers
912  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
914  if (!CopyToReg)
915  continue;
916 
917  Chain = DAG.getCopyToReg(
918  Chain, DL,
919  CopyToReg->getOperand(1),
920  SDValue(Result, i - 1),
921  SDValue());
922 
923  DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
924  }
925 
926  // Remove the old intrinsic from the chain
928  SDValue(Intr, Intr->getNumValues() - 1),
929  Intr->getOperand(0));
930 
931  return Chain;
932 }
933 
934 SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
935  SDValue Op,
936  SelectionDAG &DAG) const {
937  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
938 
940  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
941 
942  SDLoc DL(GSD);
943  const GlobalValue *GV = GSD->getGlobal();
944  MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
945 
946  SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
947  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
948 
949  SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
950  DAG.getConstant(0, DL, MVT::i32));
951  SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
952  DAG.getConstant(1, DL, MVT::i32));
953 
955  PtrLo, GA);
957  PtrHi, DAG.getConstant(0, DL, MVT::i32),
958  SDValue(Lo.getNode(), 1));
959  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
960 }
961 
963  SDValue V) const {
964  // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
965  // so we will end up with redundant moves to m0.
966  //
967  // We can't use S_MOV_B32, because there is no way to specify m0 as the
968  // destination register.
969  //
970  // We have to use them both. Machine cse will combine all the S_MOV_B32
971  // instructions and the register coalescer eliminate the extra copies.
972  SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
973  return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
974  SDValue(M0, 0), SDValue()); // Glue
975  // A Null SDValue creates
976  // a glue result.
977 }
978 
979 SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
980  SelectionDAG &DAG) const {
982  auto MFI = MF.getInfo<SIMachineFunctionInfo>();
983  const SIRegisterInfo *TRI =
984  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
985 
986  EVT VT = Op.getValueType();
987  SDLoc DL(Op);
988  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
989 
990  switch (IntrinsicID) {
991  case Intrinsic::r600_read_ngroups_x:
992  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
994  case Intrinsic::r600_read_ngroups_y:
995  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
997  case Intrinsic::r600_read_ngroups_z:
998  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1000  case Intrinsic::r600_read_global_size_x:
1001  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1003  case Intrinsic::r600_read_global_size_y:
1004  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1006  case Intrinsic::r600_read_global_size_z:
1007  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1009  case Intrinsic::r600_read_local_size_x:
1010  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1012  case Intrinsic::r600_read_local_size_y:
1013  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1015  case Intrinsic::r600_read_local_size_z:
1016  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1018 
1019  case Intrinsic::AMDGPU_read_workdim:
1020  return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
1021  getImplicitParameterOffset(MFI, GRID_DIM), false);
1022 
1023  case Intrinsic::r600_read_tgid_x:
1024  return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
1025  TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
1026  case Intrinsic::r600_read_tgid_y:
1027  return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
1028  TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
1029  case Intrinsic::r600_read_tgid_z:
1030  return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
1031  TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
1032  case Intrinsic::r600_read_tidig_x:
1033  return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
1034  TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
1035  case Intrinsic::r600_read_tidig_y:
1036  return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
1037  TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
1038  case Intrinsic::r600_read_tidig_z:
1039  return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
1040  TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
1041  case AMDGPUIntrinsic::SI_load_const: {
1042  SDValue Ops[] = {
1043  Op.getOperand(1),
1044  Op.getOperand(2)
1045  };
1046 
1050  VT.getStoreSize(), 4);
1052  Op->getVTList(), Ops, VT, MMO);
1053  }
1054  case AMDGPUIntrinsic::SI_sample:
1055  return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
1056  case AMDGPUIntrinsic::SI_sampleb:
1057  return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
1058  case AMDGPUIntrinsic::SI_sampled:
1059  return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
1060  case AMDGPUIntrinsic::SI_samplel:
1061  return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
1062  case AMDGPUIntrinsic::SI_vs_load_input:
1063  return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
1064  Op.getOperand(1),
1065  Op.getOperand(2),
1066  Op.getOperand(3));
1067 
1068  case AMDGPUIntrinsic::AMDGPU_fract:
1069  case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
1070  return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
1071  DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
1072  case AMDGPUIntrinsic::SI_fs_constant: {
1073  SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
1074  SDValue Glue = M0.getValue(1);
1075  return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
1076  DAG.getConstant(2, DL, MVT::i32), // P0
1077  Op.getOperand(1), Op.getOperand(2), Glue);
1078  }
1079  case AMDGPUIntrinsic::SI_fs_interp: {
1080  SDValue IJ = Op.getOperand(4);
1082  DAG.getConstant(0, DL, MVT::i32));
1084  DAG.getConstant(1, DL, MVT::i32));
1085  SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
1086  SDValue Glue = M0.getValue(1);
1087  SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
1089  I, Op.getOperand(1), Op.getOperand(2), Glue);
1090  Glue = SDValue(P1.getNode(), 1);
1091  return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
1092  Op.getOperand(1), Op.getOperand(2), Glue);
1093  }
1094  default:
1095  return AMDGPUTargetLowering::LowerOperation(Op, DAG);
1096  }
1097 }
1098 
1099 SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1100  SelectionDAG &DAG) const {
1101  MachineFunction &MF = DAG.getMachineFunction();
1102  SDLoc DL(Op);
1103  SDValue Chain = Op.getOperand(0);
1104  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1105 
1106  switch (IntrinsicID) {
1107  case AMDGPUIntrinsic::SI_sendmsg: {
1108  Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
1109  SDValue Glue = Chain.getValue(1);
1110  return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
1111  Op.getOperand(2), Glue);
1112  }
1113  case AMDGPUIntrinsic::SI_tbuffer_store: {
1114  SDValue Ops[] = {
1115  Chain,
1116  Op.getOperand(2),
1117  Op.getOperand(3),
1118  Op.getOperand(4),
1119  Op.getOperand(5),
1120  Op.getOperand(6),
1121  Op.getOperand(7),
1122  Op.getOperand(8),
1123  Op.getOperand(9),
1124  Op.getOperand(10),
1125  Op.getOperand(11),
1126  Op.getOperand(12),
1127  Op.getOperand(13),
1128  Op.getOperand(14)
1129  };
1130 
1131  EVT VT = Op.getOperand(3).getValueType();
1132 
1136  VT.getStoreSize(), 4);
1138  Op->getVTList(), Ops, VT, MMO);
1139  }
1140  default:
1141  return SDValue();
1142  }
1143 }
1144 
1145 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1146  SDLoc DL(Op);
1147  LoadSDNode *Load = cast<LoadSDNode>(Op);
1148 
1149  if (Op.getValueType().isVector()) {
1150  assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
1151  "Custom lowering for non-i32 vectors hasn't been implemented.");
1152  unsigned NumElements = Op.getValueType().getVectorNumElements();
1153  assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
1154  switch (Load->getAddressSpace()) {
1155  default: break;
1158  // v4 loads are supported for private and global memory.
1159  if (NumElements <= 4)
1160  break;
1161  // fall-through
1163  return ScalarizeVectorLoad(Op, DAG);
1164  }
1165  }
1166 
1167  return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
1168 }
1169 
1170 SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
1171  const SDValue &Op,
1172  SelectionDAG &DAG) const {
1173  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
1174  Op.getOperand(2),
1175  Op.getOperand(3),
1176  Op.getOperand(4));
1177 }
1178 
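 // i64 selects are split into two i32 selects on the low and high halves,
 // and the result is reassembled with a build_vector and a bitcast back to i64.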
1179 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1180  if (Op.getValueType() != MVT::i64)
1181  return SDValue();
1182 
1183  SDLoc DL(Op);
1184  SDValue Cond = Op.getOperand(0);
1185 
1186  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1187  SDValue One = DAG.getConstant(1, DL, MVT::i32);
1188 
1189  SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
1190  SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
1191 
1192  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
1193  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
1194 
1195  SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
1196 
1197  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
1198  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
1199 
1200  SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
1201 
1202  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
1203  return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
1204 }
1205 
1206 // Catch division cases where we can use shortcuts with rcp and rsq
1207 // instructions.
1208 SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
1209  SDLoc SL(Op);
1210  SDValue LHS = Op.getOperand(0);
1211  SDValue RHS = Op.getOperand(1);
1212  EVT VT = Op.getValueType();
1213  bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
1214 
1215  if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
1216  if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) &&
1217  CLHS->isExactlyValue(1.0)) {
1218  // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
1219  // the CI documentation they have a worst case error of 1 ulp.
1220  // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
1221  // use it as long as we aren't trying to use denormals.
1222 
1223  // 1.0 / sqrt(x) -> rsq(x)
1224  //
1225  // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
1226  // error seems really high at 2^29 ULP.
1227  if (RHS.getOpcode() == ISD::FSQRT)
1228  return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
1229 
1230  // 1.0 / x -> rcp(x)
1231  return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
1232  }
1233  }
1234 
1235  if (Unsafe) {
1236  // Turn into multiply by the reciprocal.
1237  // x / y -> x * (1.0 / y)
1238  SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
1239  return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
1240  }
1241 
1242  return SDValue();
1243 }
1244 
1245 SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
1246  SDValue FastLowered = LowerFastFDIV(Op, DAG);
1247  if (FastLowered.getNode())
1248  return FastLowered;
1249 
1250  // This uses v_rcp_f32 which does not handle denormals. Let this hit a
1251  // selection error for now rather than do something incorrect.
1252  if (Subtarget->hasFP32Denormals())
1253  return SDValue();
1254 
1255  SDLoc SL(Op);
1256  SDValue LHS = Op.getOperand(0);
1257  SDValue RHS = Op.getOperand(1);
1258 
1259  SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
1260 
1261  const APFloat K0Val(BitsToFloat(0x6f800000));
1262  const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
1263 
1264  const APFloat K1Val(BitsToFloat(0x2f800000));
1265  const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
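 // K0 is 2^+96 and K1 is 2^-32 as float bit patterns. When |RHS| exceeds 2^96
 // the denominator is pre-scaled by 2^-32 so v_rcp_f32 stays in range, and the
 // same factor (r3) rescales the final result.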
1266 
1267  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
1268 
1269  EVT SetCCVT =
1271 
1272  SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
1273 
1274  SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
1275 
1276  r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
1277 
1278  SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
1279 
1280  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
1281 
1282  return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
1283 }
1284 
1285 SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
1286  if (DAG.getTarget().Options.UnsafeFPMath)
1287  return LowerFastFDIV(Op, DAG);
1288 
1289  SDLoc SL(Op);
1290  SDValue X = Op.getOperand(0);
1291  SDValue Y = Op.getOperand(1);
1292 
1293  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
1294 
1295  SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
1296 
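 // The sequence below is a scaled Newton-Raphson refinement: div_scale
 // produces scaled operands, rcp gives an initial estimate, the fma chain
 // refines it into a quotient and remainder, and div_fmas / div_fixup apply
 // the final scaling and special-case handling.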
1297  SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
1298 
1299  SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
1300 
1301  SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
1302 
1303  SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
1304 
1305  SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
1306 
1307  SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
1308 
1309  SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
1310 
1311  SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
1312  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
1313 
1314  SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
1315  NegDivScale0, Mul, DivScale1);
1316 
1317  SDValue Scale;
1318 
1320  // Workaround a hardware bug on SI where the condition output from div_scale
1321  // is not usable.
1322 
1323  const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
1324 
1325  // Figure out which scale to use for div_fmas.
1326  SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
1327  SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
1328  SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
1329  SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
1330 
1331  SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
1332  SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
1333 
1334  SDValue Scale0Hi
1335  = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
1336  SDValue Scale1Hi
1337  = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
1338 
1339  SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
1340  SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
1341  Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
1342  } else {
1343  Scale = DivScale1.getValue(1);
1344  }
1345 
1346  SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
1347  Fma4, Fma3, Mul, Scale);
1348 
1349  return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
1350 }
1351 
1352 SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
1353  EVT VT = Op.getValueType();
1354 
1355  if (VT == MVT::f32)
1356  return LowerFDIV32(Op, DAG);
1357 
1358  if (VT == MVT::f64)
1359  return LowerFDIV64(Op, DAG);
1360 
1361  llvm_unreachable("Unexpected type for fdiv");
1362 }
1363 
1364 SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1365  SDLoc DL(Op);
1366  StoreSDNode *Store = cast<StoreSDNode>(Op);
1367  EVT VT = Store->getMemoryVT();
1368 
1369  // These stores are legal.
1370  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
1371  if (VT.isVector() && VT.getVectorNumElements() > 4)
1372  return ScalarizeVectorStore(Op, DAG);
1373  return SDValue();
1374  }
1375 
1377  if (Ret.getNode())
1378  return Ret;
1379 
1380  if (VT.isVector() && VT.getVectorNumElements() >= 8)
1381  return ScalarizeVectorStore(Op, DAG);
1382 
1383  if (VT == MVT::i1)
1384  return DAG.getTruncStore(Store->getChain(), DL,
1385  DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
1386  Store->getBasePtr(), MVT::i1, Store->getMemOperand());
1387 
1388  return SDValue();
1389 }
1390 
1391 SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1392  SDLoc DL(Op);
1393  EVT VT = Op.getValueType();
1394  SDValue Arg = Op.getOperand(0);
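 // The hardware sin/cos expect the angle in units of full turns, so the
 // operand is scaled by 1/(2*pi) and reduced to its fractional part first.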
1395  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
1396  DAG.getNode(ISD::FMUL, DL, VT, Arg,
1397  DAG.getConstantFP(0.5/M_PI, DL,
1398  VT)));
1399 
1400  switch (Op.getOpcode()) {
1401  case ISD::FCOS:
1402  return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
1403  case ISD::FSIN:
1404  return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
1405  default:
1406  llvm_unreachable("Wrong trig opcode");
1407  }
1408 }
1409 
1410 //===----------------------------------------------------------------------===//
1411 // Custom DAG optimizations
1412 //===----------------------------------------------------------------------===//
1413 
1414 SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
1415  DAGCombinerInfo &DCI) const {
1416  EVT VT = N->getValueType(0);
1417  EVT ScalarVT = VT.getScalarType();
1418  if (ScalarVT != MVT::f32)
1419  return SDValue();
1420 
1421  SelectionDAG &DAG = DCI.DAG;
1422  SDLoc DL(N);
1423 
1424  SDValue Src = N->getOperand(0);
1425  EVT SrcVT = Src.getValueType();
1426 
1427  // TODO: We could try to match extracting the higher bytes, which would be
1428  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1429  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1430  // about in practice.
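 // For example, (uint_to_fp x) where the upper 24 bits of x are known zero
 // can be selected as a single v_cvt_f32_ubyte0 of x.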
1431  if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
1432  if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
1433  SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
1434  DCI.AddToWorklist(Cvt.getNode());
1435  return Cvt;
1436  }
1437  }
1438 
1439  // We are primarily trying to catch operations on illegal vector types
1440  // before they are expanded.
1441  // For scalars, we can use the more flexible method of checking masked bits
1442  // after legalization.
1443  if (!DCI.isBeforeLegalize() ||
1444  !SrcVT.isVector() ||
1445  SrcVT.getVectorElementType() != MVT::i8) {
1446  return SDValue();
1447  }
1448 
1449  assert(DCI.isBeforeLegalize() && "Unexpected legal type");
1450 
1451  // Weird sized vectors are a pain to handle, but we know 3 is really the same
1452  // size as 4.
1453  unsigned NElts = SrcVT.getVectorNumElements();
1454  if (!SrcVT.isSimple() && NElts != 3)
1455  return SDValue();
1456 
1457  // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
1458  // prevent a mess from expanding to v4i32 and repacking.
1459  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
1460  EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
1461  EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
1462  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
1463  LoadSDNode *Load = cast<LoadSDNode>(Src);
1464 
1465  unsigned AS = Load->getAddressSpace();
1466  unsigned Align = Load->getAlignment();
1467  Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
1468  unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
1469 
1470  // Don't try to replace the load if we have to expand it due to alignment
1471  // problems. Otherwise we will end up scalarizing the load, and trying to
1472  // repack into the vector for no real reason.
1473  if (Align < ABIAlignment &&
1474  !allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
1475  return SDValue();
1476  }
1477 
1478  SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
1479  Load->getChain(),
1480  Load->getBasePtr(),
1481  LoadVT,
1482  Load->getMemOperand());
1483 
1484  // Make sure successors of the original load stay after it by updating
1485  // them to use the new Chain.
1486  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
1487 
1489  if (RegVT.isVector())
1490  DAG.ExtractVectorElements(NewLoad, Elts);
1491  else
1492  Elts.push_back(NewLoad);
1493 
1495 
1496  unsigned EltIdx = 0;
1497  for (SDValue Elt : Elts) {
1498  unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
1499  for (unsigned I = 0; I < ComponentsInElt; ++I) {
1500  unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
1501  SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
1502  DCI.AddToWorklist(Cvt.getNode());
1503  Ops.push_back(Cvt);
1504  }
1505 
1506  ++EltIdx;
1507  }
1508 
1509  assert(Ops.size() == NElts);
1510 
1511  return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
1512  }
1513 
1514  return SDValue();
1515 }
1516 
1517 /// \brief Return true if the given offset Size in bytes can be folded into
1518 /// the immediate offsets of a memory instruction for the given address space.
1519 static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
1520  const AMDGPUSubtarget &STI) {
1521  switch (AS) {
1522  case AMDGPUAS::GLOBAL_ADDRESS: {
1523  // MUBUF instructions have a 12-bit offset in bytes.
1524  return isUInt<12>(OffsetSize);
1525  }
1527  // SMRD instructions have an 8-bit offset in dwords on SI and
1528  // a 20-bit offset in bytes on VI.
1530  return isUInt<20>(OffsetSize);
1531  else
1532  return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
1533  }
1535  case AMDGPUAS::REGION_ADDRESS: {
1536  // The single offset versions have a 16-bit offset in bytes.
1537  return isUInt<16>(OffsetSize);
1538  }
1540  // Indirect register addressing does not use any offsets.
1541  default:
1542  return 0;
1543  }
1544 }
1545 
1546 // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
1547 
1548 // This is a variant of
1549 // (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
1550 //
1551 // The normal DAG combiner will do this, but only if the add has one use since
1552 // that would increase the number of instructions.
1553 //
1554 // This prevents us from seeing a constant offset that can be folded into a
1555 // memory instruction's addressing mode. If we know the resulting add offset of
1556 // a pointer can be folded into an addressing offset, we can replace the pointer
1557  // operand with the add of the new constant offset. This eliminates one of the uses,
1558 // and may allow the remaining use to also be simplified.
1559 //
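 // For example, (shl (add %ptr, 4), 2) becomes (add (shl %ptr, 2), 16), and
 // the constant 16 can then be folded into the memory instruction's immediate
 // offset.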
1560 SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
1561  unsigned AddrSpace,
1562  DAGCombinerInfo &DCI) const {
1563  SDValue N0 = N->getOperand(0);
1564  SDValue N1 = N->getOperand(1);
1565 
1566  if (N0.getOpcode() != ISD::ADD)
1567  return SDValue();
1568 
1569  const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
1570  if (!CN1)
1571  return SDValue();
1572 
1573  const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1574  if (!CAdd)
1575  return SDValue();
1576 
1577  // If the resulting offset is too large, we can't fold it into the addressing
1578  // mode offset.
1579  APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
1580  if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
1581  return SDValue();
1582 
1583  SelectionDAG &DAG = DCI.DAG;
1584  SDLoc SL(N);
1585  EVT VT = N->getValueType(0);
1586 
1587  SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
1588  SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
1589 
1590  return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
1591 }
1592 
1593 SDValue SITargetLowering::performAndCombine(SDNode *N,
1594  DAGCombinerInfo &DCI) const {
1595  if (DCI.isBeforeLegalize())
1596  return SDValue();
1597 
1598  SelectionDAG &DAG = DCI.DAG;
1599 
1600  // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
1601  // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
1602  SDValue LHS = N->getOperand(0);
1603  SDValue RHS = N->getOperand(1);
1604 
1605  if (LHS.getOpcode() == ISD::SETCC &&
1606  RHS.getOpcode() == ISD::SETCC) {
1607  ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
1608  ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
1609 
1610  SDValue X = LHS.getOperand(0);
1611  SDValue Y = RHS.getOperand(0);
1612  if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
1613  return SDValue();
1614 
1615  if (LCC == ISD::SETO) {
1616  if (X != LHS.getOperand(1))
1617  return SDValue();
1618 
1619  if (RCC == ISD::SETUNE) {
1621  if (!C1 || !C1->isInfinity() || C1->isNegative())
1622  return SDValue();
1623 
1624  const uint32_t Mask = SIInstrFlags::N_NORMAL |
1630 
1631  static_assert(((~(SIInstrFlags::S_NAN |
1634  SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
1635  "mask not equal");
1636 
1637  SDLoc DL(N);
1638  return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
1639  X, DAG.getConstant(Mask, DL, MVT::i32));
1640  }
1641  }
1642  }
1643 
1644  return SDValue();
1645 }
1646 
1647 SDValue SITargetLowering::performOrCombine(SDNode *N,
1648  DAGCombinerInfo &DCI) const {
1649  SelectionDAG &DAG = DCI.DAG;
1650  SDValue LHS = N->getOperand(0);
1651  SDValue RHS = N->getOperand(1);
1652 
1653  // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
1654  if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
1655  RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
1656  SDValue Src = LHS.getOperand(0);
1657  if (Src != RHS.getOperand(0))
1658  return SDValue();
1659 
1660  const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
1661  const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
1662  if (!CLHS || !CRHS)
1663  return SDValue();
1664 
1665  // Only 10 bits are used.
1666  static const uint32_t MaxMask = 0x3ff;
1667 
1668  uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
1669  SDLoc DL(N);
1670  return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
1671  Src, DAG.getConstant(NewMask, DL, MVT::i32));
1672  }
1673 
1674  return SDValue();
1675 }
1676 
1677 SDValue SITargetLowering::performClassCombine(SDNode *N,
1678  DAGCombinerInfo &DCI) const {
1679  SelectionDAG &DAG = DCI.DAG;
1680  SDValue Mask = N->getOperand(1);
1681 
1682  // fp_class x, 0 -> false
1683  if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
1684  if (CMask->isNullValue())
1685  return DAG.getConstant(0, SDLoc(N), MVT::i1);
1686  }
1687 
1688  return SDValue();
1689 }
1690 
1691 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
1692  switch (Opc) {
1693  case ISD::FMAXNUM:
1694  return AMDGPUISD::FMAX3;
1695  case ISD::SMAX:
1696  return AMDGPUISD::SMAX3;
1697  case ISD::UMAX:
1698  return AMDGPUISD::UMAX3;
1699  case ISD::FMINNUM:
1700  return AMDGPUISD::FMIN3;
1701  case ISD::SMIN:
1702  return AMDGPUISD::SMIN3;
1703  case ISD::UMIN:
1704  return AMDGPUISD::UMIN3;
1705  default:
1706  llvm_unreachable("Not a min/max opcode");
1707  }
1708 }
1709 
1710 SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
1711  DAGCombinerInfo &DCI) const {
1712  SelectionDAG &DAG = DCI.DAG;
1713 
1714  unsigned Opc = N->getOpcode();
1715  SDValue Op0 = N->getOperand(0);
1716  SDValue Op1 = N->getOperand(1);
1717 
1718  // Only do this if the inner op has one use since this will just increase
1719  // register pressure for no benefit.
1720 
1721  // max(max(a, b), c)
1722  if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
1723  SDLoc DL(N);
1724  return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
1725  DL,
1726  N->getValueType(0),
1727  Op0.getOperand(0),
1728  Op0.getOperand(1),
1729  Op1);
1730  }
1731 
1732  // max(a, max(b, c))
1733  if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
1734  SDLoc DL(N);
1735  return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
1736  DL,
1737  N->getValueType(0),
1738  Op0,
1739  Op1.getOperand(0),
1740  Op1.getOperand(1));
1741  }
1742 
1743  return SDValue();
1744 }
1745 
1746 SDValue SITargetLowering::performSetCCCombine(SDNode *N,
1747  DAGCombinerInfo &DCI) const {
1748  SelectionDAG &DAG = DCI.DAG;
1749  SDLoc SL(N);
1750 
1751  SDValue LHS = N->getOperand(0);
1752  SDValue RHS = N->getOperand(1);
1753  EVT VT = LHS.getValueType();
1754 
1755  if (VT != MVT::f32 && VT != MVT::f64)
1756  return SDValue();
1757 
1758  // Match isinf pattern
1759  // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
1760  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
1761  if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
1762  const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
1763  if (!CRHS)
1764  return SDValue();
1765 
1766  const APFloat &APF = CRHS->getValueAPF();
1767  if (APF.isInfinity() && !APF.isNegative()) {
1769  return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
1770  DAG.getConstant(Mask, SL, MVT::i32));
1771  }
1772  }
1773 
1774  return SDValue();
1775 }
1776 
1778  DAGCombinerInfo &DCI) const {
1779  SelectionDAG &DAG = DCI.DAG;
1780  SDLoc DL(N);
1781 
1782  switch (N->getOpcode()) {
1783  default:
1785  case ISD::SETCC:
1786  return performSetCCCombine(N, DCI);
1787  case ISD::FMAXNUM: // TODO: What about fmax_legacy?
1788  case ISD::FMINNUM:
1789  case ISD::SMAX:
1790  case ISD::SMIN:
1791  case ISD::UMAX:
1792  case ISD::UMIN: {
1793  if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
1794  N->getValueType(0) != MVT::f64 &&
1796  return performMin3Max3Combine(N, DCI);
1797  break;
1798  }
1799 
1804  unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
1805 
1806  SDValue Src = N->getOperand(0);
1807  APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
1808 
1809  APInt KnownZero, KnownOne;
1811  !DCI.isBeforeLegalizeOps());
1812  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1813  if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
1814  TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
1815  DCI.CommitTargetLoweringOpt(TLO);
1816  }
1817 
1818  break;
1819  }
1820 
1821  case ISD::UINT_TO_FP: {
1822  return performUCharToFloatCombine(N, DCI);
1823 
1824  case ISD::FADD: {
1826  break;
1827 
1828  EVT VT = N->getValueType(0);
1829  if (VT != MVT::f32)
1830  break;
1831 
1832  // Only do this if we are not trying to support denormals. v_mad_f32 does
1833  // not support denormals ever.
1834  if (Subtarget->hasFP32Denormals())
1835  break;
1836 
1837  SDValue LHS = N->getOperand(0);
1838  SDValue RHS = N->getOperand(1);
1839 
1840  // These should really be instruction patterns, but writing patterns with
1841  // source modifiers is a pain.
1842 
1843  // fadd (fadd (a, a), b) -> mad 2.0, a, b
1844  if (LHS.getOpcode() == ISD::FADD) {
1845  SDValue A = LHS.getOperand(0);
1846  if (A == LHS.getOperand(1)) {
1847  const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
1848  return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
1849  }
1850  }
1851 
1852  // fadd (b, fadd (a, a)) -> mad 2.0, a, b
1853  if (RHS.getOpcode() == ISD::FADD) {
1854  SDValue A = RHS.getOperand(0);
1855  if (A == RHS.getOperand(1)) {
1856  const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
1857  return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
1858  }
1859  }
1860 
1861  return SDValue();
1862  }
1863  case ISD::FSUB: {
1865  break;
1866 
1867  EVT VT = N->getValueType(0);
1868 
1869  // Try to get the fneg to fold into the source modifier. This undoes generic
1870  // DAG combines and folds them into the mad.
1871  //
1872  // Only do this if we are not trying to support denormals. v_mad_f32 does
1873  // not support denormals ever.
1874  if (VT == MVT::f32 &&
1875  !Subtarget->hasFP32Denormals()) {
1876  SDValue LHS = N->getOperand(0);
1877  SDValue RHS = N->getOperand(1);
1878  if (LHS.getOpcode() == ISD::FADD) {
1879  // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
1880 
1881  SDValue A = LHS.getOperand(0);
1882  if (A == LHS.getOperand(1)) {
1883  const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
1884  SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
1885 
1886  return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
1887  }
1888  }
1889 
1890  if (RHS.getOpcode() == ISD::FADD) {
1891  // (fsub c, (fadd a, a)) -> mad -2.0, a, c
1892 
1893  SDValue A = RHS.getOperand(0);
1894  if (A == RHS.getOperand(1)) {
1895  const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
1896  return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
1897  }
1898  }
1899 
1900  return SDValue();
1901  }
1902 
1903  break;
1904  }
1905  }
1906  case ISD::LOAD:
1907  case ISD::STORE:
1908  case ISD::ATOMIC_LOAD:
1909  case ISD::ATOMIC_STORE:
1910  case ISD::ATOMIC_CMP_SWAP:
1911  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1912  case ISD::ATOMIC_SWAP:
1913  case ISD::ATOMIC_LOAD_ADD:
1914  case ISD::ATOMIC_LOAD_SUB:
1915  case ISD::ATOMIC_LOAD_AND:
1916  case ISD::ATOMIC_LOAD_OR:
1917  case ISD::ATOMIC_LOAD_XOR:
1918  case ISD::ATOMIC_LOAD_NAND:
1919  case ISD::ATOMIC_LOAD_MIN:
1920  case ISD::ATOMIC_LOAD_MAX:
1921  case ISD::ATOMIC_LOAD_UMIN:
1922  case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
1923  if (DCI.isBeforeLegalize())
1924  break;
1925 
1926  MemSDNode *MemNode = cast<MemSDNode>(N);
1927  SDValue Ptr = MemNode->getBasePtr();
1928 
1929  // TODO: We could also do this for multiplies.
1930  unsigned AS = MemNode->getAddressSpace();
1931  if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
1932  SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
1933  if (NewPtr) {
1934  SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
1935 
1936  NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
1937  return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
1938  }
1939  }
1940  break;
1941  }
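// The pointer rewrite above lets an access whose address is, e.g.,
// (shl (add x, c), 2) be re-associated by performSHLPtrCombine so that the
// shifted constant can be folded into the instruction's immediate offset.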
1942  case ISD::AND:
1943  return performAndCombine(N, DCI);
1944  case ISD::OR:
1945  return performOrCombine(N, DCI);
1946  case AMDGPUISD::FP_CLASS:
1947  return performClassCombine(N, DCI);
1948  }
1949  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1950 }
1951 
1952 /// \brief Analyze the possible immediate value Op
1953 ///
1954 /// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
1955 /// and the immediate value if it's a literal immediate
1956 int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
1957 
1958  const SIInstrInfo *TII =
1959  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1960 
1961  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
1962  if (TII->isInlineConstant(Node->getAPIntValue()))
1963  return 0;
1964 
1965  uint64_t Val = Node->getZExtValue();
1966  return isUInt<32>(Val) ? Val : -1;
1967  }
1968 
1969  if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
1970  if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt()))
1971  return 0;
1972 
1973  if (Node->getValueType(0) == MVT::f32)
1974  return FloatToBits(Node->getValueAPF().convertToFloat());
1975 
1976  return -1;
1977  }
1978 
1979  return -1;
1980 }
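// For example: a ConstantSDNode of 64 or a ConstantFPSDNode of 1.0f is an
// inline constant and yields 0; an f32 constant such as 1234.5f yields its
// 32-bit bit pattern as a literal; a value that would need a 64-bit
// literal yields -1.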
1981 
1982 /// \brief Helper function for adjustWritemask
1983 static unsigned SubIdx2Lane(unsigned Idx) {
1984  switch (Idx) {
1985  default: return 0;
1986  case AMDGPU::sub0: return 0;
1987  case AMDGPU::sub1: return 1;
1988  case AMDGPU::sub2: return 2;
1989  case AMDGPU::sub3: return 3;
1990  }
1991 }
1992 
1993 /// \brief Adjust the writemask of MIMG instructions
1994 void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
1995  SelectionDAG &DAG) const {
1996  SDNode *Users[4] = { };
1997  unsigned Lane = 0;
1998  unsigned OldDmask = Node->getConstantOperandVal(0);
1999  unsigned NewDmask = 0;
2000 
2001  // Try to figure out the used register components
2002  for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
2003  I != E; ++I) {
2004 
2005  // Abort if we can't understand the usage
2006  if (!I->isMachineOpcode() ||
2007  I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
2008  return;
2009 
2010  // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
2011  // Note that subregs are packed, i.e. Lane==0 is the first bit set
2012  // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
2013  // set, etc.
2014  Lane = SubIdx2Lane(I->getConstantOperandVal(1));
2015 
2016  // Set which texture component corresponds to the lane.
2017  unsigned Comp;
2018  for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
2019  assert(Dmask);
2020  Comp = countTrailingZeros(Dmask);
2021  Dmask &= ~(1 << Comp);
2022  }
2023 
2024  // Abort if we have more than one user per component
2025  if (Users[Lane])
2026  return;
2027 
2028  Users[Lane] = *I;
2029  NewDmask |= 1 << Comp;
2030  }
2031 
2032  // Abort if there's no change
2033  if (NewDmask == OldDmask)
2034  return;
2035 
2036  // Adjust the writemask in the node
2037  std::vector<SDValue> Ops;
2038  Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
2039  Ops.insert(Ops.end(), Node->op_begin() + 1, Node->op_end());
2040  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
2041 
2042  // If we only got one lane, replace it with a copy
2043  // (if NewDmask has only one bit set...)
2044  if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
2045  SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
2046  MVT::i32);
2047  SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2048  SDLoc(), Users[Lane]->getValueType(0),
2049  SDValue(Node, 0), RC);
2050  DAG.ReplaceAllUsesWith(Users[Lane], Copy);
2051  return;
2052  }
2053 
2054  // Update the users of the node with the new indices
2055  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
2056 
2057  SDNode *User = Users[i];
2058  if (!User)
2059  continue;
2060 
2061  SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
2062  DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
2063 
2064  switch (Idx) {
2065  default: break;
2066  case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
2067  case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
2068  case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
2069  }
2070  }
2071 }
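// For example, if an image_sample was selected with dmask = 0xf but only
// the sub0 and sub2 components of its result are used, NewDmask becomes
// 0x5 and the two users are renumbered to sub0/sub1 above; the result
// register class is shrunk later in AdjustInstrPostInstrSelection.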
2072 
2073 static bool isFrameIndexOp(SDValue Op) {
2074  if (Op.getOpcode() == ISD::AssertZext)
2075  Op = Op.getOperand(0);
2076 
2077  return isa<FrameIndexSDNode>(Op);
2078 }
2079 
2080 /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
2081 /// with frame index operands.
2082 /// LLVM assumes that inputs to these instructions are registers.
2083 void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
2084  SelectionDAG &DAG) const {
2085 
2086  SmallVector<SDValue, 8> Ops;
2087  for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
2088  if (!isFrameIndexOp(Node->getOperand(i))) {
2089  Ops.push_back(Node->getOperand(i));
2090  continue;
2091  }
2092 
2093  SDLoc DL(Node);
2094  Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
2095  Node->getOperand(i).getValueType(),
2096  Node->getOperand(i)), 0));
2097  }
2098 
2099  DAG.UpdateNodeOperands(Node, Ops);
2100 }
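// E.g. an INSERT_SUBREG or REG_SEQUENCE fed by a TargetFrameIndex gets the
// frame index materialized into a register via S_MOV_B32 first, since
// those nodes expect register operands.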
2101 
2102 /// \brief Fold the instructions after selecting them.
2103 SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
2104  SelectionDAG &DAG) const {
2105  const SIInstrInfo *TII =
2106  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2107 
2108  if (TII->isMIMG(Node->getMachineOpcode()))
2109  adjustWritemask(Node, DAG);
2110 
2111  if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
2112  Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
2113  legalizeTargetIndependentNode(Node, DAG);
2114  return Node;
2115  }
2116  return Node;
2117 }
2118 
2119 /// \brief Assign the register class depending on the number of
2120 /// bits set in the writemask
2121 void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
2122  SDNode *Node) const {
2123  const SIInstrInfo *TII =
2124  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2125 
2126  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2127  TII->legalizeOperands(MI);
2128 
2129  if (TII->isMIMG(MI->getOpcode())) {
2130  unsigned VReg = MI->getOperand(0).getReg();
2131  unsigned Writemask = MI->getOperand(1).getImm();
2132  unsigned BitsSet = 0;
2133  for (unsigned i = 0; i < 4; ++i)
2134  BitsSet += Writemask & (1 << i) ? 1 : 0;
2135 
2136  const TargetRegisterClass *RC;
2137  switch (BitsSet) {
2138  default: return;
2139  case 1: RC = &AMDGPU::VGPR_32RegClass; break;
2140  case 2: RC = &AMDGPU::VReg_64RegClass; break;
2141  case 3: RC = &AMDGPU::VReg_96RegClass; break;
2142  }
2143 
2144  unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
2145  MI->setDesc(TII->get(NewOpcode));
2146  MRI.setRegClass(VReg, RC);
2147  return;
2148  }
2149 
2150  // Replace unused atomics with the no return version.
2151  int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
2152  if (NoRetAtomicOp != -1) {
2153  if (!Node->hasAnyUseOfValue(0)) {
2154  MI->setDesc(TII->get(NoRetAtomicOp));
2155  MI->RemoveOperand(0);
2156  }
2157 
2158  return;
2159  }
2160 }
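// For example, an image_sample whose adjusted dmask has two bits set is
// rewritten to the two-channel opcode via getMaskedMIMGOp and its result
// register is constrained to VReg_64 instead of the original 128-bit
// class.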
2161 
2162 static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
2163  SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
2164  return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
2165 }
2166 
2167 MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
2168  SDLoc DL,
2169  SDValue Ptr) const {
2170  const SIInstrInfo *TII =
2171  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2172 #if 1
2173  // XXX - Workaround for moveToVALU not handling different register class
2174  // inserts for REG_SEQUENCE.
2175 
2176  // Build the half of the subregister with the constants.
2177  const SDValue Ops0[] = {
2178  DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
2179  buildSMovImm32(DAG, DL, 0),
2180  DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
2181  buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
2182  DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
2183  };
2184 
2185  SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
2186  MVT::v2i32, Ops0), 0);
2187 
2188  // Combine the constants and the pointer.
2189  const SDValue Ops1[] = {
2190  DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
2191  Ptr,
2192  DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
2193  SubRegHi,
2194  DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
2195  };
2196 
2197  return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
2198 #else
2199  const SDValue Ops[] = {
2200  DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
2201  Ptr,
2202  DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
2203  buildSMovImm32(DAG, DL, 0),
2204  DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
2205  buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
2206  DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
2207  };
2208 
2209  return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
2210 
2211 #endif
2212 }
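// The resulting 128-bit descriptor is roughly { Ptr in dwords 0-1, 0 in
// dword 2, the high half of the default resource data format in dword 3 },
// i.e. the 64-bit pointer wrapped into an ADDR64-style buffer resource.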
2213 
2214 /// \brief Return a resource descriptor with the 'Add TID' bit enabled
2215 /// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
2216 /// of the resource descriptor) to create an offset, which is added to the
2217 /// resource pointer.
2218 MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
2219  SDLoc DL,
2220  SDValue Ptr,
2221  uint32_t RsrcDword1,
2222  uint64_t RsrcDword2And3) const {
2223  SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
2224  SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
2225  if (RsrcDword1) {
2226  PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
2227  DAG.getConstant(RsrcDword1, DL, MVT::i32)),
2228  0);
2229  }
2230 
2231  SDValue DataLo = buildSMovImm32(DAG, DL,
2232  RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
2233  SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
2234 
2235  const SDValue Ops[] = {
2236  DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
2237  PtrLo,
2238  DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
2239  PtrHi,
2240  DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
2241  DataLo,
2242  DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
2243  DataHi,
2244  DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
2245  };
2246 
2247  return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
2248 }
2249 
2250 MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
2251  SDLoc DL,
2252  SDValue Ptr) const {
2253  const SIInstrInfo *TII =
2254  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2255  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
2256  0xffffffff; // Size
2257 
2258  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
2259 }
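// The scratch descriptor built here is the base pointer plus an all-ones
// size field and the default data format with the 'Add TID' bit set, so
// each lane's accesses are offset by its thread ID times the stride
// (see buildRSRC above).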
2260 
2261 SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
2262  const TargetRegisterClass *RC,
2263  unsigned Reg, EVT VT) const {
2264  SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
2265 
2266  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
2267  cast<RegisterSDNode>(VReg)->getReg(), VT);
2268 }
2269 
2270 //===----------------------------------------------------------------------===//
2271 // SI Inline Assembly Support
2272 //===----------------------------------------------------------------------===//
2273 
2274 std::pair<unsigned, const TargetRegisterClass *>
2275 SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2276  StringRef Constraint,
2277  MVT VT) const {
2278  if (Constraint == "r") {
2279  switch(VT.SimpleTy) {
2280  default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
2281  case MVT::i64:
2282  return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
2283  case MVT::i32:
2284  return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
2285  }
2286  }
2287 
2288  if (Constraint.size() > 1) {
2289  const TargetRegisterClass *RC = nullptr;
2290  if (Constraint[1] == 'v') {
2291  RC = &AMDGPU::VGPR_32RegClass;
2292  } else if (Constraint[1] == 's') {
2293  RC = &AMDGPU::SGPR_32RegClass;
2294  }
2295 
2296  if (RC) {
2297  uint32_t Idx;
2298  bool Failed = Constraint.substr(2).getAsInteger(10, Idx);
2299  if (!Failed && Idx < RC->getNumRegs())
2300  return std::make_pair(RC->getRegister(Idx), RC);
2301  }
2302  }
2303  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2304 }