1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief This is the parent TargetLowering class for hardware code gen
12 /// targets.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUISelLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUFrameLowering.h"
19 #include "AMDGPUIntrinsicInfo.h"
20 #include "AMDGPURegisterInfo.h"
21 #include "AMDGPUSubtarget.h"
23 #include "SIMachineFunctionInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DiagnosticInfo.h"
32 
33 using namespace llvm;
34 
35 namespace {
36 
37 /// Diagnostic information for unimplemented or unsupported feature reporting.
38 class DiagnosticInfoUnsupported : public DiagnosticInfo {
39 private:
40  const Twine &Description;
41  const Function &Fn;
42 
43  static int KindID;
44 
45  static int getKindID() {
46  if (KindID == 0)
47  KindID = llvm::getNextAvailablePluginDiagnosticKind();
48  return KindID;
49  }
50 
51 public:
52  DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
53  DiagnosticSeverity Severity = DS_Error)
54  : DiagnosticInfo(getKindID(), Severity),
55  Description(Desc),
56  Fn(Fn) { }
57 
58  const Function &getFunction() const { return Fn; }
59  const Twine &getDescription() const { return Description; }
60 
61  void print(DiagnosticPrinter &DP) const override {
62  DP << "unsupported " << getDescription() << " in " << Fn.getName();
63  }
64 
65  static bool classof(const DiagnosticInfo *DI) {
66  return DI->getKind() == getKindID();
67  }
68 };
69 
70 int DiagnosticInfoUnsupported::KindID = 0;
71 }
72 
73 
74 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
75  CCValAssign::LocInfo LocInfo,
76  ISD::ArgFlagsTy ArgFlags, CCState &State) {
77  unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
78  ArgFlags.getOrigAlign());
79  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
80 
81  return true;
82 }
83 
84 #include "AMDGPUGenCallingConv.inc"
85 
86 // Find a larger type to do a load / store of a vector with.
88  unsigned StoreSize = VT.getStoreSizeInBits();
89  if (StoreSize <= 32)
90  return EVT::getIntegerVT(Ctx, StoreSize);
91 
92  assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
93  return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
94 }
95 
96 // Type for a vector that will be loaded to.
98  unsigned StoreSize = VT.getStoreSizeInBits();
99  if (StoreSize <= 32)
100  return EVT::getIntegerVT(Ctx, 32);
101 
102  return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
103 }
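// Illustrative note: for a v4i16 vector (64-bit store size) both helpers above
// return v2i32; for a scalar i16 the first keeps i16 as the memory type while
// the second widens the load register type to i32.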
104 
106  const AMDGPUSubtarget &STI)
107  : TargetLowering(TM), Subtarget(&STI) {
112 
115 
116  // We need to custom lower some of the intrinsics
118 
119  // Library functions. These default to Expand, but we have instructions
120  // for them.
131 
134 
137 
138  // v_mad_f32 does not support denormals according to some sources.
139  if (!Subtarget->hasFP32Denormals())
141 
142  // Expand to fneg + fadd.
144 
145  // Lower floating point store/load to integer store/load to reduce the number
146  // of patterns in tablegen.
149 
152 
155 
158 
161 
164 
167 
168  // Custom lowering of vector stores is required for local address space
169  // stores.
171 
175 
176  // XXX: This can be changed to Custom, once ExpandVectorStores can
177  // handle 64-bit stores.
179 
185 
186 
189 
192 
195 
198 
201 
204 
207 
218 
219  // There are no 64-bit extloads. These should be done as a 32-bit extload and
220  // an extension to 64-bit.
221  for (MVT VT : MVT::integer_valuetypes()) {
225  }
226 
227  for (MVT VT : MVT::integer_vector_valuetypes()) {
240  }
241 
243 
249  }
250 
251  if (!Subtarget->hasBFI()) {
252  // fcopysign can be done in a single instruction with BFI.
255  }
256 
258 
263 
268 
273 
276 
277  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
278  for (MVT VT : ScalarIntVTs) {
281 
282  // GPU does not have divrem function for signed or unsigned.
285 
286  // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
289 
293  }
294 
295  if (!Subtarget->hasBCNT(32))
297 
298  if (!Subtarget->hasBCNT(64))
300 
301  // The hardware supports 32-bit ROTR, but not ROTL.
305 
316 
321 
322  if (!Subtarget->hasFFBH())
324 
325  if (!Subtarget->hasFFBL())
327 
328  static const MVT::SimpleValueType VectorIntTypes[] = {
330  };
331 
332  for (MVT VT : VectorIntTypes) {
333  // Expand the following operations for the current type by default.
371  }
372 
373  static const MVT::SimpleValueType FloatVectorTypes[] = {
375  };
376 
377  for (MVT VT : FloatVectorTypes) {
404  }
405 
408 
414 
417 
420 
422  setJumpIsExpensive(true);
423 
424  // SI at least has hardware support for floating point exceptions, but no way
425  // of using or handling them is implemented. They are also optional in OpenCL
426  // (Section 7.3)
428 
429  setSelectIsExpensive(false);
431 
432  // There are no integer divide instructions, and these expand to a pretty
433  // large sequence of instructions.
434  setIntDivIsCheap(false);
435  setPow2SDivIsCheap(false);
436  setFsqrtIsCheap(true);
437 
438  // FIXME: Need to really handle these.
439  MaxStoresPerMemcpy = 4096;
440  MaxStoresPerMemmove = 4096;
441  MaxStoresPerMemset = 4096;
442 }
443 
444 //===----------------------------------------------------------------------===//
445 // Target Information
446 //===----------------------------------------------------------------------===//
447 
449  return MVT::i32;
450 }
451 
453  return true;
454 }
455 
456 // The backend supports 32 and 64 bit floating point immediates.
457 // FIXME: Why are we reporting vectors of FP immediates as legal?
458 bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
459  EVT ScalarVT = VT.getScalarType();
460  return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
461 }
462 
463 // We don't want to shrink f64 / f32 constants.
465  EVT ScalarVT = VT.getScalarType();
466  return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
467 }
468 
471  EVT NewVT) const {
472 
473  unsigned NewSize = NewVT.getStoreSizeInBits();
474 
475  // If we are reducing to a 32-bit load, this is always better.
476  if (NewSize == 32)
477  return true;
478 
479  EVT OldVT = N->getValueType(0);
480  unsigned OldSize = OldVT.getStoreSizeInBits();
481 
482  // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
483  // extloads, so doing one requires using a buffer_load. In cases where we
484  // still couldn't use a scalar load, using the wider load shouldn't really
485  // hurt anything.
486 
487  // If the old size already had to be an extload, there's no harm in continuing
488  // to reduce the width.
489  return (OldSize < 32);
490 }
491 
493  EVT CastTy) const {
494  if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
495  return true;
496 
497  unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
498  unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
499 
500  return ((LScalarSize <= CastScalarSize) ||
501  (CastScalarSize >= 32) ||
502  (LScalarSize < 32));
503 }
504 
505 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
506 // profitable with the expansion for 64-bit since it's generally good to
507 // speculate things.
508 // FIXME: These should really have the size as a parameter.
510  return true;
511 }
512 
514  return true;
515 }
516 
517 //===---------------------------------------------------------------------===//
518 // Target Properties
519 //===---------------------------------------------------------------------===//
520 
522  assert(VT.isFloatingPoint());
523  return VT == MVT::f32 || VT == MVT::f64;
524 }
525 
527  assert(VT.isFloatingPoint());
528  return VT == MVT::f32 || VT == MVT::f64;
529 }
530 
532  unsigned NumElem,
533  unsigned AS) const {
534  return true;
535 }
536 
538  // Truncate is just accessing a subregister.
539  return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
540 }
541 
543  // Truncate is just accessing a subregister.
544  return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
545  (Dest->getPrimitiveSizeInBits() % 32 == 0);
546 }
547 
548 bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
549  unsigned SrcSize = Src->getScalarSizeInBits();
550  unsigned DestSize = Dest->getScalarSizeInBits();
551 
552  return SrcSize == 32 && DestSize == 64;
553 }
554 
555 bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
556  // Any register load of a 64-bit value really requires 2 32-bit moves. For all
557  // practical purposes, the extra mov 0 to load a 64-bit is free. As used,
558  // this will enable reducing 64-bit operations to 32-bit, which is always
559  // good.
560  return Src == MVT::i32 && Dest == MVT::i64;
561 }
562 
564  return isZExtFree(Val.getValueType(), VT2);
565 }
566 
568  // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
569  // limited number of native 64-bit operations. Shrinking an operation to fit
570  // in a single 32-bit register should always be helpful. As currently used,
571  // this is much less general than the name suggests, and is only used in
572  // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
573  // not profitable, and may actually be harmful.
574  return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
575 }
576 
577 //===---------------------------------------------------------------------===//
578 // TargetLowering Callbacks
579 //===---------------------------------------------------------------------===//
580 
582  const SmallVectorImpl<ISD::InputArg> &Ins) const {
583 
584  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
585 }
586 
588  SDValue Chain,
589  CallingConv::ID CallConv,
590  bool isVarArg,
592  const SmallVectorImpl<SDValue> &OutVals,
593  SDLoc DL, SelectionDAG &DAG) const {
594  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
595 }
596 
597 //===---------------------------------------------------------------------===//
598 // Target specific lowering
599 //===---------------------------------------------------------------------===//
600 
602  SmallVectorImpl<SDValue> &InVals) const {
603  SDValue Callee = CLI.Callee;
604  SelectionDAG &DAG = CLI.DAG;
605 
606  const Function &Fn = *DAG.getMachineFunction().getFunction();
607 
608  StringRef FuncName("<unknown>");
609 
610  if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
611  FuncName = G->getSymbol();
612  else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
613  FuncName = G->getGlobal()->getName();
614 
615  DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
616  DAG.getContext()->diagnose(NoCalls);
617  return SDValue();
618 }
619 
621  SelectionDAG &DAG) const {
622  switch (Op.getOpcode()) {
623  default:
624  Op.getNode()->dump();
625  llvm_unreachable("Custom lowering code for this "
626  "instruction is not implemented yet!");
627  break;
628  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
629  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
630  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
631  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
632  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
633  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
634  case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
635  case ISD::FREM: return LowerFREM(Op, DAG);
636  case ISD::FCEIL: return LowerFCEIL(Op, DAG);
637  case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
638  case ISD::FRINT: return LowerFRINT(Op, DAG);
639  case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
640  case ISD::FROUND: return LowerFROUND(Op, DAG);
641  case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
642  case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
643  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
644  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
645  case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
646  }
647  return Op;
648 }
649 
651  SmallVectorImpl<SDValue> &Results,
652  SelectionDAG &DAG) const {
653  switch (N->getOpcode()) {
655  // Different parts of legalization seem to interpret which type of
656  // sign_extend_inreg is the one to check for custom lowering. The extended
657  // from type is what really matters, but some places check for custom
658  // lowering of the result type. This results in trying to use
659  // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
660  // nothing here and let the illegal result integer be handled normally.
661  return;
662  case ISD::LOAD: {
663  SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
664  if (!Node)
665  return;
666 
667  Results.push_back(SDValue(Node, 0));
668  Results.push_back(SDValue(Node, 1));
669  // XXX: LLVM does not seem to replace the chain value inside the
670  // CustomWidenLowerNode function.
671  DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
672  return;
673  }
674  case ISD::STORE: {
675  SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
676  if (Lowered.getNode())
677  Results.push_back(Lowered);
678  return;
679  }
680  default:
681  return;
682  }
683 }
684 
685 // FIXME: This implements accesses to initialized globals in the constant
686 // address space by copying them to private and accessing that. It does not
687 // properly handle illegal types or vectors. The private vector loads are not
688 // scalarized, and the illegal scalars hit an assertion. This technique will not
689 // work well with large initializers, and this should eventually be
690 // removed. Initialized globals should be placed into a data section that the
691 // runtime will load into a buffer before the kernel is executed. Uses of the
692 // global need to be replaced with a pointer loaded from an implicit kernel
693 // argument into this buffer holding the copy of the data, which will remove the
694 // need for any of this.
695 SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
696  const GlobalValue *GV,
697  const SDValue &InitPtr,
698  SDValue Chain,
699  SelectionDAG &DAG) const {
700  const DataLayout &TD = DAG.getDataLayout();
701  SDLoc DL(InitPtr);
702  Type *InitTy = Init->getType();
703 
704  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
705  EVT VT = EVT::getEVT(InitTy);
707  return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
708  MachinePointerInfo(UndefValue::get(PtrTy)), false,
709  false, TD.getPrefTypeAlignment(InitTy));
710  }
711 
712  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
713  EVT VT = EVT::getEVT(CFP->getType());
714  PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
715  return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
716  MachinePointerInfo(UndefValue::get(PtrTy)), false,
717  false, TD.getPrefTypeAlignment(CFP->getType()));
718  }
719 
720  if (StructType *ST = dyn_cast<StructType>(InitTy)) {
721  const StructLayout *SL = TD.getStructLayout(ST);
722 
723  EVT PtrVT = InitPtr.getValueType();
725 
726  for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
727  SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
728  SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
729 
730  Constant *Elt = Init->getAggregateElement(I);
731  Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
732  }
733 
734  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
735  }
736 
737  if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
738  EVT PtrVT = InitPtr.getValueType();
739 
740  unsigned NumElements;
741  if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
742  NumElements = AT->getNumElements();
743  else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
744  NumElements = VT->getNumElements();
745  else
746  llvm_unreachable("Unexpected type");
747 
748  unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType());
750  for (unsigned i = 0; i < NumElements; ++i) {
751  SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
752  SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
753 
754  Constant *Elt = Init->getAggregateElement(i);
755  Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
756  }
757 
758  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
759  }
760 
761  if (isa<UndefValue>(Init)) {
762  EVT VT = EVT::getEVT(InitTy);
764  return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
765  MachinePointerInfo(UndefValue::get(PtrTy)), false,
766  false, TD.getPrefTypeAlignment(InitTy));
767  }
768 
769  Init->dump();
770  llvm_unreachable("Unhandled constant initializer");
771 }
772 
773 static bool hasDefinedInitializer(const GlobalValue *GV) {
774  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
775  if (!GVar || !GVar->hasInitializer())
776  return false;
777 
778  if (isa<UndefValue>(GVar->getInitializer()))
779  return false;
780 
781  return true;
782 }
783 
785  SDValue Op,
786  SelectionDAG &DAG) const {
787 
788  const DataLayout &DL = DAG.getDataLayout();
789  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
790  const GlobalValue *GV = G->getGlobal();
791 
792  switch (G->getAddressSpace()) {
794  // XXX: What does the value of G->getOffset() mean?
795  assert(G->getOffset() == 0 &&
796  "Do not know what to do with a non-zero offset");
797 
798  // TODO: We could emit code to handle the initialization somewhere.
799  if (hasDefinedInitializer(GV))
800  break;
801 
802  unsigned Offset;
803  if (MFI->LocalMemoryObjects.count(GV) == 0) {
804  uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
805  Offset = MFI->LDSSize;
806  MFI->LocalMemoryObjects[GV] = Offset;
807  // XXX: Account for alignment?
808  MFI->LDSSize += Size;
809  } else {
810  Offset = MFI->LocalMemoryObjects[GV];
811  }
812 
813  return DAG.getConstant(Offset, SDLoc(Op),
815  }
817  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
818  Type *EltType = GV->getType()->getElementType();
819  unsigned Size = DL.getTypeAllocSize(EltType);
820  unsigned Alignment = DL.getPrefTypeAlignment(EltType);
821 
822  MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS);
823  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
824 
825  int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
826  SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
827 
828  const GlobalVariable *Var = cast<GlobalVariable>(GV);
829  if (!Var->hasInitializer()) {
830  // This has no use, but bugpoint will hit it.
831  return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
832  }
833 
834  const Constant *Init = Var->getInitializer();
835  SmallVector<SDNode*, 8> WorkList;
836 
838  E = DAG.getEntryNode()->use_end(); I != E; ++I) {
839  if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
840  continue;
841  WorkList.push_back(*I);
842  }
843  SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
844  for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
845  E = WorkList.end(); I != E; ++I) {
847  Ops.push_back(Chain);
848  for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
849  Ops.push_back((*I)->getOperand(i));
850  }
851  DAG.UpdateNodeOperands(*I, Ops);
852  }
853  return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
854  }
855  }
856 
857  const Function &Fn = *DAG.getMachineFunction().getFunction();
858  DiagnosticInfoUnsupported BadInit(Fn,
859  "initializer for address space");
860  DAG.getContext()->diagnose(BadInit);
861  return SDValue();
862 }
863 
864 SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
865  SelectionDAG &DAG) const {
867 
868  for (const SDUse &U : Op->ops())
869  DAG.ExtractVectorElements(U.get(), Args);
870 
871  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
872 }
873 
874 SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
875  SelectionDAG &DAG) const {
876 
878  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
879  EVT VT = Op.getValueType();
880  DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
881  VT.getVectorNumElements());
882 
883  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
884 }
885 
886 SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
887  SelectionDAG &DAG) const {
888 
891 
892  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
893 
894  unsigned FrameIndex = FIN->getIndex();
895  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
896  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
897  Op.getValueType());
898 }
899 
900 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
901  SelectionDAG &DAG) const {
902  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
903  SDLoc DL(Op);
904  EVT VT = Op.getValueType();
905 
906  switch (IntrinsicID) {
907  default: return Op;
908  case AMDGPUIntrinsic::AMDGPU_abs:
909  case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
910  return LowerIntrinsicIABS(Op, DAG);
911  case AMDGPUIntrinsic::AMDGPU_lrp:
912  return LowerIntrinsicLRP(Op, DAG);
913 
914  case AMDGPUIntrinsic::AMDGPU_clamp:
915  case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
916  return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
917  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
918 
919  case Intrinsic::AMDGPU_div_scale: {
920  // 3rd parameter required to be a constant.
922  if (!Param)
923  return DAG.getUNDEF(VT);
924 
925  // Translate to the operands expected by the machine instruction. The
926  // first operand must match either the numerator or the denominator.
927  SDValue Numerator = Op.getOperand(1);
928  SDValue Denominator = Op.getOperand(2);
929 
930  // Note this order is the opposite of the machine instruction's operand
931  // order, which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
932  // intrinsic has the numerator as the first operand to match a normal
933  // division operation.
934 
935  SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
936 
937  return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
938  Denominator, Numerator);
939  }
940 
941  case Intrinsic::AMDGPU_div_fmas:
942  return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
943  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
944  Op.getOperand(4));
945 
946  case Intrinsic::AMDGPU_div_fixup:
947  return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
948  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
949 
950  case Intrinsic::AMDGPU_trig_preop:
951  return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
952  Op.getOperand(1), Op.getOperand(2));
953 
954  case Intrinsic::AMDGPU_rcp:
955  return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
956 
957  case Intrinsic::AMDGPU_rsq:
958  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
959 
960  case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
961  return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
962 
963  case Intrinsic::AMDGPU_rsq_clamped:
965  Type *Type = VT.getTypeForEVT(*DAG.getContext());
967  APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
968 
969  SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
970  SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
971  DAG.getConstantFP(Max, DL, VT));
972  return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
973  DAG.getConstantFP(Min, DL, VT));
974  } else {
975  return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
976  }
977 
978  case Intrinsic::AMDGPU_ldexp:
979  return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
980  Op.getOperand(2));
981 
982  case AMDGPUIntrinsic::AMDGPU_imax:
983  return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1),
984  Op.getOperand(2));
985  case AMDGPUIntrinsic::AMDGPU_umax:
986  return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1),
987  Op.getOperand(2));
988  case AMDGPUIntrinsic::AMDGPU_imin:
989  return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1),
990  Op.getOperand(2));
991  case AMDGPUIntrinsic::AMDGPU_umin:
992  return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1),
993  Op.getOperand(2));
994 
995  case AMDGPUIntrinsic::AMDGPU_umul24:
996  return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
997  Op.getOperand(1), Op.getOperand(2));
998 
999  case AMDGPUIntrinsic::AMDGPU_imul24:
1000  return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
1001  Op.getOperand(1), Op.getOperand(2));
1002 
1003  case AMDGPUIntrinsic::AMDGPU_umad24:
1004  return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
1005  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
1006 
1007  case AMDGPUIntrinsic::AMDGPU_imad24:
1008  return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
1009  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
1010 
1011  case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
1012  return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
1013 
1014  case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
1015  return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
1016 
1017  case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
1018  return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
1019 
1020  case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
1021  return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
1022 
1023  case AMDGPUIntrinsic::AMDGPU_bfe_i32:
1024  return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
1025  Op.getOperand(1),
1026  Op.getOperand(2),
1027  Op.getOperand(3));
1028 
1029  case AMDGPUIntrinsic::AMDGPU_bfe_u32:
1030  return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
1031  Op.getOperand(1),
1032  Op.getOperand(2),
1033  Op.getOperand(3));
1034 
1035  case AMDGPUIntrinsic::AMDGPU_bfi:
1036  return DAG.getNode(AMDGPUISD::BFI, DL, VT,
1037  Op.getOperand(1),
1038  Op.getOperand(2),
1039  Op.getOperand(3));
1040 
1041  case AMDGPUIntrinsic::AMDGPU_bfm:
1042  return DAG.getNode(AMDGPUISD::BFM, DL, VT,
1043  Op.getOperand(1),
1044  Op.getOperand(2));
1045 
1046  case AMDGPUIntrinsic::AMDGPU_brev:
1047  return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
1048 
1049  case Intrinsic::AMDGPU_class:
1050  return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
1051  Op.getOperand(1), Op.getOperand(2));
1052 
1053  case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
1054  return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
1055 
1056  case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
1057  return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
1058  case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
1059  return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
1060  }
1061 }
1062 
1063 /// IABS(a) = SMAX(sub(0, a), a)
1065  SelectionDAG &DAG) const {
1066  SDLoc DL(Op);
1067  EVT VT = Op.getValueType();
1068  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
1069  Op.getOperand(1));
1070 
1071  return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1));
1072 }
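// Worked example: IABS(-5) = SMAX(0 - (-5), -5) = SMAX(5, -5) = 5. As with
// most integer abs lowerings, IABS(INT_MIN) wraps back to INT_MIN.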
1073 
1074 /// Linear Interpolation
1075 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
1077  SelectionDAG &DAG) const {
1078  SDLoc DL(Op);
1079  EVT VT = Op.getValueType();
1080  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
1081  DAG.getConstantFP(1.0f, DL, MVT::f32),
1082  Op.getOperand(1));
1083  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
1084  Op.getOperand(3));
1085  return DAG.getNode(ISD::FADD, DL, VT,
1086  DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
1087  OneSubAC);
1088 }
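// Worked example: LRP(0.25, 8.0, 4.0) = 0.25 * 8.0 + (1.0 - 0.25) * 4.0
//                = 2.0 + 3.0 = 5.0, i.e. a blend of 25% b and 75% c.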
1089 
1090 /// \brief Generate Min/Max node
1092  EVT VT,
1093  SDValue LHS,
1094  SDValue RHS,
1095  SDValue True,
1096  SDValue False,
1097  SDValue CC,
1098  DAGCombinerInfo &DCI) const {
1100  return SDValue();
1101 
1102  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1103  return SDValue();
1104 
1105  SelectionDAG &DAG = DCI.DAG;
1106  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1107  switch (CCOpcode) {
1108  case ISD::SETOEQ:
1109  case ISD::SETONE:
1110  case ISD::SETUNE:
1111  case ISD::SETNE:
1112  case ISD::SETUEQ:
1113  case ISD::SETEQ:
1114  case ISD::SETFALSE:
1115  case ISD::SETFALSE2:
1116  case ISD::SETTRUE:
1117  case ISD::SETTRUE2:
1118  case ISD::SETUO:
1119  case ISD::SETO:
1120  break;
1121  case ISD::SETULE:
1122  case ISD::SETULT: {
1123  if (LHS == True)
1124  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1125  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1126  }
1127  case ISD::SETOLE:
1128  case ISD::SETOLT:
1129  case ISD::SETLE:
1130  case ISD::SETLT: {
1131  // Ordered. Assume ordered for undefined.
1132 
1133  // Only do this after legalization to avoid interfering with other combines
1134  // which might occur.
1135  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1136  !DCI.isCalledByLegalizer())
1137  return SDValue();
1138 
1139  // We need to permute the operands to get the correct NaN behavior. The
1140  // selected operand is the second one based on the failing compare with NaN,
1141  // so permute it based on the compare type the hardware uses.
1142  if (LHS == True)
1143  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1144  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1145  }
1146  case ISD::SETUGE:
1147  case ISD::SETUGT: {
1148  if (LHS == True)
1149  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1150  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1151  }
1152  case ISD::SETGT:
1153  case ISD::SETGE:
1154  case ISD::SETOGE:
1155  case ISD::SETOGT: {
1156  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1157  !DCI.isCalledByLegalizer())
1158  return SDValue();
1159 
1160  if (LHS == True)
1161  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1162  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1163  }
1164  case ISD::SETCC_INVALID:
1165  llvm_unreachable("Invalid setcc condcode!");
1166  }
1167  return SDValue();
1168 }
1169 
1170 // FIXME: Remove this once these combines are added to DAGCombiner.
1172  EVT VT,
1173  SDValue LHS,
1174  SDValue RHS,
1175  SDValue True,
1176  SDValue False,
1177  SDValue CC,
1178  SelectionDAG &DAG) const {
1179  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1180  return SDValue();
1181 
1182  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1183  switch (CCOpcode) {
1184  case ISD::SETULE:
1185  case ISD::SETULT: {
1186  unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
1187  return DAG.getNode(Opc, DL, VT, LHS, RHS);
1188  }
1189  case ISD::SETLE:
1190  case ISD::SETLT: {
1191  unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
1192  return DAG.getNode(Opc, DL, VT, LHS, RHS);
1193  }
1194  case ISD::SETGT:
1195  case ISD::SETGE: {
1196  unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
1197  return DAG.getNode(Opc, DL, VT, LHS, RHS);
1198  }
1199  case ISD::SETUGE:
1200  case ISD::SETUGT: {
1201  unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
1202  return DAG.getNode(Opc, DL, VT, LHS, RHS);
1203  }
1204  default:
1205  return SDValue();
1206  }
1207 }
1208 
1210  SelectionDAG &DAG) const {
1211  LoadSDNode *Load = cast<LoadSDNode>(Op);
1212  EVT MemVT = Load->getMemoryVT();
1213  EVT MemEltVT = MemVT.getVectorElementType();
1214 
1215  EVT LoadVT = Op.getValueType();
1216  EVT EltVT = LoadVT.getVectorElementType();
1217  EVT PtrVT = Load->getBasePtr().getValueType();
1218 
1219  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
1221  SmallVector<SDValue, 8> Chains;
1222 
1223  SDLoc SL(Op);
1224  unsigned MemEltSize = MemEltVT.getStoreSize();
1225  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
1226 
1227  for (unsigned i = 0; i < NumElts; ++i) {
1228  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
1229  DAG.getConstant(i * MemEltSize, SL, PtrVT));
1230 
1231  SDValue NewLoad
1232  = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
1233  Load->getChain(), Ptr,
1234  SrcValue.getWithOffset(i * MemEltSize),
1235  MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
1236  Load->isInvariant(), Load->getAlignment());
1237  Loads.push_back(NewLoad.getValue(0));
1238  Chains.push_back(NewLoad.getValue(1));
1239  }
1240 
1241  SDValue Ops[] = {
1242  DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
1243  DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
1244  };
1245 
1246  return DAG.getMergeValues(Ops, SL);
1247 }
1248 
1250  SelectionDAG &DAG) const {
1251  EVT VT = Op.getValueType();
1252 
1253  // If this is a 2 element vector, we really want to scalarize and not create
1254  // weird 1 element vectors.
1255  if (VT.getVectorNumElements() == 2)
1256  return ScalarizeVectorLoad(Op, DAG);
1257 
1258  LoadSDNode *Load = cast<LoadSDNode>(Op);
1259  SDValue BasePtr = Load->getBasePtr();
1260  EVT PtrVT = BasePtr.getValueType();
1261  EVT MemVT = Load->getMemoryVT();
1262  SDLoc SL(Op);
1263  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
1264 
1265  EVT LoVT, HiVT;
1266  EVT LoMemVT, HiMemVT;
1267  SDValue Lo, Hi;
1268 
1269  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1270  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1271  std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
1272  SDValue LoLoad
1273  = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
1274  Load->getChain(), BasePtr,
1275  SrcValue,
1276  LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
1277  Load->isInvariant(), Load->getAlignment());
1278 
1279  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1280  DAG.getConstant(LoMemVT.getStoreSize(), SL,
1281  PtrVT));
1282 
1283  SDValue HiLoad
1284  = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
1285  Load->getChain(), HiPtr,
1286  SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1287  HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
1288  Load->isInvariant(), Load->getAlignment());
1289 
1290  SDValue Ops[] = {
1291  DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
1293  LoLoad.getValue(1), HiLoad.getValue(1))
1294  };
1295 
1296  return DAG.getMergeValues(Ops, SL);
1297 }
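// Note (illustration): the split-vector load path above halves the vector, so
// e.g. a v8i32 load becomes two v4i32 loads with the second one at
// BasePtr + 16 bytes, and the halves are reassembled with CONCAT_VECTORS;
// 2-element vectors are scalarized instead to avoid 1-element vector types.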
1298 
1299 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
1300  SelectionDAG &DAG) const {
1301  StoreSDNode *Store = cast<StoreSDNode>(Op);
1302  EVT MemVT = Store->getMemoryVT();
1303  unsigned MemBits = MemVT.getSizeInBits();
1304 
1305  // Byte stores are really expensive, so if possible, try to pack a 32-bit
1306  // vector truncating store into an i32 store.
1307  // XXX: We could also handle other vector bitwidths.
1308  if (!MemVT.isVector() || MemBits > 32) {
1309  return SDValue();
1310  }
1311 
1312  SDLoc DL(Op);
1313  SDValue Value = Store->getValue();
1314  EVT VT = Value.getValueType();
1315  EVT ElemVT = VT.getVectorElementType();
1316  SDValue Ptr = Store->getBasePtr();
1317  EVT MemEltVT = MemVT.getVectorElementType();
1318  unsigned MemEltBits = MemEltVT.getSizeInBits();
1319  unsigned MemNumElements = MemVT.getVectorNumElements();
1320  unsigned PackedSize = MemVT.getStoreSizeInBits();
1321  SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
1322 
1323  assert(Value.getValueType().getScalarSizeInBits() >= 32);
1324 
1325  SDValue PackedValue;
1326  for (unsigned i = 0; i < MemNumElements; ++i) {
1327  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
1328  DAG.getConstant(i, DL, MVT::i32));
1329  Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
1330  Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
1331 
1332  SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
1333  Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
1334 
1335  if (i == 0) {
1336  PackedValue = Elt;
1337  } else {
1338  PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
1339  }
1340  }
1341 
1342  if (PackedSize < 32) {
1343  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
1344  return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
1345  Store->getMemOperand()->getPointerInfo(),
1346  PackedVT,
1347  Store->isNonTemporal(), Store->isVolatile(),
1348  Store->getAlignment());
1349  }
1350 
1351  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
1352  Store->getMemOperand()->getPointerInfo(),
1353  Store->isVolatile(), Store->isNonTemporal(),
1354  Store->getAlignment());
1355 }
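// Worked example: a truncating store of v4i8 <0x11, 0x22, 0x33, 0x44> masks
// each element to 8 bits, shifts it by 0/8/16/24, and ORs the pieces into the
// single i32 value 0x44332211, which is then written with one i32 store.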
1356 
1358  SelectionDAG &DAG) const {
1359  StoreSDNode *Store = cast<StoreSDNode>(Op);
1360  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
1361  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
1362  EVT PtrVT = Store->getBasePtr().getValueType();
1363  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
1364  SDLoc SL(Op);
1365 
1366  SmallVector<SDValue, 8> Chains;
1367 
1368  unsigned EltSize = MemEltVT.getStoreSize();
1369  MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1370 
1371  for (unsigned i = 0, e = NumElts; i != e; ++i) {
1372  SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
1373  Store->getValue(),
1374  DAG.getConstant(i, SL, MVT::i32));
1375 
1376  SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), SL, PtrVT);
1377  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
1378  SDValue NewStore =
1379  DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
1380  SrcValue.getWithOffset(i * EltSize),
1381  MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
1382  Store->getAlignment());
1383  Chains.push_back(NewStore);
1384  }
1385 
1386  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
1387 }
1388 
1390  SelectionDAG &DAG) const {
1391  StoreSDNode *Store = cast<StoreSDNode>(Op);
1392  SDValue Val = Store->getValue();
1393  EVT VT = Val.getValueType();
1394 
1395  // If this is a 2 element vector, we really want to scalarize and not create
1396  // weird 1 element vectors.
1397  if (VT.getVectorNumElements() == 2)
1398  return ScalarizeVectorStore(Op, DAG);
1399 
1400  EVT MemVT = Store->getMemoryVT();
1401  SDValue Chain = Store->getChain();
1402  SDValue BasePtr = Store->getBasePtr();
1403  SDLoc SL(Op);
1404 
1405  EVT LoVT, HiVT;
1406  EVT LoMemVT, HiMemVT;
1407  SDValue Lo, Hi;
1408 
1409  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1410  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1411  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
1412 
1413  EVT PtrVT = BasePtr.getValueType();
1414  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1415  DAG.getConstant(LoMemVT.getStoreSize(), SL,
1416  PtrVT));
1417 
1418  MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1419  SDValue LoStore
1420  = DAG.getTruncStore(Chain, SL, Lo,
1421  BasePtr,
1422  SrcValue,
1423  LoMemVT,
1424  Store->isNonTemporal(),
1425  Store->isVolatile(),
1426  Store->getAlignment());
1427  SDValue HiStore
1428  = DAG.getTruncStore(Chain, SL, Hi,
1429  HiPtr,
1430  SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1431  HiMemVT,
1432  Store->isNonTemporal(),
1433  Store->isVolatile(),
1434  Store->getAlignment());
1435 
1436  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
1437 }
1438 
1439 
1441  SDLoc DL(Op);
1442  LoadSDNode *Load = cast<LoadSDNode>(Op);
1443  ISD::LoadExtType ExtType = Load->getExtensionType();
1444  EVT VT = Op.getValueType();
1445  EVT MemVT = Load->getMemoryVT();
1446 
1447  if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
1448  assert(VT == MVT::i1 && "Only i1 non-extloads expected");
1449  // FIXME: Copied from PPC
1450  // First, load into 32 bits, then truncate to 1 bit.
1451 
1452  SDValue Chain = Load->getChain();
1453  SDValue BasePtr = Load->getBasePtr();
1454  MachineMemOperand *MMO = Load->getMemOperand();
1455 
1456  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
1457  BasePtr, MVT::i8, MMO);
1458 
1459  SDValue Ops[] = {
1460  DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
1461  NewLD.getValue(1)
1462  };
1463 
1464  return DAG.getMergeValues(Ops, DL);
1465  }
1466 
1469  ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
1470  return SDValue();
1471 
1472  // Remaining case: target below SI, private address space, extload narrower
1473  // than 32 bits. Emulate it with a register load and a (1- or 2-)byte extract.
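// Illustrative example (not part of the original comment): an i8 extload from
// private address 6 uses word index 6 >> 2 = 1 and byte index 6 & 3 = 2, so
// the loaded 32-bit register is shifted right by 2 * 8 = 16 bits and the low
// 8 bits are then sign- or zero-extended, depending on the extload type.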
1474 
1475  // Get Register holding the target.
1476  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1477  DAG.getConstant(2, DL, MVT::i32));
1478  // Load the Register.
1480  Load->getChain(), Ptr,
1481  DAG.getTargetConstant(0, DL, MVT::i32),
1482  Op.getOperand(2));
1483 
1484  // Get offset within the register.
1485  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1486  Load->getBasePtr(),
1487  DAG.getConstant(0x3, DL, MVT::i32));
1488 
1489  // Bit offset of target byte (byteIdx * 8).
1490  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1491  DAG.getConstant(3, DL, MVT::i32));
1492 
1493  // Shift to the right.
1494  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1495 
1496  // Eliminate the upper bits by setting them to ...
1497  EVT MemEltVT = MemVT.getScalarType();
1498 
1499  // ... ones.
1500  if (ExtType == ISD::SEXTLOAD) {
1501  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1502 
1503  SDValue Ops[] = {
1504  DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1505  Load->getChain()
1506  };
1507 
1508  return DAG.getMergeValues(Ops, DL);
1509  }
1510 
1511  // ... or zeros.
1512  SDValue Ops[] = {
1513  DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1514  Load->getChain()
1515  };
1516 
1517  return DAG.getMergeValues(Ops, DL);
1518 }
1519 
1521  SDLoc DL(Op);
1522  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
1523  if (Result.getNode()) {
1524  return Result;
1525  }
1526 
1527  StoreSDNode *Store = cast<StoreSDNode>(Op);
1528  SDValue Chain = Store->getChain();
1529  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1531  Store->getValue().getValueType().isVector()) {
1532  return ScalarizeVectorStore(Op, DAG);
1533  }
1534 
1535  EVT MemVT = Store->getMemoryVT();
1536  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
1537  MemVT.bitsLT(MVT::i32)) {
1538  unsigned Mask = 0;
1539  if (Store->getMemoryVT() == MVT::i8) {
1540  Mask = 0xff;
1541  } else if (Store->getMemoryVT() == MVT::i16) {
1542  Mask = 0xffff;
1543  }
1544  SDValue BasePtr = Store->getBasePtr();
1545  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1546  DAG.getConstant(2, DL, MVT::i32));
1548  Chain, Ptr,
1549  DAG.getTargetConstant(0, DL, MVT::i32));
1550 
1551  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1552  DAG.getConstant(0x3, DL, MVT::i32));
1553 
1554  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1555  DAG.getConstant(3, DL, MVT::i32));
1556 
1557  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1558  Store->getValue());
1559 
1560  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1561 
1562  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1563  MaskedValue, ShiftAmt);
1564 
1565  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1566  DAG.getConstant(Mask, DL, MVT::i32),
1567  ShiftAmt);
1568  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1569  DAG.getConstant(0xffffffff, DL, MVT::i32));
1570  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1571 
1572  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1574  Chain, Value, Ptr,
1575  DAG.getTargetConstant(0, DL, MVT::i32));
1576  }
1577  return SDValue();
1578 }
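// Note: the private-address path above emulates an i8/i16 store with a
// read-modify-write of the containing 32-bit word: load the word, clear the
// destination byte/halfword with DstMask, OR in the shifted value, and store
// the word back. E.g. storing 0xAB into byte 1 of a word holding 0 yields
// 0x0000AB00.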
1579 
1580 // This is a shortcut for integer division because we have fast i32<->f32
1581 // conversions, and fast f32 reciprocal instructions. The fractional part of a
1582 // float is enough to accurately represent up to a 24-bit integer.
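// In other words, a 32-bit float carries a 24-bit significand, so any integer
// of magnitude below 2^24 converts to float and back exactly; the callers only
// take this path when the top 8 bits of both operands are known to be zero
// (unsigned) or copies of the sign bit (signed).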
1583 SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
1584  SDLoc DL(Op);
1585  EVT VT = Op.getValueType();
1586  SDValue LHS = Op.getOperand(0);
1587  SDValue RHS = Op.getOperand(1);
1588  MVT IntVT = MVT::i32;
1589  MVT FltVT = MVT::f32;
1590 
1591  ISD::NodeType ToFp  = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
1592  ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
1593 
1594  if (VT.isVector()) {
1595  unsigned NElts = VT.getVectorNumElements();
1596  IntVT = MVT::getVectorVT(MVT::i32, NElts);
1597  FltVT = MVT::getVectorVT(MVT::f32, NElts);
1598  }
1599 
1600  unsigned BitSize = VT.getScalarType().getSizeInBits();
1601 
1602  SDValue jq = DAG.getConstant(1, DL, IntVT);
1603 
1604  if (sign) {
1605  // char|short jq = ia ^ ib;
1606  jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
1607 
1608  // jq = jq >> (bitsize - 2)
1609  jq = DAG.getNode(ISD::SRA, DL, VT, jq,
1610  DAG.getConstant(BitSize - 2, DL, VT));
1611 
1612  // jq = jq | 0x1
1613  jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
1614 
1615  // jq = (int)jq
1616  jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
1617  }
1618 
1619  // int ia = (int)LHS;
1620  SDValue ia = sign ?
1621  DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
1622 
1623  // int ib = (int)RHS;
1624  SDValue ib = sign ?
1625  DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
1626 
1627  // float fa = (float)ia;
1628  SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
1629 
1630  // float fb = (float)ib;
1631  SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
1632 
1633  // float fq = native_divide(fa, fb);
1634  SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
1635  fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
1636 
1637  // fq = trunc(fq);
1638  fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
1639 
1640  // float fqneg = -fq;
1641  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
1642 
1643  // float fr = mad(fqneg, fb, fa);
1644  SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
1645  DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
1646 
1647  // int iq = (int)fq;
1648  SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
1649 
1650  // fr = fabs(fr);
1651  fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
1652 
1653  // fb = fabs(fb);
1654  fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
1655 
1656  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
1657 
1658  // int cv = fr >= fb;
1659  SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
1660 
1661  // jq = (cv ? jq : 0);
1662  jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
1663 
1664  // dst = trunc/extend to legal type
1665  iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
1666 
1667  // dst = iq + jq;
1668  SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
1669 
1670  // Rem needs compensation; it's easier to recompute it.
1671  SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
1672  Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
1673 
1674  SDValue Res[2] = {
1675  Div,
1676  Rem
1677  };
1678  return DAG.getMergeValues(Res, DL);
1679 }
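// Note on the jq correction above: (ia ^ ib) >> (BitSize - 2) | 1 evaluates to
// +1 when the operand signs agree and -1 when they differ, and it is only
// added to the truncated quotient when fabs(fr) >= fabs(fb), i.e. when the
// float quotient came out short of the exact quotient by one unit.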
1680 
1682  SelectionDAG &DAG,
1683  SmallVectorImpl<SDValue> &Results) const {
1684  assert(Op.getValueType() == MVT::i64);
1685 
1686  SDLoc DL(Op);
1687  EVT VT = Op.getValueType();
1688  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1689 
1690  SDValue one = DAG.getConstant(1, DL, HalfVT);
1691  SDValue zero = DAG.getConstant(0, DL, HalfVT);
1692 
1693  //HiLo split
1694  SDValue LHS = Op.getOperand(0);
1695  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
1696  SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
1697 
1698  SDValue RHS = Op.getOperand(1);
1699  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
1700  SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
1701 
1702  if (VT == MVT::i64 &&
1703  DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
1704  DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
1705 
1706  SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1707  LHS_Lo, RHS_Lo);
1708 
1709  SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
1710  SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
1711  Results.push_back(DIV);
1712  Results.push_back(REM);
1713  return;
1714  }
1715 
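// The loop below is a standard restoring long division: for each bit of
// LHS_Lo, from MSB to LSB, shift the running remainder left, bring in that
// bit, and when REM >= RHS subtract RHS and set the corresponding bit of
// DIV_Lo. E.g. 7 / 2 done this way yields DIV = 3, REM = 1.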
1716  // Get Speculative values
1717  SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
1718  SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
1719 
1720  SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
1721  SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, zero);
1722 
1723  SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
1724  SDValue DIV_Lo = zero;
1725 
1726  const unsigned halfBitWidth = HalfVT.getSizeInBits();
1727 
1728  for (unsigned i = 0; i < halfBitWidth; ++i) {
1729  const unsigned bitPos = halfBitWidth - i - 1;
1730  SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
1731  // Get value of high bit
1732  SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
1733  HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
1734  HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
1735 
1736  // Shift
1737  REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
1738  // Add LHS high bit
1739  REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
1740 
1741  SDValue BIT = DAG.getConstant(1 << bitPos, DL, HalfVT);
1742  SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
1743 
1744  DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
1745 
1746  // Update REM
1747  SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
1748  REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
1749  }
1750 
1751  SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
1752  Results.push_back(DIV);
1753  Results.push_back(REM);
1754 }
1755 
1757  SelectionDAG &DAG) const {
1758  SDLoc DL(Op);
1759  EVT VT = Op.getValueType();
1760 
1761  if (VT == MVT::i64) {
1762  SmallVector<SDValue, 2> Results;
1763  LowerUDIVREM64(Op, DAG, Results);
1764  return DAG.getMergeValues(Results, DL);
1765  }
1766 
1767  SDValue Num = Op.getOperand(0);
1768  SDValue Den = Op.getOperand(1);
1769 
1770  if (VT == MVT::i32) {
1771  if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
1772  DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
1773  // TODO: We technically could do this for i64, but shouldn't that just be
1774  // handled by something generally reducing 64-bit division on 32-bit
1775  // values to 32-bit?
1776  return LowerDIVREM24(Op, DAG, false);
1777  }
1778  }
1779 
1780  // RCP = URECIP(Den) = 2^32 / Den + e
1781  // e is rounding error.
1782  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1783 
1784  // RCP_LO = mul(RCP, Den)
1785  SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1786 
1787  // RCP_HI = mulhu(RCP, Den)
1788  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1789 
1790  // NEG_RCP_LO = -RCP_LO
1791  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
1792  RCP_LO);
1793 
1794  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1795  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1796  NEG_RCP_LO, RCP_LO,
1797  ISD::SETEQ);
1798  // Calculate the rounding error from the URECIP instruction
1799  // E = mulhu(ABS_RCP_LO, RCP)
1800  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1801 
1802  // RCP_A_E = RCP + E
1803  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1804 
1805  // RCP_S_E = RCP - E
1806  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1807 
1808  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
1809  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1810  RCP_A_E, RCP_S_E,
1811  ISD::SETEQ);
1812  // Quotient = mulhu(Tmp0, Num)
1813  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1814 
1815  // Num_S_Remainder = Quotient * Den
1816  SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1817 
1818  // Remainder = Num - Num_S_Remainder
1819  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1820 
1821  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1822  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1823  DAG.getConstant(-1, DL, VT),
1824  DAG.getConstant(0, DL, VT),
1825  ISD::SETUGE);
1826  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
1827  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
1828  Num_S_Remainder,
1829  DAG.getConstant(-1, DL, VT),
1830  DAG.getConstant(0, DL, VT),
1831  ISD::SETUGE);
1832  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
1833  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
1834  Remainder_GE_Zero);
1835 
1836  // Calculate Division result:
1837 
1838  // Quotient_A_One = Quotient + 1
1839  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
1840  DAG.getConstant(1, DL, VT));
1841 
1842  // Quotient_S_One = Quotient - 1
1843  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
1844  DAG.getConstant(1, DL, VT));
1845 
1846  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
1847  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1848  Quotient, Quotient_A_One, ISD::SETEQ);
1849 
1850  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
1851  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1852  Quotient_S_One, Div, ISD::SETEQ);
1853 
1854  // Calculate Rem result:
1855 
1856  // Remainder_S_Den = Remainder - Den
1857  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
1858 
1859  // Remainder_A_Den = Remainder + Den
1860  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
1861 
1862  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
1863  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1864  Remainder, Remainder_S_Den, ISD::SETEQ);
1865 
1866  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
1867  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1868  Remainder_A_Den, Rem, ISD::SETEQ);
1869  SDValue Ops[2] = {
1870  Div,
1871  Rem
1872  };
1873  return DAG.getMergeValues(Ops, DL);
1874 }
1875 
1877  SelectionDAG &DAG) const {
1878  SDLoc DL(Op);
1879  EVT VT = Op.getValueType();
1880 
1881  SDValue LHS = Op.getOperand(0);
1882  SDValue RHS = Op.getOperand(1);
1883 
1884  SDValue Zero = DAG.getConstant(0, DL, VT);
1885  SDValue NegOne = DAG.getConstant(-1, DL, VT);
1886 
1887  if (VT == MVT::i32 &&
1888  DAG.ComputeNumSignBits(LHS) > 8 &&
1889  DAG.ComputeNumSignBits(RHS) > 8) {
1890  return LowerDIVREM24(Op, DAG, true);
1891  }
1892  if (VT == MVT::i64 &&
1893  DAG.ComputeNumSignBits(LHS) > 32 &&
1894  DAG.ComputeNumSignBits(RHS) > 32) {
1895  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1896 
1897  //HiLo split
1898  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
1899  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
1900  SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1901  LHS_Lo, RHS_Lo);
1902  SDValue Res[2] = {
1903  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
1904  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
1905  };
1906  return DAG.getMergeValues(Res, DL);
1907  }
1908 
1909  SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
1910  SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
1911  SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
1912  SDValue RSign = LHSign; // Remainder sign is the same as LHS
1913 
1914  LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
1915  RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
1916 
1917  LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
1918  RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
1919 
1920  SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
1921  SDValue Rem = Div.getValue(1);
1922 
1923  Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
1924  Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
1925 
1926  Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
1927  Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
1928 
1929  SDValue Res[2] = {
1930  Div,
1931  Rem
1932  };
1933  return DAG.getMergeValues(Res, DL);
1934 }
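// Note: the general path above reduces signed divrem to unsigned divrem by
// conditionally negating the operands: with sign = -1, (x + sign) ^ sign
// computes -x, and with sign = 0 it leaves x unchanged; the same xor/sub pair
// afterwards restores the proper signs of the quotient and remainder.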
1935 
1936 // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
1937 SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
1938  SDLoc SL(Op);
1939  EVT VT = Op.getValueType();
1940  SDValue X = Op.getOperand(0);
1941  SDValue Y = Op.getOperand(1);
1942 
1943  SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
1944  SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
1945  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
1946 
1947  return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
1948 }
1949 
1950 SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
1951  SDLoc SL(Op);
1952  SDValue Src = Op.getOperand(0);
1953 
1954  // result = trunc(src)
1955  // if (src > 0.0 && src != result)
1956  // result += 1.0
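 // For example (illustrative only): ceil(2.3) -> trunc = 2.0, 2.3 > 0.0 and
 // 2.3 != 2.0, so the result is 2.0 + 1.0 = 3.0; ceil(-2.3) -> trunc = -2.0,
 // the condition fails, so -2.0 is returned unchanged.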
1957 
1958  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1959 
1960  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
1961  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
1962 
1963  EVT SetCCVT =
1964  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
1965 
1966  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
1967  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1968  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1969 
1970  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
1971  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1972 }
1973 
1974 static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) {
1975  const unsigned FractBits = 52;
1976  const unsigned ExpBits = 11;
1977 
1978  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
1979  Hi,
1980  DAG.getConstant(FractBits - 32, SL, MVT::i32),
1981  DAG.getConstant(ExpBits, SL, MVT::i32));
1982  SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
1983  DAG.getConstant(1023, SL, MVT::i32));
1984 
1985  return Exp;
1986 }
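// Informal note on the helper above: BFE_U32 pulls the 11-bit exponent field
// out of the high word (bits 52..62 of the double, i.e. bit 52 - 32 = 20 of
// Hi), and subtracting 1023 removes the IEEE-754 bias. For 1.0 the stored
// field is 1023, so the returned exponent is 0.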
1987 
1988 SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
1989  SDLoc SL(Op);
1990  SDValue Src = Op.getOperand(0);
1991 
1992  assert(Op.getValueType() == MVT::f64);
1993 
1994  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1995  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1996 
1997  SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1998 
1999  // Extract the upper half, since this is where we will find the sign and
2000  // exponent.
2001  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
2002 
2003  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
2004 
2005  const unsigned FractBits = 52;
2006 
2007  // Extract the sign bit.
2008  const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
2009  SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
2010 
2011  // Extend back to 64 bits.
2012  SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
2013  Zero, SignBit);
2014  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
2015 
2016  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
2017  const SDValue FractMask
2018  = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
2019 
2020  SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
2021  SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
2022  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
2023 
2024  EVT SetCCVT =
2025  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
2026 
2027  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
2028 
2029  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
2030  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
2031 
2032  SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
2033  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
2034 
2035  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
2036 }
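// Informal summary of LowerFTRUNC (restating the code above): Shr keeps the
// mantissa bits that are fractional for the given exponent, and clearing them
// in Tmp0 truncates the magnitude toward zero. If Exp < 0 the magnitude is
// below 1.0, so only the sign bit survives (+/-0.0); if Exp > 51 every
// mantissa bit is already integral and the input is returned unchanged.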
2037 
2038 SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
2039  SDLoc SL(Op);
2040  SDValue Src = Op.getOperand(0);
2041 
2042  assert(Op.getValueType() == MVT::f64);
2043 
2044  APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
2045  SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
2046  SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
2047 
2048  SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
2049  SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
2050 
2051  SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
2052 
2053  APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
2054  SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
2055 
2056  EVT SetCCVT =
2057  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
2058  SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
2059 
2060  return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
2061 }
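// Informal note on the trick above: adding copysign(2^52, x) pushes the
// fractional bits of x out of the significand, so the add/sub pair leaves the
// value rounded to an integer in the current rounding mode. Inputs with
// |x| > 0x1.fffffffffffffp+51 are already integral, hence the final select
// returns the source unchanged.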
2062 
2063 SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
2064  // FNEARBYINT and FRINT are the same, except in their handling of FP
2065  // exceptions. Those aren't really meaningful for us, and OpenCL only has
2066  // rint, so just treat them as equivalent.
2067  return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
2068 }
2069 
2070 // XXX - May require not supporting f32 denormals?
2071 SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
2072  SDLoc SL(Op);
2073  SDValue X = Op.getOperand(0);
2074 
2075  SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
2076 
2077  SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
2078 
2079  SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
2080 
2081  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
2082  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
2083  const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
2084 
2085  SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
2086 
2087  EVT SetCCVT =
2088  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
2089 
2090  SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
2091 
2092  SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
2093 
2094  return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
2095 }
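// For example (illustrative only): round(2.5) gives T = 2.0 and |diff| = 0.5,
// so copysign(1.0, x) = 1.0 is added and the result is 3.0; round(-2.5) gives
// T = -2.0 and adds -1.0, yielding -3.0, i.e. ties round away from zero as
// expected for round().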
2096 
2097 SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
2098  SDLoc SL(Op);
2099  SDValue X = Op.getOperand(0);
2100 
2101  SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
2102 
2103  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2104  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
2105  const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
2106  const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
2107  EVT SetCCVT =
2108  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
2109 
2110  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
2111 
2112  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
2113 
2114  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
2115 
2116  const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
2117  MVT::i64);
2118 
2119  SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
2120  SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
2121  DAG.getConstant(INT64_C(0x0008000000000000), SL,
2122  MVT::i64),
2123  Exp);
2124 
2125  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
2126  SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
2127  DAG.getConstant(0, SL, MVT::i64), Tmp0,
2128  ISD::SETNE);
2129 
2130  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
2131  D, DAG.getConstant(0, SL, MVT::i64));
2132  SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
2133 
2134  K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
2135  K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
2136 
2137  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
2138  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
2139  SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
2140 
2141  SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
2142  ExpEqNegOne,
2143  DAG.getConstantFP(1.0, SL, MVT::f64),
2144  DAG.getConstantFP(0.0, SL, MVT::f64));
2145 
2146  SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
2147 
2148  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
2149  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
2150 
2151  return K;
2152 }
2153 
2154 SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
2155  EVT VT = Op.getValueType();
2156 
2157  if (VT == MVT::f32)
2158  return LowerFROUND32(Op, DAG);
2159 
2160  if (VT == MVT::f64)
2161  return LowerFROUND64(Op, DAG);
2162 
2163  llvm_unreachable("unhandled type");
2164 }
2165 
2166 SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
2167  SDLoc SL(Op);
2168  SDValue Src = Op.getOperand(0);
2169 
2170  // result = trunc(src);
2171  // if (src < 0.0 && src != result)
2172  // result += -1.0.
2173 
2174  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2175 
2176  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
2177  const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
2178 
2179  EVT SetCCVT =
2180  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
2181 
2182  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
2183  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
2184  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
2185 
2186  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
2187  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
2188 }
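// For example (illustrative only): floor(-2.3) gives trunc = -2.0; the source
// is negative and differs from the truncation, so -1.0 is added and the
// result is -3.0. For floor(2.3) the condition fails and 2.0 is returned.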
2189 
2190 SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
2191  bool Signed) const {
2192  SDLoc SL(Op);
2193  SDValue Src = Op.getOperand(0);
2194 
2195  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2196 
2197  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2198  DAG.getConstant(0, SL, MVT::i32));
2199  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2200  DAG.getConstant(1, SL, MVT::i32));
2201 
2202  SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
2203  SL, MVT::f64, Hi);
2204 
2205  SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
2206 
2207  SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
2208  DAG.getConstant(32, SL, MVT::i32));
2209 
2210  return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
2211 }
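// Informal note: the 64-bit source is treated as Hi * 2^32 + Lo with Lo
// always unsigned. Hi is converted (signed or unsigned as requested), scaled
// by 2^32 via LDEXP, and the converted low half is added back in.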
2212 
2213 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
2214  SelectionDAG &DAG) const {
2215  SDValue S0 = Op.getOperand(0);
2216  if (S0.getValueType() != MVT::i64)
2217  return SDValue();
2218 
2219  EVT DestVT = Op.getValueType();
2220  if (DestVT == MVT::f64)
2221  return LowerINT_TO_FP64(Op, DAG, false);
2222 
2223  assert(DestVT == MVT::f32);
2224 
2225  SDLoc DL(Op);
2226 
2227  // f32 uint_to_fp i64
2228  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2229  DAG.getConstant(0, DL, MVT::i32));
2230  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
2231  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2232  DAG.getConstant(1, DL, MVT::i32));
2233  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
2234  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
2235  DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
2236  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
2237 }
2238 
2239 SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
2240  SelectionDAG &DAG) const {
2241  SDValue Src = Op.getOperand(0);
2242  if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
2243  return LowerINT_TO_FP64(Op, DAG, true);
2244 
2245  return SDValue();
2246 }
2247 
2248 SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
2249  bool Signed) const {
2250  SDLoc SL(Op);
2251 
2252  SDValue Src = Op.getOperand(0);
2253 
2254  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2255 
2256  SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
2257  MVT::f64);
2258  SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
2259  MVT::f64);
2260 
2261  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
2262 
2263  SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
2264 
2265 
2266  SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
2267 
2268  SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
2269  MVT::i32, FloorMul);
2270  SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
2271 
2272  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
2273 
2274  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
2275 }
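// Informal note on the constants above: K0 is 2^-32 and K1 is -2^32 (given as
// raw IEEE-754 bit patterns). FloorMul = floor(trunc * 2^-32) is the high
// 32 bits of the result, and Fma = FloorMul * -2^32 + trunc recovers the low
// 32 bits, which are then packed into a v2i32 and bitcast to i64.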
2276 
2277 SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
2278  SelectionDAG &DAG) const {
2279  SDValue Src = Op.getOperand(0);
2280 
2281  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2282  return LowerFP64_TO_INT(Op, DAG, true);
2283 
2284  return SDValue();
2285 }
2286 
2287 SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
2288  SelectionDAG &DAG) const {
2289  SDValue Src = Op.getOperand(0);
2290 
2291  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2292  return LowerFP64_TO_INT(Op, DAG, false);
2293 
2294  return SDValue();
2295 }
2296 
2297 SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2298  SelectionDAG &DAG) const {
2299  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2300  MVT VT = Op.getSimpleValueType();
2301  MVT ScalarVT = VT.getScalarType();
2302 
2303  if (!VT.isVector())
2304  return SDValue();
2305 
2306  SDValue Src = Op.getOperand(0);
2307  SDLoc DL(Op);
2308 
2309  // TODO: Don't scalarize on Evergreen?
2310  unsigned NElts = VT.getVectorNumElements();
2311  SmallVector<SDValue, 8> Args;
2312  DAG.ExtractVectorElements(Src, Args, 0, NElts);
2313 
2314  SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
2315  for (unsigned I = 0; I < NElts; ++I)
2316  Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
2317 
2318  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
2319 }
2320 
2321 //===----------------------------------------------------------------------===//
2322 // Custom DAG optimizations
2323 //===----------------------------------------------------------------------===//
2324 
2325 static bool isU24(SDValue Op, SelectionDAG &DAG) {
2326  APInt KnownZero, KnownOne;
2327  EVT VT = Op.getValueType();
2328  DAG.computeKnownBits(Op, KnownZero, KnownOne);
2329 
2330  return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
2331 }
2332 
2333 static bool isI24(SDValue Op, SelectionDAG &DAG) {
2334  EVT VT = Op.getValueType();
2335 
2336  // In order for this to be a signed 24-bit value, bit 23 must
2337  // be a sign bit.
2338  return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
2339  // as unsigned 24-bit values.
2340  (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
2341 }
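// Informal examples for the two predicates above: an i32 value produced by
// (and x, 0xffffff) has its top 8 bits known zero, so isU24 returns true; an
// i32 produced by sign_extend from i16 has at least 17 sign bits, and
// 32 - 17 = 15 < 24, so isI24 returns true.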
2342 
2343 static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
2344 
2345  SelectionDAG &DAG = DCI.DAG;
2346  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2347  EVT VT = Op.getValueType();
2348 
2349  APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
2350  APInt KnownZero, KnownOne;
2351  TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
2352  if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
2353  DCI.CommitTargetLoweringOpt(TLO);
2354 }
2355 
2356 template <typename IntTy>
2357 static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
2358  uint32_t Offset, uint32_t Width, SDLoc DL) {
2359  if (Width + Offset < 32) {
2360  uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
2361  IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
2362  return DAG.getConstant(Result, DL, MVT::i32);
2363  }
2364 
2365  return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
2366 }
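// Worked example (illustrative only): folding a BFE of 0x00abcdef with
// Offset = 8 and Width = 8 first shifts left by 32 - 8 - 8 = 16, giving
// 0xcdef0000, then shifts right by 24: the unsigned variant yields 0xcd (205)
// while the signed variant sign-extends to 0xffffffcd (-51).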
2367 
2368 static bool usesAllNormalStores(SDNode *LoadVal) {
2369  for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
2370  if (!ISD::isNormalStore(*I))
2371  return false;
2372  }
2373 
2374  return true;
2375 }
2376 
2377 // If we have a copy of an illegal type, replace it with a load / store of an
2378 // equivalently sized legal type. This avoids intermediate bit pack / unpack
2379 // instructions emitted when handling extloads and truncstores. Ideally we could
2380 // recognize the pack / unpack pattern to eliminate it.
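// For instance (illustrative only), a value of the illegal type v4i8 that is
// loaded and then immediately stored can be rewritten as an i32 load feeding
// an i32 store of the same 32 bits, avoiding the extload/truncstore
// pack/unpack sequence described above.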
2381 SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
2382  DAGCombinerInfo &DCI) const {
2383  if (!DCI.isBeforeLegalize())
2384  return SDValue();
2385 
2386  StoreSDNode *SN = cast<StoreSDNode>(N);
2387  SDValue Value = SN->getValue();
2388  EVT VT = Value.getValueType();
2389 
2390  if (isTypeLegal(VT) || SN->isVolatile() ||
2391  !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
2392  return SDValue();
2393 
2394  LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
2395  if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
2396  return SDValue();
2397 
2398  EVT MemVT = LoadVal->getMemoryVT();
2399 
2400  SDLoc SL(N);
2401  SelectionDAG &DAG = DCI.DAG;
2402  EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
2403 
2405  LoadVT, SL,
2406  LoadVal->getChain(),
2407  LoadVal->getBasePtr(),
2408  LoadVal->getOffset(),
2409  LoadVT,
2410  LoadVal->getMemOperand());
2411 
2412  SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
2413  DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
2414 
2415  return DAG.getStore(SN->getChain(), SL, NewLoad,
2416  SN->getBasePtr(), SN->getMemOperand());
2417 }
2418 
2419 SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
2420  DAGCombinerInfo &DCI) const {
2421  if (N->getValueType(0) != MVT::i64)
2422  return SDValue();
2423 
2424  // i64 (shl x, 32) -> (build_pair 0, x)
2425 
2426  // Doing this with moves theoretically helps MI optimizations that understand
2427  // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as
2428  // v_lshl_b64. In the SALU case, I think this is slightly worse since it
2429  // doubles the code size and I'm unsure about cycle count.
2430  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
2431  if (!RHS || RHS->getZExtValue() != 32)
2432  return SDValue();
2433 
2434  SDValue LHS = N->getOperand(0);
2435 
2436  SDLoc SL(N);
2437  SelectionDAG &DAG = DCI.DAG;
2438 
2439  // Extract low 32-bits.
2440  SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
2441 
2442  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2443  return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
2444 }
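// Informal note: BUILD_PAIR takes the low half first, so (build_pair 0, lo(x))
// places lo(x) in the upper 32 bits over a zero low word, which is exactly
// x << 32, since the shift discards the upper half of x anyway.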
2445 
2446 SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
2447  DAGCombinerInfo &DCI) const {
2448  EVT VT = N->getValueType(0);
2449 
2450  if (VT.isVector() || VT.getSizeInBits() > 32)
2451  return SDValue();
2452 
2453  SelectionDAG &DAG = DCI.DAG;
2454  SDLoc DL(N);
2455 
2456  SDValue N0 = N->getOperand(0);
2457  SDValue N1 = N->getOperand(1);
2458  SDValue Mul;
2459 
2460  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
2461  N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
2462  N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
2463  Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
2464  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
2465  N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
2466  N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
2467  Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
2468  } else {
2469  return SDValue();
2470  }
2471 
2472  // We need to use sext even for MUL_U24, because MUL_U24 is used
2473  // for signed multiply of 8 and 16-bit types.
2474  return DAG.getSExtOrTrunc(Mul, DL, VT);
2475 }
2476 
2477 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
2478  DAGCombinerInfo &DCI) const {
2479  SelectionDAG &DAG = DCI.DAG;
2480  SDLoc DL(N);
2481 
2482  switch(N->getOpcode()) {
2483  default:
2484  break;
2485  case ISD::SHL: {
2486  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
2487  break;
2488 
2489  return performShlCombine(N, DCI);
2490  }
2491  case ISD::MUL:
2492  return performMulCombine(N, DCI);
2493  case AMDGPUISD::MUL_I24:
2494  case AMDGPUISD::MUL_U24: {
2495  SDValue N0 = N->getOperand(0);
2496  SDValue N1 = N->getOperand(1);
2497  simplifyI24(N0, DCI);
2498  simplifyI24(N1, DCI);
2499  return SDValue();
2500  }
2501  case ISD::SELECT: {
2502  SDValue Cond = N->getOperand(0);
2503  if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
2504  EVT VT = N->getValueType(0);
2505  SDValue LHS = Cond.getOperand(0);
2506  SDValue RHS = Cond.getOperand(1);
2507  SDValue CC = Cond.getOperand(2);
2508 
2509  SDValue True = N->getOperand(1);
2510  SDValue False = N->getOperand(2);
2511 
2512  if (VT == MVT::f32)
2513  return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
2514 
2515  // TODO: Implement min / max Evergreen instructions.
2516  if (VT == MVT::i32 &&
2517  Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2518  return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
2519  }
2520  }
2521 
2522  break;
2523  }
2524  case AMDGPUISD::BFE_I32:
2525  case AMDGPUISD::BFE_U32: {
2526  assert(!N->getValueType(0).isVector() &&
2527  "Vector handling of BFE not implemented");
2528  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
2529  if (!Width)
2530  break;
2531 
2532  uint32_t WidthVal = Width->getZExtValue() & 0x1f;
2533  if (WidthVal == 0)
2534  return DAG.getConstant(0, DL, MVT::i32);
2535 
2536  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2537  if (!Offset)
2538  break;
2539 
2540  SDValue BitsFrom = N->getOperand(0);
2541  uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
2542 
2543  bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
2544 
2545  if (OffsetVal == 0) {
2546  // This is already sign / zero extended, so try to fold away extra BFEs.
2547  unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
2548 
2549  unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
2550  if (OpSignBits >= SignBits)
2551  return BitsFrom;
2552 
2553  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
2554  if (Signed) {
2555  // This is a sign_extend_inreg. Replace it to take advantage of existing
2556  // DAG Combines. If not eliminated, we will match back to BFE during
2557  // selection.
2558 
2559  // TODO: The sext_inreg of extended types ends up getting expanded, although
2560  // we could handle them in a single BFE.
2561  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
2562  DAG.getValueType(SmallVT));
2563  }
2564 
2565  return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
2566  }
2567 
2568  if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
2569  if (Signed) {
2570  return constantFoldBFE<int32_t>(DAG,
2571  CVal->getSExtValue(),
2572  OffsetVal,
2573  WidthVal,
2574  DL);
2575  }
2576 
2577  return constantFoldBFE<uint32_t>(DAG,
2578  CVal->getZExtValue(),
2579  OffsetVal,
2580  WidthVal,
2581  DL);
2582  }
2583 
2584  if ((OffsetVal + WidthVal) >= 32) {
2585  SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
2586  return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
2587  BitsFrom, ShiftVal);
2588  }
2589 
2590  if (BitsFrom.hasOneUse()) {
2591  APInt Demanded = APInt::getBitsSet(32,
2592  OffsetVal,
2593  OffsetVal + WidthVal);
2594 
2595  APInt KnownZero, KnownOne;
2596  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2597  !DCI.isBeforeLegalizeOps());
2598  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2599  if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
2600  TLI.SimplifyDemandedBits(BitsFrom, Demanded,
2601  KnownZero, KnownOne, TLO)) {
2602  DCI.CommitTargetLoweringOpt(TLO);
2603  }
2604  }
2605 
2606  break;
2607  }
2608 
2609  case ISD::STORE:
2610  return performStoreCombine(N, DCI);
2611  }
2612  return SDValue();
2613 }
2614 
2615 //===----------------------------------------------------------------------===//
2616 // Helper functions
2617 //===----------------------------------------------------------------------===//
2618 
2619 void AMDGPUTargetLowering::getOriginalFunctionArgs(
2620  SelectionDAG &DAG,
2621  const Function *F,
2622  const SmallVectorImpl<ISD::InputArg> &Ins,
2623  SmallVectorImpl<ISD::InputArg> &OrigIns) const {
2624 
2625  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
2626  if (Ins[i].ArgVT == Ins[i].VT) {
2627  OrigIns.push_back(Ins[i]);
2628  continue;
2629  }
2630 
2631  EVT VT;
2632  if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
2633  // Vector has been split into scalars.
2634  VT = Ins[i].ArgVT.getVectorElementType();
2635  } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
2636  Ins[i].ArgVT.getVectorElementType() !=
2637  Ins[i].VT.getVectorElementType()) {
2638  // Vector elements have been promoted.
2639  VT = Ins[i].ArgVT;
2640  } else {
2641  // Vector has been split into smaller vectors.
2642  VT = Ins[i].VT;
2643  }
2644 
2645  ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
2646  Ins[i].OrigArgIndex, Ins[i].PartOffset);
2647  OrigIns.push_back(Arg);
2648  }
2649 }
2650 
2651 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
2652  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2653  return CFP->isExactlyValue(1.0);
2654  }
2655  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2656  return C->isAllOnesValue();
2657  }
2658  return false;
2659 }
2660 
2661 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
2662  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2663  return CFP->getValueAPF().isZero();
2664  }
2665  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2666  return C->isNullValue();
2667  }
2668  return false;
2669 }
2670 
2671 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
2672  const TargetRegisterClass *RC,
2673  unsigned Reg, EVT VT) const {
2674  MachineFunction &MF = DAG.getMachineFunction();
2675  MachineRegisterInfo &MRI = MF.getRegInfo();
2676  unsigned VirtualRegister;
2677  if (!MRI.isLiveIn(Reg)) {
2678  VirtualRegister = MRI.createVirtualRegister(RC);
2679  MRI.addLiveIn(Reg, VirtualRegister);
2680  } else {
2681  VirtualRegister = MRI.getLiveInVirtReg(Reg);
2682  }
2683  return DAG.getRegister(VirtualRegister, VT);
2684 }
2685 
2686 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
2687  const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
2688  uint64_t ArgOffset = MFI->ABIArgOffset;
2689  switch (Param) {
2690  case GRID_DIM:
2691  return ArgOffset;
2692  case GRID_OFFSET:
2693  return ArgOffset + 4;
2694  }
2695  llvm_unreachable("unexpected implicit parameter type");
2696 }
2697 
2698 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
2699 
2700 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
2701  switch ((AMDGPUISD::NodeType)Opcode) {
2702  case AMDGPUISD::FIRST_NUMBER: break;
2703  // AMDIL DAG nodes
2708 
2709  // AMDGPU DAG nodes
2771  }
2772  return nullptr;
2773 }
2774 
2775 SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
2776  DAGCombinerInfo &DCI,
2777  unsigned &RefinementSteps,
2778  bool &UseOneConstNR) const {
2779  SelectionDAG &DAG = DCI.DAG;
2780  EVT VT = Operand.getValueType();
2781 
2782  if (VT == MVT::f32) {
2783  RefinementSteps = 0;
2784  return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
2785  }
2786 
2787  // TODO: There is also an f64 rsq instruction, but the documentation is less
2788  // clear on its precision.
2789 
2790  return SDValue();
2791 }
2792 
2793 SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
2794  DAGCombinerInfo &DCI,
2795  unsigned &RefinementSteps) const {
2796  SelectionDAG &DAG = DCI.DAG;
2797  EVT VT = Operand.getValueType();
2798 
2799  if (VT == MVT::f32) {
2800  // Reciprocal, < 1 ulp error.
2801  //
2802  // This reciprocal approximation converges to < 0.5 ulp error with one
2803  // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
2804 
2805  RefinementSteps = 0;
2806  return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
2807  }
2808 
2809  // TODO: There is also an f64 rcp instruction, but the documentation is less
2810  // clear on its precision.
2811 
2812  return SDValue();
2813 }
2814 
2815 static void computeKnownBitsForMinMax(const SDValue Op0,
2816  const SDValue Op1,
2817  APInt &KnownZero,
2818  APInt &KnownOne,
2819  const SelectionDAG &DAG,
2820  unsigned Depth) {
2821  APInt Op0Zero, Op0One;
2822  APInt Op1Zero, Op1One;
2823  DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
2824  DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
2825 
2826  KnownZero = Op0Zero & Op1Zero;
2827  KnownOne = Op0One & Op1One;
2828 }
2829 
2830 void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
2831  const SDValue Op,
2832  APInt &KnownZero,
2833  APInt &KnownOne,
2834  const SelectionDAG &DAG,
2835  unsigned Depth) const {
2836 
2837  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
2838 
2839  APInt KnownZero2;
2840  APInt KnownOne2;
2841  unsigned Opc = Op.getOpcode();
2842 
2843  switch (Opc) {
2844  default:
2845  break;
2846  case ISD::INTRINSIC_WO_CHAIN: {
2847  // FIXME: The intrinsic should just use the node.
2848  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
2849  case AMDGPUIntrinsic::AMDGPU_imax:
2850  case AMDGPUIntrinsic::AMDGPU_umax:
2851  case AMDGPUIntrinsic::AMDGPU_imin:
2852  case AMDGPUIntrinsic::AMDGPU_umin:
2853  computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
2854  KnownZero, KnownOne, DAG, Depth);
2855  break;
2856  default:
2857  break;
2858  }
2859 
2860  break;
2861  }
2862  case AMDGPUISD::CARRY:
2863  case AMDGPUISD::BORROW: {
2864  KnownZero = APInt::getHighBitsSet(32, 31);
2865  break;
2866  }
2867 
2868  case AMDGPUISD::BFE_I32:
2869  case AMDGPUISD::BFE_U32: {
2870  ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2871  if (!CWidth)
2872  return;
2873 
2874  unsigned BitWidth = 32;
2875  uint32_t Width = CWidth->getZExtValue() & 0x1f;
2876 
2877  if (Opc == AMDGPUISD::BFE_U32)
2878  KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
2879 
2880  break;
2881  }
2882  }
2883 }
2884 
2885 unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
2886  SDValue Op,
2887  const SelectionDAG &DAG,
2888  unsigned Depth) const {
2889  switch (Op.getOpcode()) {
2890  case AMDGPUISD::BFE_I32: {
2891  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2892  if (!Width)
2893  return 1;
2894 
2895  unsigned SignBits = 32 - Width->getZExtValue() + 1;
2896  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2897  if (!Offset || !Offset->isNullValue())
2898  return SignBits;
2899 
2900  // TODO: Could probably figure something out with non-0 offsets.
2901  unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
2902  return std::max(SignBits, Op0SignBits);
2903  }
2904 
2905  case AMDGPUISD::BFE_U32: {
2906  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2907  return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
2908  }
2909 
2910  case AMDGPUISD::CARRY:
2911  case AMDGPUISD::BORROW:
2912  return 31;
2913 
2914  default:
2915  return 1;
2916  }
2917 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI)
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:477
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
SDValue getValue(unsigned R) const
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, SDLoc DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:175
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
AMDGPU specific subclass of TargetSubtarget.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
void dump() const
Dump this node, for debugging.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT) const override
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:292
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static MVT getVectorVT(MVT VT, unsigned NumElements)
static const fltSemantics IEEEdouble
Definition: APFloat.h:133
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
SDVTList getVTList() const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
DiagnosticSeverity
Defines the different supported severity of a diagnostic.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
Create a MERGE_VALUES node from the given operands.
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:210
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:684
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:531
const GlobalValue * getGlobal() const
void setHasFloatingPointExceptions(bool FPExceptions=true)
Tells the code generator that this target supports floating point exceptions and cares about preservi...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:752
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const SDValue & getOperand(unsigned Num) const
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static void computeKnownBitsForMinMax(const SDValue Op0, const SDValue Op1, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth)
Address space for local memory.
Definition: AMDGPU.h:112
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:318
const MachinePointerInfo & getPointerInfo() const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
const SDValue & getBasePtr() const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:189
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth=0) const
Look at Op.
bool isAllOnesValue() const
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const override
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
APInt Not(const APInt &APIVal)
Bitwise complement function.
Definition: APInt.h:1905
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const override
Hooks for building estimates in place of slower divisions and square roots.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:475
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const
MachineMemOperand - A description of a memory reference used in the backend.
Pointer to the start of the shader's constant data.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:551
Shift and rotation operations.
Definition: ISDOpcodes.h:332
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into a scalar store of each component.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:181
virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
void addLoc(const CCValAssign &V)
unsigned getAddressSpace() const
Reg
All possible values of the reg field in the ModR/M byte.
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const
IABS(a) = SMAX(sub(0, a), a)
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, unsigned Depth=0) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
static bool isI24(SDValue Op, SelectionDAG &DAG)
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:183
Interface for custom diagnostic printing.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
Interface to describe a layout of a stack frame on a AMDIL target machine.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
#define G(x, y, z)
Definition: MD5.cpp:52
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
TargetRegisterInfo interface that is implemented by all hw codegen targets.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
load Combine Adjacent Loads
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
void setSelectIsExpensive(bool isExpensive=true)
Tells the code generator not to expand operations into sequences that use the select operations if po...
const AMDGPUSubtarget * Subtarget
bool isLiveIn(unsigned Reg) const
SDValue CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, SelectionDAG &DAG) const
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
ArrayType - Class to represent array types.
Definition: DerivedTypes.h:336
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
static cl::opt< std::string > FuncName("cppfname", cl::desc("Specify the name of the generated function"), cl::value_desc("function name"))
void setFsqrtIsCheap(bool isCheap=true)
Tells the code generator that fsqrt is cheap, and should not be replaced with an alternative sequence...
const SDValue & getBasePtr() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
getHalfSizedIntegerVT - Finds the smallest simple value type that is greater than or equal to half th...
Definition: ValueTypes.h:270
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
EVT getMemoryVT() const
Return the type of the in-memory value.
Generation getGeneration() const
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
bool isHWTrueValue(SDValue Op) const
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:491
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
SDNode * getNode() const
get the SDNode which holds the desired result
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:239
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This is the base abstract class for diagnostic reporting in the backend.
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:251
unsigned getVectorNumElements() const
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
static volatile int One
Definition: InfiniteTest.cpp:9
Address space for constant memory.
Definition: AMDGPU.h:111
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isNonTemporal() const
Address space for private memory.
Definition: AMDGPU.h:109
This is an important base class in LLVM.
Definition: Constant.h:41
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:233
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
static bool hasDefinedInitializer(const GlobalValue *GV)
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
This class provides iterator support for SDUse operands that use a specific SDNode.
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
bool isLoadBitCastBeneficial(EVT, EVT) const override
isLoadBitCastBeneficial() - Return true if the following transform is beneficial. ...
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
unsigned getOpcode() const
bool hasFP32Denormals() const
SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into a scalar load of each component.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Constant * getAggregateElement(unsigned Elt) const
getAggregateElement - For aggregates (struct/array/vector) return the constant that corresponds to th...
Definition: Constants.cpp:250
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const
Linear Interpolation LRP(a, b, c) = muladd(a, b, (1 - a) * c)
void setPow2SDivIsCheap(bool isCheap=true)
Tells the code generator that it shouldn't generate sra/srl/add/sra for a signed divide by power of t...
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static UndefValue * get(Type *T)
get() - Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1473
bool isHWFalseValue(SDValue Op) const
static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG)
This structure contains all information that is necessary for lowering calls.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3353
const SDValue & getOffset() const
SequentialType - This is the superclass of the array, pointer and vector type classes.
Definition: DerivedTypes.h:310
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
static bool isU24(SDValue Op, SelectionDAG &DAG)
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
unsigned LDSSize
Number of bytes in the LDS that are being used.
bool isFAbsFree(EVT VT) const override
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:388
bool isInvariant() const
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provides VTs and return the low/high part...
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
unsigned getScalarSizeInBits() const LLVM_READONLY
getScalarSizeInBits - If this is a vector type, return the getPrimitiveSizeInBits value for the eleme...
Definition: Type.cpp:139
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
Interface definition of the TargetLowering class that is common to all AMD GPUs.
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:500
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0)
Append the extracted elements from Start to Count out of the vector Op in Args.
unsigned getStackWidth(const MachineFunction &MF) const
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Represents one node in the SelectionDAG.
const fltSemantics & getFltSemantics() const
Definition: Type.h:166
static bool usesAllNormalStores(SDNode *LoadVal)
double BitsToDouble(uint64_t Bits)
BitsToDouble - This function takes a 64-bit integer and returns the bit equivalent double...
Definition: MathExtras.h:504
static mvt_range integer_valuetypes()
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override
Class for arbitrary precision integers.
Definition: APInt.h:73
bool hasInitializer() const
Definitions have initializers, declarations don't.
const Value * getValue() const
getValue - Return the base address of the memory access.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
Interface for the AMDGPU Implementation of the Intrinsic Info class.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT)
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.cpp:3417
int getNextAvailablePluginDiagnosticKind()
Get the next available kind ID for a plugin diagnostic.
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:250
const AMDGPUFrameLowering * getFrameLowering() const override
std::map< const GlobalValue *, unsigned > LocalMemoryObjects
A map to keep track of local memory objects and their offsets within the local memory space...
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1890
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:498
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
unsigned countLeadingOnes() const
Count the number of leading one bits.
Definition: APInt.cpp:722
Represents a use of an SDNode.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override
Return true if it's profitable to narrow operations of type VT1 to VT2.
SelectSupportKind
Enum that describes what type of support for selects the target has.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned ABIArgOffset
Start of implicit kernel args.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
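A hedged sketch of the SHL/SRA pair this operator stands for, sign-extending an i8 held in the low bits of an i32 (DAG, DL and Op are assumed to be in scope, not taken from this file):
  // SIGN_EXTEND_INREG(Op, i8) on an i32 behaves like (sra (shl Op, 24), 24).
  SDValue Amt = DAG.getConstant(24, DL, MVT::i32);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i32, Op, Amt);
  SDValue Ext = DAG.getNode(ISD::SRA, DL, MVT::i32, Shl, Amt);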
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
Information about the stack frame layout on the AMDGPU targets.
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
ArrayRef< SDUse > ops() const
bool isTruncateFree(EVT Src, EVT Dest) const override
#define NODE_NAME_CASE(node)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
SDValue CombineFMinMaxLegacy(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, SDLoc DL)
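As a hedged illustration of what an unsigned BFE (bit-field extract) fold computes, a hypothetical standalone helper (not the function above) might look like:
  static uint32_t foldUBFE(uint32_t Src0, uint32_t Offset, uint32_t Width) {
    if (Width == 0)
      return 0;                                     // zero-width extract yields 0
    if (Width == 32)
      return Src0 >> Offset;                        // avoid an undefined 32-bit shift below
    return (Src0 >> Offset) & ((1u << Width) - 1);  // keep only the low Width bits
  }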
unsigned MaxStoresPerMemcpy
Specify the maximum number of store instructions per memcpy call.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
getEVT - Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:277
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca=nullptr)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
void getOriginalFunctionArgs(SelectionDAG &DAG, const Function *F, const SmallVectorImpl< ISD::InputArg > &Ins, SmallVectorImpl< ISD::InputArg > &OrigIns) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:739
bool isFloatingPoint() const
isFloatingPoint - Return true if this is an FP type or a vector FP type.
Definition: ValueTypes.h:105
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
void setIntDivIsCheap(bool isCheap=true)
Tells the code generator whether integer divide is cheap; when it is not, the divide should, if possible, be replaced by an alternate sequence of instructions not containing an integer divide.
bool atEnd() const
Return true if this iterator is at the end of the uses list.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
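A one-line hedged example (DAG, DL and Val, an integer-typed SDValue, are assumed to be in scope): widen with zero-extension or narrow by truncation to i32 as needed:
  SDValue AsI32 = DAG.getZExtOrTrunc(Val, DL, MVT::i32);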
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getValueType(EVT)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
Primary interface to the complete machine description for the target machine.
bool hasBCNT(unsigned Size) const
bool isZExtFree(Type *Src, Type *Dest) const override
Return true if any actual instruction that defines a value of type Ty1 implicitly zero-extends the va...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:244
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned getOrigAlign() const
SDValue getConstantFP(double Val, SDLoc DL, EVT VT, bool isTarget=false)
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:726
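A hedged sketch combining this helper with getSelect to build (A > B) ? A : B for two f32 values A and B (DAG and DL are assumed to be in scope):
  SDValue Cmp = DAG.getSetCC(DL, MVT::i1, A, B, ISD::SETOGT); // ordered greater-than compare
  SDValue Max = DAG.getSelect(DL, MVT::f32, Cmp, A, B);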
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
unsigned getAlignment() const
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
getIntegerVT - Returns the EVT that represents an integer with the given number of bits...
Definition: ValueTypes.h:61
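A short hedged example (Ctx is an LLVMContext assumed to be in scope) building an oddly sized integer type:
  EVT OddVT = EVT::getIntegerVT(Ctx, 24); // a 24-bit integer EVT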
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
int getKind() const
bool isSelectSupported(SelectSupportKind) const override
uint64_t getZExtValue() const
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
BRIND - Indirect branch.
Definition: ISDOpcodes.h:538
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
This class is used to represent ISD::LOAD nodes.