LLVM  4.0.0
AMDGPUISelLowering.cpp
Go to the documentation of this file.
1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief This is the parent TargetLowering class for hardware code gen
12 /// targets.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUISelLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUFrameLowering.h"
19 #include "AMDGPUIntrinsicInfo.h"
20 #include "AMDGPURegisterInfo.h"
21 #include "AMDGPUSubtarget.h"
23 #include "SIMachineFunctionInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "SIInstrInfo.h"
32 using namespace llvm;
33 
34 static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
35  CCValAssign::LocInfo LocInfo,
36  ISD::ArgFlagsTy ArgFlags, CCState &State) {
37  MachineFunction &MF = State.getMachineFunction();
39 
40  uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
41  ArgFlags.getOrigAlign());
42  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
43  return true;
44 }
45 
46 #include "AMDGPUGenCallingConv.inc"
47 
48 // Find a larger type to do a load / store of a vector with.
50  unsigned StoreSize = VT.getStoreSizeInBits();
51  if (StoreSize <= 32)
52  return EVT::getIntegerVT(Ctx, StoreSize);
53 
54  assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
55  return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
56 }
57 
59  const AMDGPUSubtarget &STI)
60  : TargetLowering(TM), Subtarget(&STI) {
61  // Lower floating point store/load to integer store/load to reduce the number
62  // of patterns in tablegen.
65 
68 
71 
74 
77 
80 
83 
86 
89 
90  // There are no 64-bit extloads. These should be done as a 32-bit extload and
91  // an extension to 64-bit.
92  for (MVT VT : MVT::integer_valuetypes()) {
96  }
97 
98  for (MVT VT : MVT::integer_valuetypes()) {
99  if (VT == MVT::i64)
100  continue;
101 
106 
111 
116  }
117 
118  for (MVT VT : MVT::integer_vector_valuetypes()) {
131  }
132 
137 
142 
147 
150 
153 
156 
159 
162 
165 
168 
171 
174 
179 
184 
189 
192 
195 
198 
201 
202 
207 
210 
211  // This is totally unsupported, just custom lower to produce an error.
213 
214  // We need to custom lower some of the intrinsics
217 
218  // Library functions. These default to Expand, but we have instructions
219  // for them.
230 
233 
236 
239 
240  // v_mad_f32 does not support denormals according to some sources.
241  if (!Subtarget->hasFP32Denormals())
243 
244  // Expand to fneg + fadd.
246 
257 
263  }
264 
265  if (!Subtarget->hasBFI()) {
266  // fcopysign can be done in a single instruction with BFI.
269  }
270 
273 
274  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
275  for (MVT VT : ScalarIntVTs) {
276  // These should use [SU]DIVREM, so set them to expand
281 
282  // GPU does not have divrem function for signed or unsigned.
285 
286  // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
289 
293  }
294 
295  if (!Subtarget->hasBCNT(32))
297 
298  if (!Subtarget->hasBCNT(64))
300 
301  // The hardware supports 32-bit ROTR, but not ROTL.
305 
316 
321 
322  if (Subtarget->hasFFBH())
324 
325  if (Subtarget->hasFFBL())
327 
330 
331  // We only really have 32-bit BFE instructions (and 16-bit on VI).
332  //
333  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
334  // effort to match them now. We want this to be false for i64 cases when the
335  // extraction isn't restricted to the upper or lower half. Ideally we would
336  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
337  // span the midpoint are probably relatively rare, so don't worry about them
338  // for now.
339  if (Subtarget->hasBFE())
340  setHasExtractBitsInsn(true);
341 
342  static const MVT::SimpleValueType VectorIntTypes[] = {
344  };
345 
346  for (MVT VT : VectorIntTypes) {
347  // Expand the following operations for the current type by default.
385  }
386 
387  static const MVT::SimpleValueType FloatVectorTypes[] = {
389  };
390 
391  for (MVT VT : FloatVectorTypes) {
417  }
418 
419  // This causes using an unrolled select operation rather than expansion with
420  // bit operations. This is in general better, but the alternative using BFI
421  // instructions may be better if the select sources are SGPRs.
424 
427 
428  // There are no libcalls of any kind.
429  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
430  setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
431 
434 
436  setJumpIsExpensive(true);
437 
438  // FIXME: This is only partially true. If we have to do vector compares, any
439  // SGPR pair can be a condition register. If we have a uniform condition, we
440  // are better off doing SALU operations, where there is only one SCC. For now,
441  // we don't have a way of knowing during instruction selection if a condition
442  // will be uniform and we always use vector compares. Assume we are using
443  // vector compares until that is fixed.
445 
446  // SI at least has hardware support for floating point exceptions, but no way
447  // of using or handling them is implemented. They are also optional in OpenCL
448  // (Section 7.3)
450 
452 
453  // We want to find all load dependencies for long chains of stores to enable
454  // merging into very wide vectors. The problem is with vectors with > 4
455  // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
456  // vectors are a legal type, even though we have to split the loads
457  // usually. When we can more precisely specify load legality per address
458  // space, we should be able to make FindBetterChain/MergeConsecutiveStores
459  // smarter so that they can figure out what to do in 2 iterations without all
460  // N > 4 stores on the same chain.
462 
463  // FIXME: Need to really handle these.
464  MaxStoresPerMemcpy = 4096;
465  MaxStoresPerMemmove = 4096;
466  MaxStoresPerMemset = 4096;
467 
481 }
482 
483 //===----------------------------------------------------------------------===//
484 // Target Information
485 //===----------------------------------------------------------------------===//
486 
487 static bool fnegFoldsIntoOp(unsigned Opc) {
488  switch (Opc) {
489  case ISD::FADD:
490  case ISD::FSUB:
491  case ISD::FMUL:
492  case ISD::FMA:
493  case ISD::FMAD:
494  case ISD::FSIN:
495  case AMDGPUISD::RCP:
497  case AMDGPUISD::SIN_HW:
499  return true;
500  default:
501  return false;
502  }
503 }
504 
506  return MVT::i32;
507 }
508 
510  return true;
511 }
512 
513 // The backend supports 32 and 64 bit floating point immediates.
514 // FIXME: Why are we reporting vectors of FP immediates as legal?
515 bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
516  EVT ScalarVT = VT.getScalarType();
517  return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
518  (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
519 }
520 
521 // We don't want to shrink f64 / f32 constants.
523  EVT ScalarVT = VT.getScalarType();
524  return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
525 }
526 
529  EVT NewVT) const {
530 
531  unsigned NewSize = NewVT.getStoreSizeInBits();
532 
533  // If we are reducing to a 32-bit load, this is always better.
534  if (NewSize == 32)
535  return true;
536 
537  EVT OldVT = N->getValueType(0);
538  unsigned OldSize = OldVT.getStoreSizeInBits();
539 
540  // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
541  // extloads, so doing one requires using a buffer_load. In cases where we
542  // still couldn't use a scalar load, using the wider load shouldn't really
543  // hurt anything.
544 
545  // If the old size already had to be an extload, there's no harm in continuing
546  // to reduce the width.
547  return (OldSize < 32);
548 }
549 
551  EVT CastTy) const {
552 
553  assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
554 
555  if (LoadTy.getScalarType() == MVT::i32)
556  return false;
557 
558  unsigned LScalarSize = LoadTy.getScalarSizeInBits();
559  unsigned CastScalarSize = CastTy.getScalarSizeInBits();
560 
561  return (LScalarSize < CastScalarSize) ||
562  (CastScalarSize >= 32);
563 }
564 
565 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
566 // profitable with the expansion for 64-bit since it's generally good to
567 // speculate things.
568 // FIXME: These should really have the size as a parameter.
570  return true;
571 }
572 
574  return true;
575 }
576 
577 //===---------------------------------------------------------------------===//
578 // Target Properties
579 //===---------------------------------------------------------------------===//
580 
582  assert(VT.isFloatingPoint());
583  return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
584  VT == MVT::f16);
585 }
586 
588  return isFAbsFree(VT);
589 }
590 
592  unsigned NumElem,
593  unsigned AS) const {
594  return true;
595 }
596 
598  // There are few operations which truly have vector input operands. Any vector
599  // operation is going to involve operations on each component, and a
600  // build_vector will be a copy per element, so it always makes sense to use a
601  // build_vector input in place of the extracted element to avoid a copy into a
602  // super register.
603  //
604  // We should probably only do this if all users are extracts only, but this
605  // should be the common case.
606  return true;
607 }
608 
610  // Truncate is just accessing a subregister.
611 
612  unsigned SrcSize = Source.getSizeInBits();
613  unsigned DestSize = Dest.getSizeInBits();
614 
615  return DestSize < SrcSize && DestSize % 32 == 0 ;
616 }
617 
619  // Truncate is just accessing a subregister.
620 
621  unsigned SrcSize = Source->getScalarSizeInBits();
622  unsigned DestSize = Dest->getScalarSizeInBits();
623 
624  if (DestSize== 16 && Subtarget->has16BitInsts())
625  return SrcSize >= 32;
626 
627  return DestSize < SrcSize && DestSize % 32 == 0;
628 }
629 
630 bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
631  unsigned SrcSize = Src->getScalarSizeInBits();
632  unsigned DestSize = Dest->getScalarSizeInBits();
633 
634  if (SrcSize == 16 && Subtarget->has16BitInsts())
635  return DestSize >= 32;
636 
637  return SrcSize == 32 && DestSize == 64;
638 }
639 
640 bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
641  // Any register load of a 64-bit value really requires 2 32-bit moves. For all
642  // practical purposes, the extra mov 0 to load a 64-bit is free. As used,
643  // this will enable reducing 64-bit operations the 32-bit, which is always
644  // good.
645 
646  if (Src == MVT::i16)
647  return Dest == MVT::i32 ||Dest == MVT::i64 ;
648 
649  return Src == MVT::i32 && Dest == MVT::i64;
650 }
651 
653  return isZExtFree(Val.getValueType(), VT2);
654 }
655 
657  // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
658  // limited number of native 64-bit operations. Shrinking an operation to fit
659  // in a single 32-bit register should always be helpful. As currently used,
660  // this is much less general than the name suggests, and is only used in
661  // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
662  // not profitable, and may actually be harmful.
663  return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
664 }
665 
666 //===---------------------------------------------------------------------===//
667 // TargetLowering Callbacks
668 //===---------------------------------------------------------------------===//
669 
670 /// The SelectionDAGBuilder will automatically promote function arguments
671 /// with illegal types. However, this does not work for the AMDGPU targets
672 /// since the function arguments are stored in memory as these illegal types.
673 /// In order to handle this properly we need to get the original types sizes
674 /// from the LLVM IR Function and fixup the ISD:InputArg values before
675 /// passing them to AnalyzeFormalArguments()
676 
677 /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
678 /// input values across multiple registers. Each item in the Ins array
679 /// represents a single value that will be stored in regsters. Ins[x].VT is
680 /// the value type of the value that will be stored in the register, so
681 /// whatever SDNode we lower the argument to needs to be this type.
682 ///
683 /// In order to correctly lower the arguments we need to know the size of each
684 /// argument. Since Ins[x].VT gives us the size of the register that will
685 /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
686 /// for the orignal function argument so that we can deduce the correct memory
687 /// type to use for Ins[x]. In most cases the correct memory type will be
688 /// Ins[x].ArgVT. However, this will not always be the case. If, for example,
689 /// we have a kernel argument of type v8i8, this argument will be split into
690 /// 8 parts and each part will be represented by its own item in the Ins array.
691 /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
692 /// the argument before it was split. From this, we deduce that the memory type
693 /// for each individual part is i8. We pass the memory type as LocVT to the
694 /// calling convention analysis function and the register type (Ins[x].VT) as
695 /// the ValVT.
697  const SmallVectorImpl<ISD::InputArg> &Ins) const {
698  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
699  const ISD::InputArg &In = Ins[i];
700  EVT MemVT;
701 
702  unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
703 
704  if (!Subtarget->isAmdHsaOS() &&
705  (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16)) {
706  // The ABI says the caller will extend these values to 32-bits.
707  MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
708  } else if (NumRegs == 1) {
709  // This argument is not split, so the IR type is the memory type.
710  assert(!In.Flags.isSplit());
711  if (In.ArgVT.isExtended()) {
712  // We have an extended type, like i24, so we should just use the register type
713  MemVT = In.VT;
714  } else {
715  MemVT = In.ArgVT;
716  }
717  } else if (In.ArgVT.isVector() && In.VT.isVector() &&
718  In.ArgVT.getScalarType() == In.VT.getScalarType()) {
720  // We have a vector value which has been split into a vector with
721  // the same scalar type, but fewer elements. This should handle
722  // all the floating-point vector types.
723  MemVT = In.VT;
724  } else if (In.ArgVT.isVector() &&
725  In.ArgVT.getVectorNumElements() == NumRegs) {
726  // This arg has been split so that each element is stored in a separate
727  // register.
728  MemVT = In.ArgVT.getScalarType();
729  } else if (In.ArgVT.isExtended()) {
730  // We have an extended type, like i65.
731  MemVT = In.VT;
732  } else {
733  unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
734  assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
735  if (In.VT.isInteger()) {
736  MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
737  } else if (In.VT.isVector()) {
739  unsigned NumElements = In.VT.getVectorNumElements();
740  assert(MemoryBits % NumElements == 0);
741  // This vector type has been split into another vector type with
742  // a different elements size.
743  EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
744  MemoryBits / NumElements);
745  MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
746  } else {
747  llvm_unreachable("cannot deduce memory type.");
748  }
749  }
750 
751  // Convert one element vectors to scalar.
752  if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
753  MemVT = MemVT.getScalarType();
754 
755  if (MemVT.isExtended()) {
756  // This should really only happen if we have vec3 arguments
757  assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
758  MemVT = MemVT.getPow2VectorType(State.getContext());
759  }
760 
761  assert(MemVT.isSimple());
763  State);
764  }
765 }
766 
768  const SmallVectorImpl<ISD::InputArg> &Ins) const {
769  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
770 }
771 
773  const SmallVectorImpl<ISD::OutputArg> &Outs) const {
774 
775  State.AnalyzeReturn(Outs, RetCC_SI);
776 }
777 
778 SDValue
780  bool isVarArg,
782  const SmallVectorImpl<SDValue> &OutVals,
783  const SDLoc &DL, SelectionDAG &DAG) const {
784  return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
785 }
786 
787 //===---------------------------------------------------------------------===//
788 // Target specific lowering
789 //===---------------------------------------------------------------------===//
790 
792  SmallVectorImpl<SDValue> &InVals) const {
793  SDValue Callee = CLI.Callee;
794  SelectionDAG &DAG = CLI.DAG;
795 
796  const Function &Fn = *DAG.getMachineFunction().getFunction();
797 
798  StringRef FuncName("<unknown>");
799 
800  if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
801  FuncName = G->getSymbol();
802  else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
803  FuncName = G->getGlobal()->getName();
804 
806  Fn, "unsupported call to function " + FuncName, CLI.DL.getDebugLoc());
807  DAG.getContext()->diagnose(NoCalls);
808 
809  if (!CLI.IsTailCall) {
810  for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
811  InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
812  }
813 
814  return DAG.getEntryNode();
815 }
816 
818  SelectionDAG &DAG) const {
819  const Function &Fn = *DAG.getMachineFunction().getFunction();
820 
821  DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
822  SDLoc(Op).getDebugLoc());
823  DAG.getContext()->diagnose(NoDynamicAlloca);
824  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
825  return DAG.getMergeValues(Ops, SDLoc());
826 }
827 
829  SelectionDAG &DAG) const {
830  switch (Op.getOpcode()) {
831  default:
832  Op->dump(&DAG);
833  llvm_unreachable("Custom lowering code for this"
834  "instruction is not implemented yet!");
835  break;
836  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
837  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
838  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
839  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
840  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
841  case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
842  case ISD::FREM: return LowerFREM(Op, DAG);
843  case ISD::FCEIL: return LowerFCEIL(Op, DAG);
844  case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
845  case ISD::FRINT: return LowerFRINT(Op, DAG);
846  case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
847  case ISD::FROUND: return LowerFROUND(Op, DAG);
848  case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
849  case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
850  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
851  case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
852  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
853  case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
854  case ISD::CTLZ:
856  return LowerCTLZ(Op, DAG);
857  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
858  }
859  return Op;
860 }
861 
864  SelectionDAG &DAG) const {
865  switch (N->getOpcode()) {
867  // Different parts of legalization seem to interpret which type of
868  // sign_extend_inreg is the one to check for custom lowering. The extended
869  // from type is what really matters, but some places check for custom
870  // lowering of the result type. This results in trying to use
871  // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
872  // nothing here and let the illegal result integer be handled normally.
873  return;
874  default:
875  return;
876  }
877 }
878 
879 static bool hasDefinedInitializer(const GlobalValue *GV) {
880  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
881  if (!GVar || !GVar->hasInitializer())
882  return false;
883 
884  return !isa<UndefValue>(GVar->getInitializer());
885 }
886 
888  SDValue Op,
889  SelectionDAG &DAG) const {
890 
891  const DataLayout &DL = DAG.getDataLayout();
892  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
893  const GlobalValue *GV = G->getGlobal();
894 
895  switch (G->getAddressSpace()) {
897  // XXX: What does the value of G->getOffset() mean?
898  assert(G->getOffset() == 0 &&
899  "Do not know what to do with an non-zero offset");
900 
901  // TODO: We could emit code to handle the initialization somewhere.
902  if (hasDefinedInitializer(GV))
903  break;
904 
905  unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
906  return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
907  }
908  }
909 
910  const Function &Fn = *DAG.getMachineFunction().getFunction();
912  Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc());
913  DAG.getContext()->diagnose(BadInit);
914  return SDValue();
915 }
916 
918  SelectionDAG &DAG) const {
920 
921  for (const SDUse &U : Op->ops())
922  DAG.ExtractVectorElements(U.get(), Args);
923 
924  return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
925 }
926 
928  SelectionDAG &DAG) const {
929 
931  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
932  EVT VT = Op.getValueType();
933  DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
934  VT.getVectorNumElements());
935 
936  return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
937 }
938 
940  SelectionDAG &DAG) const {
941  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
942  SDLoc DL(Op);
943  EVT VT = Op.getValueType();
944 
945  switch (IntrinsicID) {
946  default: return Op;
947  case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name.
948  return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
949  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
950 
951  case AMDGPUIntrinsic::AMDGPU_bfe_i32:
952  return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
953  Op.getOperand(1),
954  Op.getOperand(2),
955  Op.getOperand(3));
956 
957  case AMDGPUIntrinsic::AMDGPU_bfe_u32:
958  return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
959  Op.getOperand(1),
960  Op.getOperand(2),
961  Op.getOperand(3));
962  }
963 }
964 
965 /// \brief Generate Min/Max node
967  SDValue LHS, SDValue RHS,
968  SDValue True, SDValue False,
969  SDValue CC,
970  DAGCombinerInfo &DCI) const {
972  return SDValue();
973 
974  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
975  return SDValue();
976 
977  SelectionDAG &DAG = DCI.DAG;
978  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
979  switch (CCOpcode) {
980  case ISD::SETOEQ:
981  case ISD::SETONE:
982  case ISD::SETUNE:
983  case ISD::SETNE:
984  case ISD::SETUEQ:
985  case ISD::SETEQ:
986  case ISD::SETFALSE:
987  case ISD::SETFALSE2:
988  case ISD::SETTRUE:
989  case ISD::SETTRUE2:
990  case ISD::SETUO:
991  case ISD::SETO:
992  break;
993  case ISD::SETULE:
994  case ISD::SETULT: {
995  if (LHS == True)
996  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
997  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
998  }
999  case ISD::SETOLE:
1000  case ISD::SETOLT:
1001  case ISD::SETLE:
1002  case ISD::SETLT: {
1003  // Ordered. Assume ordered for undefined.
1004 
1005  // Only do this after legalization to avoid interfering with other combines
1006  // which might occur.
1007  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1008  !DCI.isCalledByLegalizer())
1009  return SDValue();
1010 
1011  // We need to permute the operands to get the correct NaN behavior. The
1012  // selected operand is the second one based on the failing compare with NaN,
1013  // so permute it based on the compare type the hardware uses.
1014  if (LHS == True)
1015  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1016  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1017  }
1018  case ISD::SETUGE:
1019  case ISD::SETUGT: {
1020  if (LHS == True)
1021  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1022  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1023  }
1024  case ISD::SETGT:
1025  case ISD::SETGE:
1026  case ISD::SETOGE:
1027  case ISD::SETOGT: {
1028  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1029  !DCI.isCalledByLegalizer())
1030  return SDValue();
1031 
1032  if (LHS == True)
1033  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1034  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1035  }
1036  case ISD::SETCC_INVALID:
1037  llvm_unreachable("Invalid setcc condcode!");
1038  }
1039  return SDValue();
1040 }
1041 
1042 std::pair<SDValue, SDValue>
1044  SDLoc SL(Op);
1045 
1046  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1047 
1048  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1049  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1050 
1051  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
1052  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
1053 
1054  return std::make_pair(Lo, Hi);
1055 }
1056 
1058  SDLoc SL(Op);
1059 
1060  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1061  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1062  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
1063 }
1064 
1066  SDLoc SL(Op);
1067 
1068  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1069  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1070  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
1071 }
1072 
1074  SelectionDAG &DAG) const {
1075  LoadSDNode *Load = cast<LoadSDNode>(Op);
1076  EVT VT = Op.getValueType();
1077 
1078 
1079  // If this is a 2 element vector, we really want to scalarize and not create
1080  // weird 1 element vectors.
1081  if (VT.getVectorNumElements() == 2)
1082  return scalarizeVectorLoad(Load, DAG);
1083 
1084  SDValue BasePtr = Load->getBasePtr();
1085  EVT PtrVT = BasePtr.getValueType();
1086  EVT MemVT = Load->getMemoryVT();
1087  SDLoc SL(Op);
1088 
1089  const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
1090 
1091  EVT LoVT, HiVT;
1092  EVT LoMemVT, HiMemVT;
1093  SDValue Lo, Hi;
1094 
1095  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1096  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1097  std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
1098 
1099  unsigned Size = LoMemVT.getStoreSize();
1100  unsigned BaseAlign = Load->getAlignment();
1101  unsigned HiAlign = MinAlign(BaseAlign, Size);
1102 
1103  SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
1104  Load->getChain(), BasePtr, SrcValue, LoMemVT,
1105  BaseAlign, Load->getMemOperand()->getFlags());
1106  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1107  DAG.getConstant(Size, SL, PtrVT));
1108  SDValue HiLoad =
1109  DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
1110  HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1111  HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
1112 
1113  SDValue Ops[] = {
1114  DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
1116  LoLoad.getValue(1), HiLoad.getValue(1))
1117  };
1118 
1119  return DAG.getMergeValues(Ops, SL);
1120 }
1121 
1123  SelectionDAG &DAG) const {
1124  StoreSDNode *Store = cast<StoreSDNode>(Op);
1125  SDValue Val = Store->getValue();
1126  EVT VT = Val.getValueType();
1127 
1128  // If this is a 2 element vector, we really want to scalarize and not create
1129  // weird 1 element vectors.
1130  if (VT.getVectorNumElements() == 2)
1131  return scalarizeVectorStore(Store, DAG);
1132 
1133  EVT MemVT = Store->getMemoryVT();
1134  SDValue Chain = Store->getChain();
1135  SDValue BasePtr = Store->getBasePtr();
1136  SDLoc SL(Op);
1137 
1138  EVT LoVT, HiVT;
1139  EVT LoMemVT, HiMemVT;
1140  SDValue Lo, Hi;
1141 
1142  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1143  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1144  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
1145 
1146  EVT PtrVT = BasePtr.getValueType();
1147  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1148  DAG.getConstant(LoMemVT.getStoreSize(), SL,
1149  PtrVT));
1150 
1151  const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
1152  unsigned BaseAlign = Store->getAlignment();
1153  unsigned Size = LoMemVT.getStoreSize();
1154  unsigned HiAlign = MinAlign(BaseAlign, Size);
1155 
1156  SDValue LoStore =
1157  DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
1158  Store->getMemOperand()->getFlags());
1159  SDValue HiStore =
1160  DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
1161  HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
1162 
1163  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
1164 }
1165 
1166 // This is a shortcut for integer division because we have fast i32<->f32
1167 // conversions, and fast f32 reciprocal instructions. The fractional part of a
1168 // float is enough to accurately represent up to a 24-bit signed integer.
1170  bool Sign) const {
1171  SDLoc DL(Op);
1172  EVT VT = Op.getValueType();
1173  SDValue LHS = Op.getOperand(0);
1174  SDValue RHS = Op.getOperand(1);
1175  MVT IntVT = MVT::i32;
1176  MVT FltVT = MVT::f32;
1177 
1178  unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
1179  if (LHSSignBits < 9)
1180  return SDValue();
1181 
1182  unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
1183  if (RHSSignBits < 9)
1184  return SDValue();
1185 
1186  unsigned BitSize = VT.getSizeInBits();
1187  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
1188  unsigned DivBits = BitSize - SignBits;
1189  if (Sign)
1190  ++DivBits;
1191 
1194 
1195  SDValue jq = DAG.getConstant(1, DL, IntVT);
1196 
1197  if (Sign) {
1198  // char|short jq = ia ^ ib;
1199  jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
1200 
1201  // jq = jq >> (bitsize - 2)
1202  jq = DAG.getNode(ISD::SRA, DL, VT, jq,
1203  DAG.getConstant(BitSize - 2, DL, VT));
1204 
1205  // jq = jq | 0x1
1206  jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
1207  }
1208 
1209  // int ia = (int)LHS;
1210  SDValue ia = LHS;
1211 
1212  // int ib, (int)RHS;
1213  SDValue ib = RHS;
1214 
1215  // float fa = (float)ia;
1216  SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
1217 
1218  // float fb = (float)ib;
1219  SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
1220 
1221  SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
1222  fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
1223 
1224  // fq = trunc(fq);
1225  fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
1226 
1227  // float fqneg = -fq;
1228  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
1229 
1230  // float fr = mad(fqneg, fb, fa);
1231  SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa);
1232 
1233  // int iq = (int)fq;
1234  SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
1235 
1236  // fr = fabs(fr);
1237  fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
1238 
1239  // fb = fabs(fb);
1240  fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
1241 
1242  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
1243 
1244  // int cv = fr >= fb;
1245  SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
1246 
1247  // jq = (cv ? jq : 0);
1248  jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
1249 
1250  // dst = iq + jq;
1251  SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
1252 
1253  // Rem needs compensation, it's easier to recompute it
1254  SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
1255  Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
1256 
1257  // Truncate to number of bits this divide really is.
1258  if (Sign) {
1259  SDValue InRegSize
1260  = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
1261  Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
1262  Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
1263  } else {
1264  SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
1265  Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
1266  Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
1267  }
1268 
1269  return DAG.getMergeValues({ Div, Rem }, DL);
1270 }
1271 
1273  SelectionDAG &DAG,
1275  assert(Op.getValueType() == MVT::i64);
1276 
1277  SDLoc DL(Op);
1278  EVT VT = Op.getValueType();
1279  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1280 
1281  SDValue one = DAG.getConstant(1, DL, HalfVT);
1282  SDValue zero = DAG.getConstant(0, DL, HalfVT);
1283 
1284  //HiLo split
1285  SDValue LHS = Op.getOperand(0);
1286  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
1287  SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
1288 
1289  SDValue RHS = Op.getOperand(1);
1290  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
1291  SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
1292 
1293  if (VT == MVT::i64 &&
1294  DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
1295  DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
1296 
1297  SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1298  LHS_Lo, RHS_Lo);
1299 
1300  SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), zero});
1301  SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), zero});
1302 
1303  Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
1304  Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
1305  return;
1306  }
1307 
1308  // Get Speculative values
1309  SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
1310  SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
1311 
1312  SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
1313  SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, zero});
1314  REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
1315 
1316  SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
1317  SDValue DIV_Lo = zero;
1318 
1319  const unsigned halfBitWidth = HalfVT.getSizeInBits();
1320 
1321  for (unsigned i = 0; i < halfBitWidth; ++i) {
1322  const unsigned bitPos = halfBitWidth - i - 1;
1323  SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
1324  // Get value of high bit
1325  SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
1326  HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
1327  HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
1328 
1329  // Shift
1330  REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
1331  // Add LHS high bit
1332  REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
1333 
1334  SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
1335  SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
1336 
1337  DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
1338 
1339  // Update REM
1340  SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
1341  REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
1342  }
1343 
1344  SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
1345  DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
1346  Results.push_back(DIV);
1347  Results.push_back(REM);
1348 }
1349 
1351  SelectionDAG &DAG) const {
1352  SDLoc DL(Op);
1353  EVT VT = Op.getValueType();
1354 
1355  if (VT == MVT::i64) {
1357  LowerUDIVREM64(Op, DAG, Results);
1358  return DAG.getMergeValues(Results, DL);
1359  }
1360 
1361  if (VT == MVT::i32) {
1362  if (SDValue Res = LowerDIVREM24(Op, DAG, false))
1363  return Res;
1364  }
1365 
1366  SDValue Num = Op.getOperand(0);
1367  SDValue Den = Op.getOperand(1);
1368 
1369  // RCP = URECIP(Den) = 2^32 / Den + e
1370  // e is rounding error.
1371  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1372 
1373  // RCP_LO = mul(RCP, Den) */
1374  SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1375 
1376  // RCP_HI = mulhu (RCP, Den) */
1377  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1378 
1379  // NEG_RCP_LO = -RCP_LO
1380  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
1381  RCP_LO);
1382 
1383  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1384  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1385  NEG_RCP_LO, RCP_LO,
1386  ISD::SETEQ);
1387  // Calculate the rounding error from the URECIP instruction
1388  // E = mulhu(ABS_RCP_LO, RCP)
1389  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1390 
1391  // RCP_A_E = RCP + E
1392  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1393 
1394  // RCP_S_E = RCP - E
1395  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1396 
1397  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
1398  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1399  RCP_A_E, RCP_S_E,
1400  ISD::SETEQ);
1401  // Quotient = mulhu(Tmp0, Num)
1402  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1403 
1404  // Num_S_Remainder = Quotient * Den
1405  SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1406 
1407  // Remainder = Num - Num_S_Remainder
1408  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1409 
1410  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1411  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1412  DAG.getConstant(-1, DL, VT),
1413  DAG.getConstant(0, DL, VT),
1414  ISD::SETUGE);
1415  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
1416  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
1417  Num_S_Remainder,
1418  DAG.getConstant(-1, DL, VT),
1419  DAG.getConstant(0, DL, VT),
1420  ISD::SETUGE);
1421  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
1422  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
1423  Remainder_GE_Zero);
1424 
1425  // Calculate Division result:
1426 
1427  // Quotient_A_One = Quotient + 1
1428  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
1429  DAG.getConstant(1, DL, VT));
1430 
1431  // Quotient_S_One = Quotient - 1
1432  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
1433  DAG.getConstant(1, DL, VT));
1434 
1435  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
1436  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1437  Quotient, Quotient_A_One, ISD::SETEQ);
1438 
1439  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
1440  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1441  Quotient_S_One, Div, ISD::SETEQ);
1442 
1443  // Calculate Rem result:
1444 
1445  // Remainder_S_Den = Remainder - Den
1446  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
1447 
1448  // Remainder_A_Den = Remainder + Den
1449  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
1450 
1451  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
1452  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1453  Remainder, Remainder_S_Den, ISD::SETEQ);
1454 
1455  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
1456  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1457  Remainder_A_Den, Rem, ISD::SETEQ);
1458  SDValue Ops[2] = {
1459  Div,
1460  Rem
1461  };
1462  return DAG.getMergeValues(Ops, DL);
1463 }
1464 
1466  SelectionDAG &DAG) const {
1467  SDLoc DL(Op);
1468  EVT VT = Op.getValueType();
1469 
1470  SDValue LHS = Op.getOperand(0);
1471  SDValue RHS = Op.getOperand(1);
1472 
1473  SDValue Zero = DAG.getConstant(0, DL, VT);
1474  SDValue NegOne = DAG.getConstant(-1, DL, VT);
1475 
1476  if (VT == MVT::i32) {
1477  if (SDValue Res = LowerDIVREM24(Op, DAG, true))
1478  return Res;
1479  }
1480 
1481  if (VT == MVT::i64 &&
1482  DAG.ComputeNumSignBits(LHS) > 32 &&
1483  DAG.ComputeNumSignBits(RHS) > 32) {
1484  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1485 
1486  //HiLo split
1487  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
1488  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
1489  SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1490  LHS_Lo, RHS_Lo);
1491  SDValue Res[2] = {
1492  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
1493  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
1494  };
1495  return DAG.getMergeValues(Res, DL);
1496  }
1497 
1498  SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
1499  SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
1500  SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
1501  SDValue RSign = LHSign; // Remainder sign is the same as LHS
1502 
1503  LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
1504  RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
1505 
1506  LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
1507  RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
1508 
1509  SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
1510  SDValue Rem = Div.getValue(1);
1511 
1512  Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
1513  Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
1514 
1515  Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
1516  Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
1517 
1518  SDValue Res[2] = {
1519  Div,
1520  Rem
1521  };
1522  return DAG.getMergeValues(Res, DL);
1523 }
1524 
1525 // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
1527  SDLoc SL(Op);
1528  EVT VT = Op.getValueType();
1529  SDValue X = Op.getOperand(0);
1530  SDValue Y = Op.getOperand(1);
1531 
1532  // TODO: Should this propagate fast-math-flags?
1533 
1534  SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
1535  SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
1536  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
1537 
1538  return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
1539 }
1540 
1542  SDLoc SL(Op);
1543  SDValue Src = Op.getOperand(0);
1544 
1545  // result = trunc(src)
1546  // if (src > 0.0 && src != result)
1547  // result += 1.0
1548 
1549  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1550 
1551  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
1552  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
1553 
1554  EVT SetCCVT =
1556 
1557  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
1558  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1559  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1560 
1561  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
1562  // TODO: Should this propagate fast-math-flags?
1563  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1564 }
1565 
1567  SelectionDAG &DAG) {
1568  const unsigned FractBits = 52;
1569  const unsigned ExpBits = 11;
1570 
1571  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
1572  Hi,
1573  DAG.getConstant(FractBits - 32, SL, MVT::i32),
1574  DAG.getConstant(ExpBits, SL, MVT::i32));
1575  SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
1576  DAG.getConstant(1023, SL, MVT::i32));
1577 
1578  return Exp;
1579 }
1580 
1582  SDLoc SL(Op);
1583  SDValue Src = Op.getOperand(0);
1584 
1585  assert(Op.getValueType() == MVT::f64);
1586 
1587  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1588  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1589 
1590  SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1591 
1592  // Extract the upper half, since this is where we will find the sign and
1593  // exponent.
1594  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
1595 
1596  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
1597 
1598  const unsigned FractBits = 52;
1599 
1600  // Extract the sign bit.
1601  const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
1602  SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
1603 
1604  // Extend back to to 64-bits.
1605  SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
1606  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
1607 
1608  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
1609  const SDValue FractMask
1610  = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
1611 
1612  SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
1613  SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
1614  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
1615 
1616  EVT SetCCVT =
1618 
1619  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
1620 
1621  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
1622  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
1623 
1624  SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
1625  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
1626 
1627  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
1628 }
1629 
1631  SDLoc SL(Op);
1632  SDValue Src = Op.getOperand(0);
1633 
1634  assert(Op.getValueType() == MVT::f64);
1635 
1636  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
1637  SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
1638  SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
1639 
1640  // TODO: Should this propagate fast-math-flags?
1641 
1642  SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
1643  SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
1644 
1645  SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
1646 
1647  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
1648  SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
1649 
1650  EVT SetCCVT =
1652  SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
1653 
1654  return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
1655 }
1656 
1658  // FNEARBYINT and FRINT are the same, except in their handling of FP
1659  // exceptions. Those aren't really meaningful for us, and OpenCL only has
1660  // rint, so just treat them as equivalent.
1661  return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
1662 }
1663 
1664 // XXX - May require not supporting f32 denormals?
1666  SDLoc SL(Op);
1667  SDValue X = Op.getOperand(0);
1668 
1669  SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
1670 
1671  // TODO: Should this propagate fast-math-flags?
1672 
1673  SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
1674 
1675  SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
1676 
1677  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
1678  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
1679  const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
1680 
1681  SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
1682 
1683  EVT SetCCVT =
1685 
1686  SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
1687 
1688  SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
1689 
1690  return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
1691 }
1692 
1694  SDLoc SL(Op);
1695  SDValue X = Op.getOperand(0);
1696 
1697  SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
1698 
1699  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1700  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1701  const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
1702  const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
1703  EVT SetCCVT =
1705 
1706  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
1707 
1708  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
1709 
1710  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
1711 
1712  const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
1713  MVT::i64);
1714 
1715  SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
1716  SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
1717  DAG.getConstant(INT64_C(0x0008000000000000), SL,
1718  MVT::i64),
1719  Exp);
1720 
1721  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
1722  SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
1723  DAG.getConstant(0, SL, MVT::i64), Tmp0,
1724  ISD::SETNE);
1725 
1726  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
1727  D, DAG.getConstant(0, SL, MVT::i64));
1728  SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
1729 
1730  K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
1731  K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
1732 
1733  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
1734  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
1735  SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
1736 
1737  SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
1738  ExpEqNegOne,
1739  DAG.getConstantFP(1.0, SL, MVT::f64),
1740  DAG.getConstantFP(0.0, SL, MVT::f64));
1741 
1742  SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
1743 
1744  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
1745  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
1746 
1747  return K;
1748 }
1749 
1751  EVT VT = Op.getValueType();
1752 
1753  if (VT == MVT::f32)
1754  return LowerFROUND32(Op, DAG);
1755 
1756  if (VT == MVT::f64)
1757  return LowerFROUND64(Op, DAG);
1758 
1759  llvm_unreachable("unhandled type");
1760 }
1761 
1763  SDLoc SL(Op);
1764  SDValue Src = Op.getOperand(0);
1765 
1766  // result = trunc(src);
1767  // if (src < 0.0 && src != result)
1768  // result += -1.0.
1769 
1770  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1771 
1772  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
1773  const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
1774 
1775  EVT SetCCVT =
1777 
1778  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
1779  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1780  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1781 
1782  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
1783  // TODO: Should this propagate fast-math-flags?
1784  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1785 }
1786 
1788  SDLoc SL(Op);
1789  SDValue Src = Op.getOperand(0);
1790  bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
1791 
1792  if (ZeroUndef && Src.getValueType() == MVT::i32)
1793  return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src);
1794 
1795  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1796 
1797  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1798  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1799 
1800  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
1801  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
1802 
1803  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
1804  *DAG.getContext(), MVT::i32);
1805 
1806  SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ);
1807 
1808  SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo);
1809  SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi);
1810 
1811  const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
1812  SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32);
1813 
1814  // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
1815  SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi);
1816 
1817  if (!ZeroUndef) {
1818  // Test if the full 64-bit input is zero.
1819 
1820  // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
1821  // which we probably don't want.
1822  SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ);
1823  SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0);
1824 
1825  // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
1826  // with the same cycles, otherwise it is slower.
1827  // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
1828  // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
1829 
1830  const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
1831 
1832  // The instruction returns -1 for 0 input, but the defined intrinsic
1833  // behavior is to return the number of bits.
1834  NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32,
1835  SrcIsZero, Bits32, NewCtlz);
1836  }
1837 
1838  return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz);
1839 }
1840 
1842  bool Signed) const {
1843  // Unsigned
1844  // cul2f(ulong u)
1845  //{
1846  // uint lz = clz(u);
1847  // uint e = (u != 0) ? 127U + 63U - lz : 0;
1848  // u = (u << lz) & 0x7fffffffffffffffUL;
1849  // ulong t = u & 0xffffffffffUL;
1850  // uint v = (e << 23) | (uint)(u >> 40);
1851  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
1852  // return as_float(v + r);
1853  //}
1854  // Signed
1855  // cl2f(long l)
1856  //{
1857  // long s = l >> 63;
1858  // float r = cul2f((l + s) ^ s);
1859  // return s ? -r : r;
1860  //}
1861 
1862  SDLoc SL(Op);
1863  SDValue Src = Op.getOperand(0);
1864  SDValue L = Src;
1865 
1866  SDValue S;
1867  if (Signed) {
1868  const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
1869  S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
1870 
1871  SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
1872  L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
1873  }
1874 
1875  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
1876  *DAG.getContext(), MVT::f32);
1877 
1878 
1879  SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
1880  SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
1881  SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
1882  LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
1883 
1884  SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
1885  SDValue E = DAG.getSelect(SL, MVT::i32,
1886  DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
1887  DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
1888  ZeroI32);
1889 
1890  SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
1891  DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
1892  DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
1893 
1894  SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
1895  DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
1896 
1897  SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
1898  U, DAG.getConstant(40, SL, MVT::i64));
1899 
1900  SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
1901  DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
1902  DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
1903 
1904  SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
1905  SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
1906  SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
1907 
1908  SDValue One = DAG.getConstant(1, SL, MVT::i32);
1909 
1910  SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
1911 
1912  SDValue R = DAG.getSelect(SL, MVT::i32,
1913  RCmp,
1914  One,
1915  DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
1916  R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
1917  R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
1918 
1919  if (!Signed)
1920  return R;
1921 
1922  SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
1923  return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
1924 }
1925 
1927  bool Signed) const {
1928  SDLoc SL(Op);
1929  SDValue Src = Op.getOperand(0);
1930 
1931  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1932 
1933  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
1934  DAG.getConstant(0, SL, MVT::i32));
1935  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
1936  DAG.getConstant(1, SL, MVT::i32));
1937 
1938  SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
1939  SL, MVT::f64, Hi);
1940 
1941  SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
1942 
1943  SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
1944  DAG.getConstant(32, SL, MVT::i32));
1945  // TODO: Should this propagate fast-math-flags?
1946  return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
1947 }
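 // Editorial aside: for reference, the expansion above is equivalent to the
 // scalar sketch below (illustrative only, not part of this file); the signed
 // variant differs only in converting Hi as an int32_t instead of a uint32_t.
 #include <cmath>
 #include <cstdint>

 static double uint64_to_f64_ref(uint64_t x) {
   uint32_t Lo = (uint32_t)x;
   uint32_t Hi = (uint32_t)(x >> 32);
   return std::ldexp((double)Hi, 32) + (double)Lo;        // LDEXP(CvtHi, 32) + CvtLo
 }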
1948 
1949 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
1950  SelectionDAG &DAG) const {
1951  assert(Op.getOperand(0).getValueType() == MVT::i64 &&
1952  "operation should be legal");
1953 
1954  // TODO: Factor out code common with LowerSINT_TO_FP.
1955 
1956  EVT DestVT = Op.getValueType();
1957  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
1958  SDLoc DL(Op);
1959  SDValue Src = Op.getOperand(0);
1960 
1961  SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
1962  SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
1963  SDValue FPRound =
1964  DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
1965 
1966  return FPRound;
1967  }
1968 
1969  if (DestVT == MVT::f32)
1970  return LowerINT_TO_FP32(Op, DAG, false);
1971 
1972  assert(DestVT == MVT::f64);
1973  return LowerINT_TO_FP64(Op, DAG, false);
1974 }
1975 
1976 SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
1977  SelectionDAG &DAG) const {
1978  assert(Op.getOperand(0).getValueType() == MVT::i64 &&
1979  "operation should be legal");
1980 
1981  // TODO: Factor out code common with LowerUINT_TO_FP.
1982 
1983  EVT DestVT = Op.getValueType();
1984  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
1985  SDLoc DL(Op);
1986  SDValue Src = Op.getOperand(0);
1987 
1988  SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
1989  SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
1990  SDValue FPRound =
1991  DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
1992 
1993  return FPRound;
1994  }
1995 
1996  if (DestVT == MVT::f32)
1997  return LowerINT_TO_FP32(Op, DAG, true);
1998 
1999  assert(DestVT == MVT::f64);
2000  return LowerINT_TO_FP64(Op, DAG, true);
2001 }
2002 
2003 SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
2004  bool Signed) const {
2005  SDLoc SL(Op);
2006 
2007  SDValue Src = Op.getOperand(0);
2008 
2009  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2010 
2011  SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
2012  MVT::f64);
2013  SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
2014  MVT::f64);
2015  // TODO: Should this propagate fast-math-flags?
2016  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
2017 
2018  SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
2019 
2020 
2021  SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
2022 
2023  SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
2024  MVT::i32, FloorMul);
2025  SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
2026 
2027  SDValue Result = DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi});
2028 
2029  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
2030 }
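 // Editorial note on the constants above: K0 (0x3df0000000000000) is 2^-32 and
 // K1 (0xc1f0000000000000) is -2^32, so the sequence computes, in effect,
 //   Hi = (int)floor(trunc(Src) * 2^-32)              // upper 32 result bits
 //   Lo = (uint)fma(floor(...), -2^32, trunc(Src))    // exact remainder, lower 32 bits
 // and reassembles the 64-bit integer result as the pair {Lo, Hi}.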
2031 
2032 SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
2033 
2034  if (getTargetMachine().Options.UnsafeFPMath) {
2035  // There is a generic expand for FP_TO_FP16 with unsafe fast math.
2036  return SDValue();
2037  }
2038 
2039  SDLoc DL(Op);
2040  SDValue N0 = Op.getOperand(0);
2041  assert(N0.getSimpleValueType() == MVT::f64);
2042 
2043  // f64 -> f16 conversion using round-to-nearest-even rounding mode.
2044  const unsigned ExpMask = 0x7ff;
2045  const unsigned ExpBiasf64 = 1023;
2046  const unsigned ExpBiasf16 = 15;
2047  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
2048  SDValue One = DAG.getConstant(1, DL, MVT::i32);
2049  SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
2050  SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
2051  DAG.getConstant(32, DL, MVT::i64));
2052  UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
2053  U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
2054  SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2055  DAG.getConstant(20, DL, MVT::i64));
2056  E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
2057  DAG.getConstant(ExpMask, DL, MVT::i32));
2058  // Subtract the fp64 exponent bias (1023) to get the real exponent and
2059  // add the f16 bias (15) to get the biased exponent for the f16 format.
2060  E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
2061  DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
2062 
2063  SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2064  DAG.getConstant(8, DL, MVT::i32));
2065  M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
2066  DAG.getConstant(0xffe, DL, MVT::i32));
2067 
2068  SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
2069  DAG.getConstant(0x1ff, DL, MVT::i32));
2070  MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
2071 
2072  SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
2073  M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
2074 
2075  // (M != 0 ? 0x0200 : 0) | 0x7c00;
2076  SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
2077  DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
2078  Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
2079 
2080  // N = M | (E << 12);
2081  SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
2082  DAG.getNode(ISD::SHL, DL, MVT::i32, E,
2083  DAG.getConstant(12, DL, MVT::i32)));
2084 
2085  // B = clamp(1-E, 0, 13);
2086  SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
2087  One, E);
2088  SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
2089  B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
2090  DAG.getConstant(13, DL, MVT::i32));
2091 
2092  SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
2093  DAG.getConstant(0x1000, DL, MVT::i32));
2094 
2095  SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
2096  SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
2097  SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
2098  D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
2099 
2100  SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
2101  SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
2102  DAG.getConstant(0x7, DL, MVT::i32));
2103  V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
2104  DAG.getConstant(2, DL, MVT::i32));
2105  SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
2106  One, Zero, ISD::SETEQ);
2107  SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
2108  One, Zero, ISD::SETGT);
2109  V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
2110  V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
2111 
2112  V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
2113  DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
2114  V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
2115  I, V, ISD::SETEQ);
2116 
2117  // Extract the sign bit.
2118  SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2119  DAG.getConstant(16, DL, MVT::i32));
2120  Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
2121  DAG.getConstant(0x8000, DL, MVT::i32));
2122 
2123  V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
2124  return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
2125 }
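 // Editorial note: E above is the f64 exponent rebased to the f16 bias
 // (E = biased_exp_f64 - 1023 + 15), which explains the magic numbers:
 // E == 1039 corresponds to biased_exp_f64 == 2047, i.e. Inf/NaN, which selects
 // I; E > 30 overflows to f16 infinity (0x7c00); E < 1 takes the denormal path,
 // where B = clamp(1 - E, 0, 13) is the extra significand shift and D0/D1 keep
 // the sticky bit for correct round-to-nearest-even.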
2126 
2127 SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
2128  SelectionDAG &DAG) const {
2129  SDValue Src = Op.getOperand(0);
2130 
2131  // TODO: Factor out code common with LowerFP_TO_UINT.
2132 
2133  EVT SrcVT = Src.getValueType();
2134  if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
2135  SDLoc DL(Op);
2136 
2137  SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2138  SDValue FpToInt32 =
2139  DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
2140 
2141  return FpToInt32;
2142  }
2143 
2144  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2145  return LowerFP64_TO_INT(Op, DAG, true);
2146 
2147  return SDValue();
2148 }
2149 
2150 SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
2151  SelectionDAG &DAG) const {
2152  SDValue Src = Op.getOperand(0);
2153 
2154  // TODO: Factor out code common with LowerFP_TO_SINT.
2155 
2156  EVT SrcVT = Src.getValueType();
2157  if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
2158  SDLoc DL(Op);
2159 
2160  SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2161  SDValue FpToInt32 =
2162  DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
2163 
2164  return FpToInt32;
2165  }
2166 
2167  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2168  return LowerFP64_TO_INT(Op, DAG, false);
2169 
2170  return SDValue();
2171 }
2172 
2173 SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2174  SelectionDAG &DAG) const {
2175  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2176  MVT VT = Op.getSimpleValueType();
2177  MVT ScalarVT = VT.getScalarType();
2178 
2179  assert(VT.isVector());
2180 
2181  SDValue Src = Op.getOperand(0);
2182  SDLoc DL(Op);
2183 
2184  // TODO: Don't scalarize on Evergreen?
2185  unsigned NElts = VT.getVectorNumElements();
2186  SmallVector<SDValue, 8> Args;
2187  DAG.ExtractVectorElements(Src, Args, 0, NElts);
2188 
2189  SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
2190  for (unsigned I = 0; I < NElts; ++I)
2191  Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
2192 
2193  return DAG.getBuildVector(VT, DL, Args);
2194 }
2195 
2196 //===----------------------------------------------------------------------===//
2197 // Custom DAG optimizations
2198 //===----------------------------------------------------------------------===//
2199 
2200 static bool isU24(SDValue Op, SelectionDAG &DAG) {
2201  APInt KnownZero, KnownOne;
2202  EVT VT = Op.getValueType();
2203  DAG.computeKnownBits(Op, KnownZero, KnownOne);
2204 
2205  return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
2206 }
2207 
2208 static bool isI24(SDValue Op, SelectionDAG &DAG) {
2209  EVT VT = Op.getValueType();
2210 
2211  // In order for this to be a signed 24-bit value, bit 23 must
2212  // be a sign bit.
2213  return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
2214  // as unsigned 24-bit values.
2215  (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
2216 }
2217 
2218 static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
2219  TargetLowering::DAGCombinerInfo &DCI) {
2220 
2221  SelectionDAG &DAG = DCI.DAG;
2222  SDValue Op = Node24->getOperand(OpIdx);
2223  EVT VT = Op.getValueType();
2224 
2225  APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
2226  APInt KnownZero, KnownOne;
2227  TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
2228  if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI))
2229  return true;
2230 
2231  return false;
2232 }
2233 
2234 template <typename IntTy>
2235 static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
2236  uint32_t Width, const SDLoc &DL) {
2237  if (Width + Offset < 32) {
2238  uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
2239  IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
2240  return DAG.getConstant(Result, DL, MVT::i32);
2241  }
2242 
2243  return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
2244 }
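 // Worked example (editorial): for Src0 = 0xABCD1234, Offset = 8, Width = 8,
 //   Shl    = 0xABCD1234 << (32 - 8 - 8) = 0x12340000
 //   Result = 0x12340000 >> (32 - 8)     = 0x12    (bits [15:8] of Src0)
 // The uint32_t instantiation performs a logical right shift and the int32_t
 // one an arithmetic right shift, i.e. zero- vs. sign-extension of the field.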
2245 
2246 static bool hasVolatileUser(SDNode *Val) {
2247  for (SDNode *U : Val->uses()) {
2248  if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
2249  if (M->isVolatile())
2250  return true;
2251  }
2252  }
2253 
2254  return false;
2255 }
2256 
2257 bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
2258  // i32 vectors are the canonical memory type.
2259  if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
2260  return false;
2261 
2262  if (!VT.isByteSized())
2263  return false;
2264 
2265  unsigned Size = VT.getStoreSize();
2266 
2267  if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
2268  return false;
2269 
2270  if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
2271  return false;
2272 
2273  return true;
2274 }
2275 
2276 // Replace a load of an illegal type with a load of a bitcast to a friendlier
2277 // type.
2278 SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
2279  DAGCombinerInfo &DCI) const {
2280  if (!DCI.isBeforeLegalize())
2281  return SDValue();
2282 
2283  LoadSDNode *LN = cast<LoadSDNode>(N);
2284  if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
2285  return SDValue();
2286 
2287  SDLoc SL(N);
2288  SelectionDAG &DAG = DCI.DAG;
2289  EVT VT = LN->getMemoryVT();
2290 
2291  unsigned Size = VT.getStoreSize();
2292  unsigned Align = LN->getAlignment();
2293  if (Align < Size && isTypeLegal(VT)) {
2294  bool IsFast;
2295  unsigned AS = LN->getAddressSpace();
2296 
2297  // Expand unaligned loads earlier than legalization. Due to visitation order
2298  // problems during legalization, the emitted instructions to pack and unpack
2299  // the bytes again are not eliminated in the case of an unaligned copy.
2300  if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
2301  if (VT.isVector())
2302  return scalarizeVectorLoad(LN, DAG);
2303 
2304  SDValue Ops[2];
2305  std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
2306  return DAG.getMergeValues(Ops, SDLoc(N));
2307  }
2308 
2309  if (!IsFast)
2310  return SDValue();
2311  }
2312 
2313  if (!shouldCombineMemoryType(VT))
2314  return SDValue();
2315 
2316  EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
2317 
2318  SDValue NewLoad
2319  = DAG.getLoad(NewVT, SL, LN->getChain(),
2320  LN->getBasePtr(), LN->getMemOperand());
2321 
2322  SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
2323  DCI.CombineTo(N, BC, NewLoad.getValue(1));
2324  return SDValue(N, 0);
2325 }
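 // Editorial example: a sufficiently aligned load of an i96 value (12 bytes) is
 // not a legal type here, but shouldCombineMemoryType accepts it, so the load
 // is rewritten as a v3i32 load followed by a bitcast back to i96; the bitcast
 // value and the new chain then replace the original node via CombineTo.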
2326 
2327 // Replace store of an illegal type with a store of a bitcast to a friendlier
2328 // type.
2329 SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
2330  DAGCombinerInfo &DCI) const {
2331  if (!DCI.isBeforeLegalize())
2332  return SDValue();
2333 
2334  StoreSDNode *SN = cast<StoreSDNode>(N);
2335  if (SN->isVolatile() || !ISD::isNormalStore(SN))
2336  return SDValue();
2337 
2338  EVT VT = SN->getMemoryVT();
2339  unsigned Size = VT.getStoreSize();
2340 
2341  SDLoc SL(N);
2342  SelectionDAG &DAG = DCI.DAG;
2343  unsigned Align = SN->getAlignment();
2344  if (Align < Size && isTypeLegal(VT)) {
2345  bool IsFast;
2346  unsigned AS = SN->getAddressSpace();
2347 
2348  // Expand unaligned stores earlier than legalization. Due to visitation
2349  // order problems during legalization, the emitted instructions to pack and
2350  // unpack the bytes again are not eliminated in the case of an unaligned
2351  // copy.
2352  if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
2353  if (VT.isVector())
2354  return scalarizeVectorStore(SN, DAG);
2355 
2356  return expandUnalignedStore(SN, DAG);
2357  }
2358 
2359  if (!IsFast)
2360  return SDValue();
2361  }
2362 
2363  if (!shouldCombineMemoryType(VT))
2364  return SDValue();
2365 
2366  EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
2367  SDValue Val = SN->getValue();
2368 
2369  //DCI.AddToWorklist(Val.getNode());
2370 
2371  bool OtherUses = !Val.hasOneUse();
2372  SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
2373  if (OtherUses) {
2374  SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
2375  DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
2376  }
2377 
2378  return DAG.getStore(SN->getChain(), SL, CastVal,
2379  SN->getBasePtr(), SN->getMemOperand());
2380 }
2381 
2382 /// Split the 64-bit value \p LHS into two 32-bit components, and perform the
2383 /// binary operation \p Opc to it with the corresponding constant operands.
2384 SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
2385  DAGCombinerInfo &DCI, const SDLoc &SL,
2386  unsigned Opc, SDValue LHS,
2387  uint32_t ValLo, uint32_t ValHi) const {
2388  SelectionDAG &DAG = DCI.DAG;
2389  SDValue Lo, Hi;
2390  std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
2391 
2392  SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
2393  SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
2394 
2395  SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
2396  SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
2397 
2398  // Re-visit the ands. It's possible we eliminated one of them and it could
2399  // simplify the vector.
2400  DCI.AddToWorklist(Lo.getNode());
2401  DCI.AddToWorklist(Hi.getNode());
2402 
2403  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
2404  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
2405 }
2406 
2407 SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
2408  DAGCombinerInfo &DCI) const {
2409  if (N->getValueType(0) != MVT::i64)
2410  return SDValue();
2411 
2412  // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
2413 
2414  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
2415  // common case, splitting this into a move and a 32-bit shift is faster and
2416  // the same code size.
2417  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
2418  if (!RHS)
2419  return SDValue();
2420 
2421  unsigned RHSVal = RHS->getZExtValue();
2422  if (RHSVal < 32)
2423  return SDValue();
2424 
2425  SDValue LHS = N->getOperand(0);
2426 
2427  SDLoc SL(N);
2428  SelectionDAG &DAG = DCI.DAG;
2429 
2430  SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
2431 
2432  SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
2433  SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
2434 
2435  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2436 
2437  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
2438  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
2439 }
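 // Worked example (editorial): (shl i64:x, 40) becomes
 //   Lo       = trunc x to i32
 //   NewShift = shl i32 Lo, 8                  ; 40 - 32
 //   result   = bitcast (v2i32 {0, NewShift}) to i64
 // i.e. the low word of the result is zero and the high word holds the shifted
 // low half of x, so only a single 32-bit shift is needed.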
2440 
2441 SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
2442  DAGCombinerInfo &DCI) const {
2443  if (N->getValueType(0) != MVT::i64)
2444  return SDValue();
2445 
2446  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
2447  if (!RHS)
2448  return SDValue();
2449 
2450  SelectionDAG &DAG = DCI.DAG;
2451  SDLoc SL(N);
2452  unsigned RHSVal = RHS->getZExtValue();
2453 
2454  // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
2455  if (RHSVal == 32) {
2456  SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
2457  SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
2458  DAG.getConstant(31, SL, MVT::i32));
2459 
2460  SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
2461  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
2462  }
2463 
2464  // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
2465  if (RHSVal == 63) {
2466  SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
2467  SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
2468  DAG.getConstant(31, SL, MVT::i32));
2469  SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
2470  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
2471  }
2472 
2473  return SDValue();
2474 }
2475 
2476 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
2477  DAGCombinerInfo &DCI) const {
2478  if (N->getValueType(0) != MVT::i64)
2479  return SDValue();
2480 
2481  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
2482  if (!RHS)
2483  return SDValue();
2484 
2485  unsigned ShiftAmt = RHS->getZExtValue();
2486  if (ShiftAmt < 32)
2487  return SDValue();
2488 
2489  // srl i64:x, C for C >= 32
2490  // =>
2491  // build_pair (srl hi_32(x), C - 32), 0
2492 
2493  SelectionDAG &DAG = DCI.DAG;
2494  SDLoc SL(N);
2495 
2496  SDValue One = DAG.getConstant(1, SL, MVT::i32);
2497  SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2498 
2499  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
2500  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
2501  VecOp, One);
2502 
2503  SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
2504  SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
2505 
2506  SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
2507 
2508  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
2509 }
2510 
2511 // We need to specifically handle i64 mul here to avoid unnecessary conversion
2512 // instructions. If we only match on the legalized i64 mul expansion,
2513 // SimplifyDemandedBits will be unable to remove them because there will be
2514 // multiple uses due to the separate mul + mulh[su].
2515 static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
2516  SDValue N0, SDValue N1, unsigned Size, bool Signed) {
2517  if (Size <= 32) {
2518  unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
2519  return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
2520  }
2521 
2522  // Because we want to eliminate extension instructions before the
2523  // operation, we need to create a single user here (i.e. not the separate
2524  // mul_lo + mul_hi) so that SimplifyDemandedBits will deal with it.
2525 
2526  unsigned MulOpc = Signed ? AMDGPUISD::MUL_LOHI_I24 : AMDGPUISD::MUL_LOHI_U24;
2527 
2528  SDValue Mul = DAG.getNode(MulOpc, SL,
2529  DAG.getVTList(MVT::i32, MVT::i32), N0, N1);
2530 
2531  return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64,
2532  Mul.getValue(0), Mul.getValue(1));
2533 }
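 // Editorial note: forming these nodes is only exact when both inputs are known
 // to fit in 24 bits (the callers below check this with isU24/isI24 above): a
 // 24 x 24 bit product fits in 48 bits, so the 32-bit MUL_*24 result is exact
 // when Size <= 32, and MUL_LOHI_*24 reproduces every bit of a 64-bit result.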
2534 
2535 SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
2536  DAGCombinerInfo &DCI) const {
2537  EVT VT = N->getValueType(0);
2538 
2539  unsigned Size = VT.getSizeInBits();
2540  if (VT.isVector() || Size > 64)
2541  return SDValue();
2542 
2543  // There are i16 integer mul/mad.
2544  if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
2545  return SDValue();
2546 
2547  SelectionDAG &DAG = DCI.DAG;
2548  SDLoc DL(N);
2549 
2550  SDValue N0 = N->getOperand(0);
2551  SDValue N1 = N->getOperand(1);
2552  SDValue Mul;
2553 
2554  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
2555  N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
2556  N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
2557  Mul = getMul24(DAG, DL, N0, N1, Size, false);
2558  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
2559  N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
2560  N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
2561  Mul = getMul24(DAG, DL, N0, N1, Size, true);
2562  } else {
2563  return SDValue();
2564  }
2565 
2566  // We need to use sext even for MUL_U24, because MUL_U24 is used
2567  // for signed multiply of 8 and 16-bit types.
2568  return DAG.getSExtOrTrunc(Mul, DL, VT);
2569 }
2570 
2571 SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
2572  DAGCombinerInfo &DCI) const {
2573  EVT VT = N->getValueType(0);
2574 
2575  if (!Subtarget->hasMulI24() || VT.isVector())
2576  return SDValue();
2577 
2578  SelectionDAG &DAG = DCI.DAG;
2579  SDLoc DL(N);
2580 
2581  SDValue N0 = N->getOperand(0);
2582  SDValue N1 = N->getOperand(1);
2583 
2584  if (!isI24(N0, DAG) || !isI24(N1, DAG))
2585  return SDValue();
2586 
2587  N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
2588  N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
2589 
2590  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
2591  DCI.AddToWorklist(Mulhi.getNode());
2592  return DAG.getSExtOrTrunc(Mulhi, DL, VT);
2593 }
2594 
2595 SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
2596  DAGCombinerInfo &DCI) const {
2597  EVT VT = N->getValueType(0);
2598 
2599  if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
2600  return SDValue();
2601 
2602  SelectionDAG &DAG = DCI.DAG;
2603  SDLoc DL(N);
2604 
2605  SDValue N0 = N->getOperand(0);
2606  SDValue N1 = N->getOperand(1);
2607 
2608  if (!isU24(N0, DAG) || !isU24(N1, DAG))
2609  return SDValue();
2610 
2611  N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
2612  N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
2613 
2614  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
2615  DCI.AddToWorklist(Mulhi.getNode());
2616  return DAG.getZExtOrTrunc(Mulhi, DL, VT);
2617 }
2618 
2619 SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
2620  SDNode *N, DAGCombinerInfo &DCI) const {
2621  SelectionDAG &DAG = DCI.DAG;
2622 
2623  // Simplify demanded bits before splitting into multiple users.
2624  if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
2625  return SDValue();
2626 
2627  SDValue N0 = N->getOperand(0);
2628  SDValue N1 = N->getOperand(1);
2629 
2630  bool Signed = (N->getOpcode() == AMDGPUISD::MUL_LOHI_I24);
2631 
2632  unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
2633  unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
2634 
2635  SDLoc SL(N);
2636 
2637  SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
2638  SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
2639  return DAG.getMergeValues({ MulLo, MulHi }, SL);
2640 }
2641 
2642 static bool isNegativeOne(SDValue Val) {
2643  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
2644  return C->isAllOnesValue();
2645  return false;
2646 }
2647 
2648 static bool isCtlzOpc(unsigned Opc) {
2649  return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
2650 }
2651 
2652 SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG,
2653  SDValue Op,
2654  const SDLoc &DL) const {
2655  EVT VT = Op.getValueType();
2656  EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
2657  if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
2658  LegalVT != MVT::i16))
2659  return SDValue();
2660 
2661  if (VT != MVT::i32)
2662  Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
2663 
2664  SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, DL, MVT::i32, Op);
2665  if (VT != MVT::i32)
2666  FFBH = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBH);
2667 
2668  return FFBH;
2669 }
2670 
2671 // The native instructions return -1 on 0 input. Optimize out a select that
2672 // produces -1 on 0.
2673 //
2674 // TODO: If zero is not undef, we could also do this if the output is compared
2675 // against the bitwidth.
2676 //
2677 // TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
2678 SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
2679  SDValue LHS, SDValue RHS,
2680  DAGCombinerInfo &DCI) const {
2681  ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2682  if (!CmpRhs || !CmpRhs->isNullValue())
2683  return SDValue();
2684 
2685  SelectionDAG &DAG = DCI.DAG;
2686  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2687  SDValue CmpLHS = Cond.getOperand(0);
2688 
2689  // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
2690  if (CCOpcode == ISD::SETEQ &&
2691  isCtlzOpc(RHS.getOpcode()) &&
2692  RHS.getOperand(0) == CmpLHS &&
2693  isNegativeOne(LHS)) {
2694  return getFFBH_U32(DAG, CmpLHS, SL);
2695  }
2696 
2697  // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
2698  if (CCOpcode == ISD::SETNE &&
2699  isCtlzOpc(LHS.getOpcode()) &&
2700  LHS.getOperand(0) == CmpLHS &&
2701  isNegativeOne(RHS)) {
2702  return getFFBH_U32(DAG, CmpLHS, SL);
2703  }
2704 
2705  return SDValue();
2706 }
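 // Worked example (editorial), in pseudo-IR:
 //   %c = setcc eq %x, 0
 //   %z = ctlz_zero_undef %x
 //   select %c, -1, %z   -->   ffbh_u32 %x
 // This is safe because the hardware FFBH_U32 already returns -1 for a zero
 // input, which is exactly the value the select would have produced.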
2707 
2708 static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
2709  unsigned Op,
2710  const SDLoc &SL,
2711  SDValue Cond,
2712  SDValue N1,
2713  SDValue N2) {
2714  SelectionDAG &DAG = DCI.DAG;
2715  EVT VT = N1.getValueType();
2716 
2717  SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
2718  N1.getOperand(0), N2.getOperand(0));
2719  DCI.AddToWorklist(NewSelect.getNode());
2720  return DAG.getNode(Op, SL, VT, NewSelect);
2721 }
2722 
2723 // Pull a free FP operation out of a select so it may fold into uses.
2724 //
2725 // select c, (fneg x), (fneg y) -> fneg (select c, x, y)
2726 // select c, (fneg x), k -> fneg (select c, x, (fneg k))
2727 //
2728 // select c, (fabs x), (fabs y) -> fabs (select c, x, y)
2729 // select c, (fabs x), +k -> fabs (select c, x, k)
2730 static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
2731  SDValue N) {
2732  SelectionDAG &DAG = DCI.DAG;
2733  SDValue Cond = N.getOperand(0);
2734  SDValue LHS = N.getOperand(1);
2735  SDValue RHS = N.getOperand(2);
2736 
2737  EVT VT = N.getValueType();
2738  if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
2739  (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
2740  return distributeOpThroughSelect(DCI, LHS.getOpcode(),
2741  SDLoc(N), Cond, LHS, RHS);
2742  }
2743 
2744  bool Inv = false;
2745  if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
2746  std::swap(LHS, RHS);
2747  Inv = true;
2748  }
2749 
2750  // TODO: Support vector constants.
2751  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
2752  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
2753  SDLoc SL(N);
2754  // If one side is an fneg/fabs and the other is a constant, we can push the
2755  // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
2756  SDValue NewLHS = LHS.getOperand(0);
2757  SDValue NewRHS = RHS;
2758 
2759  // Careful: if the neg can be folded up, don't try to pull it back down.
2760  bool ShouldFoldNeg = true;
2761 
2762  if (NewLHS.hasOneUse()) {
2763  unsigned Opc = NewLHS.getOpcode();
2764  if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
2765  ShouldFoldNeg = false;
2766  if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
2767  ShouldFoldNeg = false;
2768  }
2769 
2770  if (ShouldFoldNeg) {
2771  if (LHS.getOpcode() == ISD::FNEG)
2772  NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
2773  else if (CRHS->isNegative())
2774  return SDValue();
2775 
2776  if (Inv)
2777  std::swap(NewLHS, NewRHS);
2778 
2779  SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
2780  Cond, NewLHS, NewRHS);
2781  DCI.AddToWorklist(NewSelect.getNode());
2782  return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
2783  }
2784  }
2785 
2786  return SDValue();
2787 }
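 // Worked example (editorial): with a constant on one side,
 //   select c, (fneg x), 4.0  -->  fneg (select c, x, -4.0)
 // the fneg is hoisted above the select by negating the constant operand. For
 // fabs the constant must already be non-negative (fabs(k) == k), which is why
 // a negative CRHS bails out above.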
2788 
2789 
2790 SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
2791  DAGCombinerInfo &DCI) const {
2792  if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
2793  return Folded;
2794 
2795  SDValue Cond = N->getOperand(0);
2796  if (Cond.getOpcode() != ISD::SETCC)
2797  return SDValue();
2798 
2799  EVT VT = N->getValueType(0);
2800  SDValue LHS = Cond.getOperand(0);
2801  SDValue RHS = Cond.getOperand(1);
2802  SDValue CC = Cond.getOperand(2);
2803 
2804  SDValue True = N->getOperand(1);
2805  SDValue False = N->getOperand(2);
2806 
2807  if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
2808  SelectionDAG &DAG = DCI.DAG;
2809  if ((DAG.isConstantValueOfAnyType(True) ||
2810  DAG.isConstantValueOfAnyType(True)) &&
2811  (!DAG.isConstantValueOfAnyType(False) &&
2812  !DAG.isConstantValueOfAnyType(False))) {
2813  // Swap cmp + select pair to move constant to false input.
2814  // This will allow using VOPC cndmasks more often.
2815  // select (setcc x, y, cc), k, x -> select (setcc x, y, !cc), x, k
2816 
2817  SDLoc SL(N);
2818  ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
2819  LHS.getValueType().isInteger());
2820 
2821  SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
2822  return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
2823  }
2824  }
2825 
2826  if (VT == MVT::f32 && Cond.hasOneUse()) {
2827  SDValue MinMax
2828  = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
2829  // Revisit this node so we can catch min3/max3/med3 patterns.
2830  //DCI.AddToWorklist(MinMax.getNode());
2831  return MinMax;
2832  }
2833 
2834  // There's no reason to not do this if the condition has other uses.
2835  return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
2836 }
2837 
2838 SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
2839  DAGCombinerInfo &DCI) const {
2840  SelectionDAG &DAG = DCI.DAG;
2841  SDValue N0 = N->getOperand(0);
2842  EVT VT = N->getValueType(0);
2843 
2844  unsigned Opc = N0.getOpcode();
2845 
2846  // If the input has multiple uses and we can either fold the negate down, or
2847  // the other uses cannot, give up. This both prevents unprofitable
2848  // transformations and infinite loops: we won't repeatedly try to fold around
2849  // a negate that has no 'good' form.
2850  //
2851  // TODO: Check users can fold
2852  if (fnegFoldsIntoOp(Opc) && !N0.hasOneUse())
2853  return SDValue();
2854 
2855  SDLoc SL(N);
2856  switch (Opc) {
2857  case ISD::FADD: {
2858  if (!mayIgnoreSignedZero(N0))
2859  return SDValue();
2860 
2861  // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
2862  SDValue LHS = N0.getOperand(0);
2863  SDValue RHS = N0.getOperand(1);
2864 
2865  if (LHS.getOpcode() != ISD::FNEG)
2866  LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
2867  else
2868  LHS = LHS.getOperand(0);
2869 
2870  if (RHS.getOpcode() != ISD::FNEG)
2871  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
2872  else
2873  RHS = RHS.getOperand(0);
2874 
2875  SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS);
2876  if (!N0.hasOneUse())
2877  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
2878  return Res;
2879  }
2880  case ISD::FMUL:
2881  case AMDGPUISD::FMUL_LEGACY: {
2882  // (fneg (fmul x, y)) -> (fmul x, (fneg y))
2883  // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
2884  SDValue LHS = N0.getOperand(0);
2885  SDValue RHS = N0.getOperand(1);
2886 
2887  if (LHS.getOpcode() == ISD::FNEG)
2888  LHS = LHS.getOperand(0);
2889  else if (RHS.getOpcode() == ISD::FNEG)
2890  RHS = RHS.getOperand(0);
2891  else
2892  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
2893 
2894  SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS);
2895  if (!N0.hasOneUse())
2896  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
2897  return Res;
2898  }
2899  case ISD::FMA:
2900  case ISD::FMAD: {
2901  if (!mayIgnoreSignedZero(N0))
2902  return SDValue();
2903 
2904  // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
2905  SDValue LHS = N0.getOperand(0);
2906  SDValue MHS = N0.getOperand(1);
2907  SDValue RHS = N0.getOperand(2);
2908 
2909  if (LHS.getOpcode() == ISD::FNEG)
2910  LHS = LHS.getOperand(0);
2911  else if (MHS.getOpcode() == ISD::FNEG)
2912  MHS = MHS.getOperand(0);
2913  else
2914  MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
2915 
2916  if (RHS.getOpcode() != ISD::FNEG)
2917  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
2918  else
2919  RHS = RHS.getOperand(0);
2920 
2921  SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
2922  if (!N0.hasOneUse())
2923  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
2924  return Res;
2925  }
2926  case ISD::FP_EXTEND:
2927  case AMDGPUISD::RCP:
2928  case AMDGPUISD::RCP_LEGACY:
2929  case ISD::FSIN:
2930  case AMDGPUISD::SIN_HW: {
2931  SDValue CvtSrc = N0.getOperand(0);
2932  if (CvtSrc.getOpcode() == ISD::FNEG) {
2933  // (fneg (fp_extend (fneg x))) -> (fp_extend x)
2934  // (fneg (rcp (fneg x))) -> (rcp x)
2935  return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
2936  }
2937 
2938  if (!N0.hasOneUse())
2939  return SDValue();
2940 
2941  // (fneg (fp_extend x)) -> (fp_extend (fneg x))
2942  // (fneg (rcp x)) -> (rcp (fneg x))
2943  SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
2944  return DAG.getNode(Opc, SL, VT, Neg);
2945  }
2946  case ISD::FP_ROUND: {
2947  SDValue CvtSrc = N0.getOperand(0);
2948 
2949  if (CvtSrc.getOpcode() == ISD::FNEG) {
2950  // (fneg (fp_round (fneg x))) -> (fp_round x)
2951  return DAG.getNode(ISD::FP_ROUND, SL, VT,
2952  CvtSrc.getOperand(0), N0.getOperand(1));
2953  }
2954 
2955  if (!N0.hasOneUse())
2956  return SDValue();
2957 
2958  // (fneg (fp_round x)) -> (fp_round (fneg x))
2959  SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
2960  return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
2961  }
2962  default:
2963  return SDValue();
2964  }
2965 }
2966 
2967 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
2968  DAGCombinerInfo &DCI) const {
2969  SelectionDAG &DAG = DCI.DAG;
2970  SDLoc DL(N);
2971 
2972  switch(N->getOpcode()) {
2973  default:
2974  break;
2975  case ISD::BITCAST: {
2976  EVT DestVT = N->getValueType(0);
2977 
2978  // Push casts through vector builds. This helps avoid emitting a large
2979  // number of copies when materializing floating point vector constants.
2980  //
2981  // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
2982  // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
2983  if (DestVT.isVector()) {
2984  SDValue Src = N->getOperand(0);
2985  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2986  EVT SrcVT = Src.getValueType();
2987  unsigned NElts = DestVT.getVectorNumElements();
2988 
2989  if (SrcVT.getVectorNumElements() == NElts) {
2990  EVT DestEltVT = DestVT.getVectorElementType();
2991 
2992  SmallVector<SDValue, 8> CastedElts;
2993  SDLoc SL(N);
2994  for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
2995  SDValue Elt = Src.getOperand(I);
2996  CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
2997  }
2998 
2999  return DAG.getBuildVector(DestVT, SL, CastedElts);
3000  }
3001  }
3002  }
3003 
3004  if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
3005  break;
3006 
3007  // Fold bitcasts of constants.
3008  //
3009  // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
3010  // TODO: Generalize and move to DAGCombiner
3011  SDValue Src = N->getOperand(0);
3012  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
3013  assert(Src.getValueType() == MVT::i64);
3014  SDLoc SL(N);
3015  uint64_t CVal = C->getZExtValue();
3016  return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
3017  DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
3018  DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
3019  }
3020 
3021  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
3022  const APInt &Val = C->getValueAPF().bitcastToAPInt();
3023  SDLoc SL(N);
3024  uint64_t CVal = Val.getZExtValue();
3025  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
3026  DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
3027  DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
3028 
3029  return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
3030  }
3031 
3032  break;
3033  }
3034  case ISD::SHL: {
3035  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
3036  break;
3037 
3038  return performShlCombine(N, DCI);
3039  }
3040  case ISD::SRL: {
3041  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
3042  break;
3043 
3044  return performSrlCombine(N, DCI);
3045  }
3046  case ISD::SRA: {
3047  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
3048  break;
3049 
3050  return performSraCombine(N, DCI);
3051  }
3052  case ISD::MUL:
3053  return performMulCombine(N, DCI);
3054  case ISD::MULHS:
3055  return performMulhsCombine(N, DCI);
3056  case ISD::MULHU:
3057  return performMulhuCombine(N, DCI);
3058  case AMDGPUISD::MUL_I24:
3059  case AMDGPUISD::MUL_U24:
3060  case AMDGPUISD::MULHI_I24:
3061  case AMDGPUISD::MULHI_U24: {
3062  // If the first call to simplify is successful, then N may end up being
3063  // deleted, so we shouldn't call simplifyI24 again.
3064  simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
3065  return SDValue();
3066  }
3067  case AMDGPUISD::MUL_LOHI_I24:
3068  case AMDGPUISD::MUL_LOHI_U24:
3069  return performMulLoHi24Combine(N, DCI);
3070  case ISD::SELECT:
3071  return performSelectCombine(N, DCI);
3072  case ISD::FNEG:
3073  return performFNegCombine(N, DCI);
3074  case AMDGPUISD::BFE_I32:
3075  case AMDGPUISD::BFE_U32: {
3076  assert(!N->getValueType(0).isVector() &&
3077  "Vector handling of BFE not implemented");
3078  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
3079  if (!Width)
3080  break;
3081 
3082  uint32_t WidthVal = Width->getZExtValue() & 0x1f;
3083  if (WidthVal == 0)
3084  return DAG.getConstant(0, DL, MVT::i32);
3085 
3085 
3086  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
3087  if (!Offset)
3088  break;
3089 
3090  SDValue BitsFrom = N->getOperand(0);
3091  uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
3092 
3093  bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
3094 
3095  if (OffsetVal == 0) {
3096  // This is already sign / zero extended, so try to fold away extra BFEs.
3097  unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
3098 
3099  unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
3100  if (OpSignBits >= SignBits)
3101  return BitsFrom;
3102 
3103  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
3104  if (Signed) {
3105  // This is a sign_extend_inreg. Replace it to take advantage of existing
3106  // DAG Combines. If not eliminated, we will match back to BFE during
3107  // selection.
3108 
3109  // TODO: The sext_inreg of extended types ends, although we could
3110  // handle them in a single BFE.
3111  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
3112  DAG.getValueType(SmallVT));
3113  }
3114 
3115  return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
3116  }
3117 
3118  if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
3119  if (Signed) {
3120  return constantFoldBFE<int32_t>(DAG,
3121  CVal->getSExtValue(),
3122  OffsetVal,
3123  WidthVal,
3124  DL);
3125  }
3126 
3127  return constantFoldBFE<uint32_t>(DAG,
3128  CVal->getZExtValue(),
3129  OffsetVal,
3130  WidthVal,
3131  DL);
3132  }
3133 
3134  if ((OffsetVal + WidthVal) >= 32) {
3135  SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
3136  return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
3137  BitsFrom, ShiftVal);
3138  }
3139 
3140  if (BitsFrom.hasOneUse()) {
3141  APInt Demanded = APInt::getBitsSet(32,
3142  OffsetVal,
3143  OffsetVal + WidthVal);
3144 
3145  APInt KnownZero, KnownOne;
3146  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3147  !DCI.isBeforeLegalizeOps());
3148  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3149  if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
3150  TLI.SimplifyDemandedBits(BitsFrom, Demanded,
3151  KnownZero, KnownOne, TLO)) {
3152  DCI.CommitTargetLoweringOpt(TLO);
3153  }
3154  }
3155 
3156  break;
3157  }
3158  case ISD::LOAD:
3159  return performLoadCombine(N, DCI);
3160  case ISD::STORE:
3161  return performStoreCombine(N, DCI);
3162  }
3163  return SDValue();
3164 }
3165 
3166 //===----------------------------------------------------------------------===//
3167 // Helper functions
3168 //===----------------------------------------------------------------------===//
3169 
3170 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
3171  const TargetRegisterClass *RC,
3172  unsigned Reg, EVT VT) const {
3173  MachineFunction &MF = DAG.getMachineFunction();
3174  MachineRegisterInfo &MRI = MF.getRegInfo();
3175  unsigned VirtualRegister;
3176  if (!MRI.isLiveIn(Reg)) {
3177  VirtualRegister = MRI.createVirtualRegister(RC);
3178  MRI.addLiveIn(Reg, VirtualRegister);
3179  } else {
3180  VirtualRegister = MRI.getLiveInVirtReg(Reg);
3181  }
3182  return DAG.getRegister(VirtualRegister, VT);
3183 }
3184 
3185 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
3186  const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
3187  unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
3188  uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
3189  switch (Param) {
3190  case GRID_DIM:
3191  return ArgOffset;
3192  case GRID_OFFSET:
3193  return ArgOffset + 4;
3194  }
3195  llvm_unreachable("unexpected implicit parameter type");
3196 }
3197 
3198 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
3199 
3200 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
3201  switch ((AMDGPUISD::NodeType)Opcode) {
3202  case AMDGPUISD::FIRST_NUMBER: break;
3203  // AMDIL DAG nodes
3207 
3208  // AMDGPU DAG nodes
3297  }
3298  return nullptr;
3299 }
3300 
3301 SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
3302  SelectionDAG &DAG, int Enabled,
3303  int &RefinementSteps,
3304  bool &UseOneConstNR,
3305  bool Reciprocal) const {
3306  EVT VT = Operand.getValueType();
3307 
3308  if (VT == MVT::f32) {
3309  RefinementSteps = 0;
3310  return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
3311  }
3312 
3313  // TODO: There is also f64 rsq instruction, but the documentation is less
3314  // clear on its precision.
3315 
3316  return SDValue();
3317 }
3318 
3319 SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
3320  SelectionDAG &DAG, int Enabled,
3321  int &RefinementSteps) const {
3322  EVT VT = Operand.getValueType();
3323 
3324  if (VT == MVT::f32) {
3325  // Reciprocal, < 1 ulp error.
3326  //
3327  // This reciprocal approximation converges to < 0.5 ulp error with one
3328  // Newton-Raphson step performed with two fused multiply-adds (FMAs).
3329 
3330  RefinementSteps = 0;
3331  return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
3332  }
3333 
3334  // TODO: There is also f64 rcp instruction, but the documentation is less
3335  // clear on its precision.
3336 
3337  return SDValue();
3338 }
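 // Editorial note: the refinement referred to above is the standard
 // Newton-Raphson step for 1/a,
 //   e  = fma(-a, x, 1.0)   // error term 1 - a*x
 //   x' = fma(x, e, x)      // x*(2 - a*x)
 // i.e. two FMAs per step; with RefinementSteps = 0 the raw RCP estimate is
 // returned unrefined.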
3339 
3340 void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
3341  const SDValue Op,
3342  APInt &KnownZero,
3343  APInt &KnownOne,
3344  const SelectionDAG &DAG,
3345  unsigned Depth) const {
3346 
3347  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
3348 
3349  APInt KnownZero2;
3350  APInt KnownOne2;
3351  unsigned Opc = Op.getOpcode();
3352 
3353  switch (Opc) {
3354  default:
3355  break;
3356  case AMDGPUISD::CARRY:
3357  case AMDGPUISD::BORROW: {
3358  KnownZero = APInt::getHighBitsSet(32, 31);
3359  break;
3360  }
3361 
3362  case AMDGPUISD::BFE_I32:
3363  case AMDGPUISD::BFE_U32: {
3364  ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3365  if (!CWidth)
3366  return;
3367 
3368  unsigned BitWidth = 32;
3369  uint32_t Width = CWidth->getZExtValue() & 0x1f;
3370 
3371  if (Opc == AMDGPUISD::BFE_U32)
3372  KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
3373 
3374  break;
3375  }
3376  }
3377 }
3378 
3379 unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
3380  SDValue Op,
3381  const SelectionDAG &DAG,
3382  unsigned Depth) const {
3383  switch (Op.getOpcode()) {
3384  case AMDGPUISD::BFE_I32: {
3385  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3386  if (!Width)
3387  return 1;
3388 
3389  unsigned SignBits = 32 - Width->getZExtValue() + 1;
3390  if (!isNullConstant(Op.getOperand(1)))
3391  return SignBits;
3392 
3393  // TODO: Could probably figure something out with non-0 offsets.
3394  unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
3395  return std::max(SignBits, Op0SignBits);
3396  }
3397 
3398  case AMDGPUISD::BFE_U32: {
3399  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3400  return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
3401  }
3402 
3403  case AMDGPUISD::CARRY:
3404  case AMDGPUISD::BORROW:
3405  return 31;
3406 
3407  default:
3408  return 1;
3409  }
3410 }
MachineLoop * L
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
SDValue getValue(unsigned R) const
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV)
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
Flags getFlags() const
Return the raw flags of the source value,.
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2)
Diagnostic information for unsupported feature in backend.
AMDGPU specific subclass of TargetSubtarget.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:804
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT) const override
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
size_t i
unsigned getNumRegisters(LLVMContext &Context, EVT VT) const
Return the number of registers that this ValueType will eventually require.
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const
void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
const TargetMachine & getTargetMachine() const
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
constexpr uint32_t Lo_32(uint64_t Value)
Lo_32 - This function returns the low 32 bits of a 64 bit value.
Definition: MathExtras.h:253
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
bool isExtended() const
isExtended - Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:113
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
const GlobalValue * getGlobal() const
void setHasFloatingPointExceptions(bool FPExceptions=true)
Tells the code generator that this target supports floating point exceptions and cares about preservi...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static bool fnegFoldsIntoOp(unsigned Opc)
Function Alias Analysis Results
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const SDValue & getOperand(unsigned Num) const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:817
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:664
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG)
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const
Address space for local memory.
Definition: AMDGPU.h:141
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If zero
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:330
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
const MachinePointerInfo & getPointerInfo() const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
const SDValue & getBasePtr() const
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
LLVMContext & getContext() const
bool hasFPExceptions() const
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
static bool simplifyI24(SDNode *Node24, unsigned OpIdx, TargetLowering::DAGCombinerInfo &DCI)
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const override
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
APInt Not(const APInt &APIVal)
Bitwise complement function.
Definition: APInt.h:1957
SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
bool isNegative() const
Return true if the value is negative.
uint64_t allocateKernArg(uint64_t Size, unsigned Align)
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
Pointer to the start of the shader's constant data.
SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const
Shift and rotation operations.
Definition: ISDOpcodes.h:344
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
void addLoc(const CCValAssign &V)
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
unsigned getAddressSpace() const
Reg
All possible values of the reg field in the ModR/M byte.
MachinePointerInfo getWithOffset(int64_t O) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, unsigned Depth=0) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
static bool isI24(SDValue Op, SelectionDAG &DAG)
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const
Interface to describe a layout of a stack frame on an AMDGPU target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
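A sketch, assuming Chain, Ptr, PtrInfo and DL come from the surrounding lowering code, of loading an i8 and zero-extending it to i32 in a single node:

  // Extending load: the in-memory type is i8, the result type is i32.
  SDValue Byte = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, Ptr,
                                PtrInfo, MVT::i8);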
SmallVector< ISD::InputArg, 32 > Ins
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
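A minimal sketch, assuming Div and Rem were already computed, of how a two-result lowering (for example a DIVREM replacement) packages its results:

  // Return both values of the node being replaced as one MERGE_VALUES node.
  SDValue Res[2] = { Div, Rem };
  return DAG.getMergeValues(Res, DL);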
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
TargetRegisterInfo interface that is implemented by all hw codegen targets.
MachineFunction & getMachineFunction() const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
const AMDGPUSubtarget * Subtarget
bool isLiveIn(unsigned Reg) const
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
static bool hasVolatileUser(SDNode *Val)
const SDValue & getBasePtr() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
getHalfSizedIntegerVT - Finds the smallest simple value type that is greater than or equal to half th...
Definition: ValueTypes.h:293
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
EVT getMemoryVT() const
Return the type of the in-memory value.
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed)
Generation getGeneration() const
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
bool bitsLE(EVT VT) const
bitsLE - Return true if this has no more bits than VT.
Definition: ValueTypes.h:218
This class is used to represent ISD::STORE nodes.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
static bool isCtlzOpc(unsigned Opc)
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL)
SDNode * getNode() const
get the SDNode which holds the desired result
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
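A small worked example of the usual use, computing the guaranteed alignment of the upper half of a split memory access (the concrete numbers are only illustrative):

  // After splitting an access at byte offset HiOffset, the high half can only
  // be assumed to be aligned to the smaller power of two common to both.
  unsigned BaseAlign = 16;                           // assumed base alignment
  uint64_t HiOffset = 8;                             // offset of the high half
  uint64_t HiAlign = MinAlign(BaseAlign, HiOffset);  // == 8 here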
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const
Split a vector store into multiple scalar stores.
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:274
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors...
unsigned const MachineRegisterInfo * MRI
unsigned getVectorNumElements() const
MVT - Machine Value Type.
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
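A hedged sketch of the usual pattern: register interest in an opcode during construction, then inspect matching nodes in the PerformDAGCombine override (the target class name is hypothetical):

  // In the constructor: ask the DAG combiner to call back on every ISD::MUL.
  setTargetDAGCombine(ISD::MUL);

  // Later, in the override:
  SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
    if (N->getOpcode() == ISD::MUL) {
      // ... try to form a cheaper target-specific multiply here ...
    }
    return SDValue();
  }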
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register.
bool shouldCombineMemoryType(EVT VT) const
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
static bool hasDefinedInitializer(const GlobalValue *GV)
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
bool hasFP32Denormals() const
bool isLoadBitCastBeneficial(EVT, EVT) const final
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On archi...
bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, DAGCombinerInfo &DCI)
Helper for SimplifyDemandedBits that can simplify an operation with multiple uses.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
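A constructor sketch of the usual pairing with a Promote operation action; the types are illustrative:

  // Handle f32 loads with the existing i32 load patterns by promoting
  // (bitcasting) the value type to i32.
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);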
bool has16BitInsts() const
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:350
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:705
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const
EVT - Extended Value Type.
Definition: ValueTypes.h:31
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const
This structure contains all information that is necessary for lowering calls.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find...
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
static bool isU24(SDValue Op, SelectionDAG &DAG)
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
CCState - This class holds information needed while lowering arguments and return values...
const DebugLoc & getDebugLoc() const
void dump() const
Dump this node, for debugging.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
bool isFAbsFree(EVT VT) const override
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part...
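A minimal sketch, assuming Vec is a v4i32 SDValue and DL an SDLoc taken from the surrounding code, of splitting it into two v2i32 halves:

  // Build the half-width vector type and split Vec into its two halves.
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, 2);
  std::pair<SDValue, SDValue> Halves = DAG.SplitVector(Vec, DL, HalfVT, HalfVT);
  SDValue Lo = Halves.first;
  SDValue Hi = Halves.second;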
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:123
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
Interface definition of the TargetLowering class that is common to all AMD GPUs.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:510
SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0)
Append the extracted elements from Start to Count out of the vector Op in Args.
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
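A brief sketch, assuming Cond is the condition-code operand of a SETCC or SELECT_CC node and LHS one of its compared operands (both from the surrounding code), of inverting the comparison:

  // Invert the comparison so the select operands can keep their order.
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond)->get();
  ISD::CondCode InvCC =
      ISD::getSetCCInverse(CC, LHS.getValueType().isInteger());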
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
double BitsToDouble(uint64_t Bits)
BitsToDouble - This function takes a 64-bit integer and returns the bit equivalent double...
Definition: MathExtras.h:549
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class for arbitrary precision integers.
Definition: APInt.h:77
bool hasInitializer() const
Definitions have initializers, declarations don't.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
iterator_range< use_iterator > uses()
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
Interface for the AMDGPU Implementation of the Intrinsic Info class.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
bool isAmdHsaOS() const
SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
static const fltSemantics & IEEEdouble()
Definition: APFloat.cpp:103
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:503
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isConstantValueOfAnyType(SDValue N)
SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:560
unsigned countLeadingOnes() const
Count the number of leading one bits.
Definition: APInt.cpp:676
Represents a use of a SDNode.
Interface definition for SIInstrInfo.
void AnalyzeReturn(CCState &State, const SmallVectorImpl< ISD::OutputArg > &Outs) const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override
Return true if it's profitable to narrow operations of type VT1 to VT2.
SelectSupportKind
Enum that describes what type of support for selects the target has.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
bool isTruncateFree(EVT Src, EVT Dest) const override
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI)
#define NODE_NAME_CASE(node)
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
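A one-line sketch: RTLIB::REM_F64 is a standard enumerator, but the replacement symbol name here is purely hypothetical:

  // Point the f64 remainder libcall at a target-provided runtime routine.
  setLibcallName(RTLIB::REM_F64, "__my_fmod");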
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static volatile int Zero
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
static bool isNegativeOne(SDValue Val)
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
EVT getPow2VectorType(LLVMContext &Context) const
getPow2VectorType - Widens the length of the given vector EVT up to the nearest power of 2 and return...
Definition: ValueTypes.h:321
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
bool isByteSized() const
isByteSized - Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:183
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
constexpr uint32_t Hi_32(uint64_t Value)
Hi_32 - This function returns the high 32 bits of a 64 bit value.
Definition: MathExtras.h:248
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
SDValue getRegister(unsigned Reg, EVT VT)
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:830
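A one-line sketch, assuming A and B are i32 SDValues from the surrounding code, building a signed maximum as a single SELECT_CC node:

  // max(A, B) as (A > B) ? A : B.
  SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);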
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
Primary interface to the complete machine description for the target machine.
static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N)
bool hasBCNT(unsigned Size) const
bool isZExtFree(Type *Src, Type *Dest) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:253
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned getOrigAlign() const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const
Conversion operators.
Definition: ISDOpcodes.h:397
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
unsigned getAlignment() const
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
getIntegerVT - Returns the EVT that represents an integer with the given number of bits...
Definition: ValueTypes.h:61
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
bool mayIgnoreSignedZero(SDValue Op) const
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
bool isSelectSupported(SelectSupportKind) const override
uint64_t getZExtValue() const
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:326
BRIND - Indirect branch.
Definition: ISDOpcodes.h:556
unsigned getAlignmentForImplicitArgPtr() const
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:545
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const