//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
// ...
#include "PPCCallingConv.h"
#include "PPCCCState.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
// ...
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
// ...
#include "llvm/Support/Format.h"
// ...
#include <list>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"),
  cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
  cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// FIXME: Remove this once the bug has been fixed!
// ...

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    // ...
  }

  // ...

  // PowerPC has pre-inc loads and stores.
  // ...

  if (Subtarget.useCRBits()) {
    // ...

    if (isPPC64 || Subtarget.hasFPCVT()) {
      // ...
      //   isPPC64 ? MVT::i64 : MVT::i32);
      // ...
      //   isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      // ...
    }

    // PowerPC does not support direct load / store of condition registers.
    // ...

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      // ...

    for (MVT VT : MVT::integer_valuetypes()) {
      // ...
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  // ...

  // We do not currently implement these libm ops for PowerPC.
  // ...

  // PowerPC has no SREM/UREM instructions.
  // ...

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  // ...

  // We don't support sin/cos/sqrt/fmod/pow.
  // ...

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    // ...

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    // ...

  if (Subtarget.hasFCPSGN()) {
    // ...
  } else {
    // ...
  }

  if (Subtarget.hasFPRND()) {
    // ...
  }

  // PowerPC does not have BSWAP.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  // ...
  if (Subtarget.isISA3_0()) {
    // ...
  } else {
    // ...
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    // ...
  } else {
    // ...
  }

  // PowerPC does not have ROTR.
  // ...

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    // ...
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  // ...

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    // ...

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    // ...

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  // ...

  // PowerPC does not have [U|S]INT_TO_FP.
  // ...

  if (Subtarget.hasDirectMove() && isPPC64) {
    // ...
  } else {
    // ...
  }

  // We cannot sextinreg(i1). Expand to shifts.
  // ...

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  // ...

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  // ...

  // TRAP is legal.
  // ...

  // TRAMPOLINE is custom lowered.
  // ...

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  // ...

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      // ...
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      // ...
    }
  } else
    // ...

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    // ...
  else
    // ...

  // Use the default implementation.
  // ...

  // We want to custom lower some of our intrinsics.
  // ...

  // To handle counter-based loop conditions.
  // ...

  // Comparisons that require checking two conditions.
  // ...

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // ...
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    // ...

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      // ...
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    // ...
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      // ...
    }

    // ...
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // ...
    // 64-bit PowerPC wants to expand i128 shifts itself.
    // ...
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    // ...
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      // ...

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        // ...
      } else {
        // ...
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        // ...
      else
        // ...

      // We promote all shuffles to v16i8.
      // ...

      // We promote all non-typed operations to v4i32.
      // ...

      // No other operations are legal.
      // ...

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    // ...
    //   Subtarget.useCRBits() ? Legal : Expand);
    // ...

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    // ...

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      // ...
    }

    if (Subtarget.hasP8Altivec())
      // ...
    else
      // ...

    // ...

    // Altivec does not contain unordered floating-point compare instructions.
    // ...

    if (Subtarget.hasVSX()) {
      // ...
      if (Subtarget.hasP8Vector()) {
        // ...
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        // ...
      }

      // ...

      // Share the Altivec comparison restrictions.
      // ...

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        // ...
      } else {
        // ...

        // VSX v2i64 only supports non-arithmetic operations.
        // ...
      }

      // ...

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG; overall legalization checks the inner type.
      // ...

      if (Subtarget.hasDirectMove())
        // ...

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      // ...
    }
  }

  if (Subtarget.hasQPX()) {
    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    // ...

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    // ...

    if (TM.Options.UnsafeFPMath) {
      // ...
    } else {
      // ...
    }
  }

  if (Subtarget.has64BitSupport())
    // ...

  // ...

  if (!isPPC64) {
    // ...
  }

  // ...

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    // ...
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    // ...
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // ...
  if (Subtarget.hasFPCVT())
    // ...
  if (Subtarget.useCRBits())
    // ...

  if (Subtarget.useCRBits()) {
    // ...
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    // ...
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    // ...
  }

  // ...
  if (Subtarget.isDarwin())
    // ...

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    // ...
    break;
  }

  if (Subtarget.enableMachineScheduler())
    // ...
  else
    // ...

  // ...

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    // ...
    MaxStoresPerMemcpy = 32;
    // ...
    MaxStoresPerMemmove = 32;
    // ...
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be over
    // one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is 8-byte aligned on PPC64 and 4-byte aligned on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
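
// Illustrative example (not from the original source): given
//   struct S { int a; vector float v; };   // v is a 16-byte Altivec type
// getMaxByValAlign raises MaxAlign to 16 for the contained 128-bit vector,
// so getByValTypeAlignment returns 16 on a non-Darwin, Altivec-capable
// subtarget, while a struct of only ints and doubles keeps the default 8
// (PPC64) or 4 (PPC32).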

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT: return "PPCISD::QBFLT";
  case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    // ...

  // ...
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
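
// Worked example (illustrative, not from the original source): for
// ShuffleKind 0 on a big-endian target the accepted v16i8 mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. the low (odd-numbered) byte of each halfword of the two concatenated
// inputs, which is exactly what vpkuhum produces; undef (-1) mask entries
// are accepted anywhere via isConstantOrUndef.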

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
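
// Worked example (illustrative): a big-endian vmrghb (UnitSize = 1,
// ShuffleKind 0) calls isVMerge with LHSStart = 0 and RHSStart = 16 and so
// accepts the mask
//   <0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>
// which interleaves the high halves of the two inputs byte by byte; the
// corresponding vmrglb uses LHSStart = 8, RHSStart = 24 for the low halves.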

/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little-endian and
 * big-endian targets can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * ("Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you").
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8 bits. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 *            vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 *         instruction
 */
bool PPC::isVMrgEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}
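
// Worked example (illustrative): a big-endian vmrgew with two different
// inputs (ShuffleKind 0, CheckEven = true) uses indexOffset = 0 and
// RHSStartValue = 16, accepting the mask
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
// i.e. words A0, B0, A2, B2 -- the even-numbered words of both inputs.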

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
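
// Worked example (illustrative): the big-endian, two-input mask
//   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
// selects 16 consecutive bytes starting at byte 3 of the concatenated
// inputs, so ShuffleKind 0 returns a shift amount of 3 (vsldoi vD, vA, vB, 3).
// For the little-endian kinds the returned amount is 16 - 3 = 13, matching
// the swapped-operand encoding used there.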

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
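
// Worked example (illustrative): a splat of halfword element 2 (EltSize = 2)
// appears as the v16i8 mask
//   <4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5>
// ElementBase is 4, bytes 4-5 form one complete halfword, and every later
// byte pair repeats it, so the mask is accepted as input for vsplth.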

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {

  // Check that the mask is shuffling words.
  for (unsigned i = 0; i < 4; ++i) {
    unsigned B0 = N->getMaskElt(i*4);
    unsigned B1 = N->getMaskElt(i*4+1);
    unsigned B2 = N->getMaskElt(i*4+2);
    unsigned B3 = N->getMaskElt(i*4+3);
    if (B0 % 4)
      return false;
    if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
      return false;
  }

  // Now we look at mask elements 0, 4, 8, 12.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
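
// Worked example (illustrative): on a little-endian target the word-level
// mask {4, 1, 2, 3} (byte mask <16..19, 4..7, 8..11, 12..15>) matches the
// "H, 1, 2, 3" pattern above: ShiftElts = LittleEndianShifts[4 & 0x3] = 2,
// InsertAtByte = 12, and Swap = false because M0 >= 4.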

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
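
// Worked example (illustrative): for the vsplth mask shown above (EltSize = 2,
// first mask element 4), a big-endian target returns 4 / 2 = 2, while a
// little-endian target returns (16 / 2) - 1 - 2 = 5, since LE numbers the
// halfword elements in the opposite order.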

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)  // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
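
// Worked example (illustrative): the v8i16 build_vector <1,1,1,1,1,1,1,1>
// queried with ByteSize = 2 yields MaskVal = 1, which fits the 5-bit signed
// immediate field, so the splat can be materialized as "vspltish 1".
// Likewise a v4i32 splat of 0x00010001 passes the isSplat(16) check and
// maps to the same immediate.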

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1), RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
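
// Illustrative note: the ISD::OR case covers address arithmetic such as
//   %base = and i64 %p, -16     ; low four bits known zero
//   %off  = and i64 %i, 15      ; high bits known zero
//   %addr = or i64 %base, %off
// where computeKnownBits proves every bit is zero in at least one operand,
// so the OR behaves like an ADD and can be selected as an [r+r] access.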

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Align = MFI.getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME: dl should come from the parent load or store, not from the address.
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
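
// Worked example (illustrative): the constant address 0x12348000 does not
// fit in a signed 16-bit field, so the LIS + disp path splits it as
//   Disp = (short)0x8000 = -32768
//   Base = (0x12348000 - (-32768)) >> 16 = 0x1235   (materialized with lis)
// and 0x12350000 + (-32768) == 0x12348000 reconstructs the address.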

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
1991 
1992 /// getPreIndexedAddressParts - returns true by value, base pointer and
1993 /// offset pointer and addressing mode by reference if the node's address
1994 /// can be legally represented as pre-indexed load / store address.
1996  SDValue &Offset,
1997  ISD::MemIndexedMode &AM,
1998  SelectionDAG &DAG) const {
1999  if (DisablePPCPreinc) return false;
2000 
2001  bool isLoad = true;
2002  SDValue Ptr;
2003  EVT VT;
2004  unsigned Alignment;
2005  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2006  Ptr = LD->getBasePtr();
2007  VT = LD->getMemoryVT();
2008  Alignment = LD->getAlignment();
2009  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2010  Ptr = ST->getBasePtr();
2011  VT = ST->getMemoryVT();
2012  Alignment = ST->getAlignment();
2013  isLoad = false;
2014  } else
2015  return false;
2016 
2017  // PowerPC doesn't have preinc load/store instructions for vectors (except
2018  // for QPX, which does have preinc r+r forms).
2019  if (VT.isVector()) {
2020  if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2021  return false;
2022  } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2023  AM = ISD::PRE_INC;
2024  return true;
2025  }
2026  }
2027 
2028  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2029 
2030  // Common code will reject creating a pre-inc form if the base pointer
2031  // is a frame index, or if N is a store and the base pointer is either
2032  // the same as or a predecessor of the value being stored. Check for
2033  // those situations here, and try with swapped Base/Offset instead.
2034  bool Swap = false;
2035 
2036  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2037  Swap = true;
2038  else if (!isLoad) {
2039  SDValue Val = cast<StoreSDNode>(N)->getValue();
2040  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2041  Swap = true;
2042  }
2043 
2044  if (Swap)
2045  std::swap(Base, Offset);
2046 
2047  AM = ISD::PRE_INC;
2048  return true;
2049  }
2050 
2051  // LDU/STU can only handle immediates that are a multiple of 4.
2052  if (VT != MVT::i64) {
2053  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
2054  return false;
2055  } else {
2056  // LDU/STU need an address with at least 4-byte alignment.
2057  if (Alignment < 4)
2058  return false;
2059 
2060  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
2061  return false;
2062  }
2063 
2064  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2065  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2066  // sext i32 to i64 when addr mode is r+i.
2067  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2068  LD->getExtensionType() == ISD::SEXTLOAD &&
2069  isa<ConstantSDNode>(Offset))
2070  return false;
2071  }
2072 
2073  AM = ISD::PRE_INC;
2074  return true;
2075 }
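// Illustrative sketch (assuming standard PPC mnemonics): the pre-indexed
// form folds the pointer update into the memory access itself, e.g.
//   lwzu r5, 4(r3)   // load from r3+4 and write r3+4 back into r3
// in place of a separate addi/lwz pair when walking an array in a loop.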
2076 
2077 //===----------------------------------------------------------------------===//
2078 // LowerOperation implementation
2079 //===----------------------------------------------------------------------===//
2080 
2081 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2082 /// and LoOpFlags to the target MO flags.
2083 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2084  unsigned &HiOpFlags, unsigned &LoOpFlags,
2085  const GlobalValue *GV = nullptr) {
2086  HiOpFlags = PPCII::MO_HA;
2087  LoOpFlags = PPCII::MO_LO;
2088 
2089  // Don't use the pic base if not in PIC relocation model.
2090  if (IsPIC) {
2091  HiOpFlags |= PPCII::MO_PIC_FLAG;
2092  LoOpFlags |= PPCII::MO_PIC_FLAG;
2093  }
2094 
2095  // If this is a reference to a global value that requires a non-lazy-ptr, make
2096  // sure that instruction lowering adds it.
2097  if (GV && Subtarget.hasLazyResolverStub(GV)) {
2098  HiOpFlags |= PPCII::MO_NLP_FLAG;
2099  LoOpFlags |= PPCII::MO_NLP_FLAG;
2100 
2101  if (GV->hasHiddenVisibility()) {
2102  HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2103  LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2104  }
2105  }
2106 }
2107 
2108 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2109  SelectionDAG &DAG) {
2110  SDLoc DL(HiPart);
2111  EVT PtrVT = HiPart.getValueType();
2112  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2113 
2114  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2115  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2116 
2117  // With PIC, the first instruction is actually "GR+hi(&G)".
2118  if (isPIC)
2119  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2120  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2121 
2122  // Generate non-pic code that has direct accesses to the constant pool.
2123  // The address of the global is just (hi(&g)+lo(&g)).
2124  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2125 }
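// Illustrative sketch (assuming standard PPC mnemonics): for a non-PIC
// reference to a symbol G, the Hi/Lo nodes built above typically become
//   lis  r3, G@ha
//   addi r3, r3, G@l
// where @ha compensates for the sign extension of the low 16 bits.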
2126 
2127 static void setUsesTOCBasePtr(MachineFunction &MF) {
2128  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2129  FuncInfo->setUsesTOCBasePtr();
2130 }
2131 
2132 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2133  setUsesTOCBasePtr(DAG.getMachineFunction());
2134 }
2135 
2136 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2137  SDValue GA) {
2138  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2139  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2140  DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2141 
2142  SDValue Ops[] = { GA, Reg };
2143  return DAG.getMemIntrinsicNode(
2144  PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2145  MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2146  false, 0);
2147 }
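// Illustrative sketch (assuming standard PPC64 mnemonics): a TOC_ENTRY node
// built here typically materializes as a TOC-relative load off X2, e.g.
//   ld r3, G@toc(r2)
// in the small code model; the medium model splits it into @toc@ha/@toc@l.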
2148 
2149 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2150  SelectionDAG &DAG) const {
2151  EVT PtrVT = Op.getValueType();
2152  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2153  const Constant *C = CP->getConstVal();
2154 
2155  // 64-bit SVR4 ABI code is always position-independent.
2156  // The actual address of the GlobalValue is stored in the TOC.
2157  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2158  setUsesTOCBasePtr(DAG);
2159  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2160  return getTOCEntry(DAG, SDLoc(CP), true, GA);
2161  }
2162 
2163  unsigned MOHiFlag, MOLoFlag;
2164  bool IsPIC = isPositionIndependent();
2165  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2166 
2167  if (IsPIC && Subtarget.isSVR4ABI()) {
2168  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2169  PPCII::MO_PIC_FLAG);
2170  return getTOCEntry(DAG, SDLoc(CP), false, GA);
2171  }
2172 
2173  SDValue CPIHi =
2174  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2175  SDValue CPILo =
2176  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2177  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2178 }
2179 
2180 // For 64-bit PowerPC, prefer the more compact relative encodings.
2181 // This trades 32 bits per jump table entry for one or two instructions
2182 // at the jump site.
2183 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2184  if (isJumpTableRelative())
2185  return MachineJumpTableInfo::EK_LabelDifference32;
2186 
2187  return TargetLowering::getJumpTableEncoding();
2188 }
2189 
2190 bool PPCTargetLowering::isJumpTableRelative() const {
2191  if (Subtarget.isPPC64())
2192  return true;
2193  return TargetLowering::isJumpTableRelative();
2194 }
2195 
2196 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2197  SelectionDAG &DAG) const {
2198  if (!Subtarget.isPPC64())
2199  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2200 
2201  switch (getTargetMachine().getCodeModel()) {
2202  case CodeModel::Default:
2203  case CodeModel::Small:
2204  case CodeModel::Medium:
2205  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2206  default:
2207  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2208  getPointerTy(DAG.getDataLayout()));
2209  }
2210 }
2211 
2212 const MCExpr *
2213 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2214  unsigned JTI,
2215  MCContext &Ctx) const {
2216  if (!Subtarget.isPPC64())
2217  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2218 
2219  switch (getTargetMachine().getCodeModel()) {
2220  case CodeModel::Default:
2221  case CodeModel::Small:
2222  case CodeModel::Medium:
2223  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2224  default:
2225  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2226  }
2227 }
2228 
2229 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2230  EVT PtrVT = Op.getValueType();
2231  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2232 
2233  // 64-bit SVR4 ABI code is always position-independent.
2234  // The actual address of the GlobalValue is stored in the TOC.
2235  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2236  setUsesTOCBasePtr(DAG);
2237  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2238  return getTOCEntry(DAG, SDLoc(JT), true, GA);
2239  }
2240 
2241  unsigned MOHiFlag, MOLoFlag;
2242  bool IsPIC = isPositionIndependent();
2243  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2244 
2245  if (IsPIC && Subtarget.isSVR4ABI()) {
2246  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2247  PPCII::MO_PIC_FLAG);
2248  return getTOCEntry(DAG, SDLoc(GA), false, GA);
2249  }
2250 
2251  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2252  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2253  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2254 }
2255 
2256 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2257  SelectionDAG &DAG) const {
2258  EVT PtrVT = Op.getValueType();
2259  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2260  const BlockAddress *BA = BASDN->getBlockAddress();
2261 
2262  // 64-bit SVR4 ABI code is always position-independent.
2263  // The actual BlockAddress is stored in the TOC.
2264  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2265  setUsesTOCBasePtr(DAG);
2266  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2267  return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2268  }
2269 
2270  unsigned MOHiFlag, MOLoFlag;
2271  bool IsPIC = isPositionIndependent();
2272  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2273  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2274  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2275  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2276 }
2277 
2278 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2279  SelectionDAG &DAG) const {
2280 
2281  // FIXME: TLS addresses currently use medium model code sequences,
2282  // which is the most useful form. Eventually support for small and
2283  // large models could be added if users need it, at the cost of
2284  // additional complexity.
2285  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2286  if (DAG.getTarget().Options.EmulatedTLS)
2287  return LowerToTLSEmulatedModel(GA, DAG);
2288 
2289  SDLoc dl(GA);
2290  const GlobalValue *GV = GA->getGlobal();
2291  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2292  bool is64bit = Subtarget.isPPC64();
2293  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2294  PICLevel::Level picLevel = M->getPICLevel();
2295 
2296  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2297 
2298  if (Model == TLSModel::LocalExec) {
2299  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2300  PPCII::MO_TPREL_HA);
2301  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2302  PPCII::MO_TPREL_LO);
2303  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2304  is64bit ? MVT::i64 : MVT::i32);
2305  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2306  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2307  }
2308 
2309  if (Model == TLSModel::InitialExec) {
2310  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2311  SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2312  PPCII::MO_TLS);
2313  SDValue GOTPtr;
2314  if (is64bit) {
2315  setUsesTOCBasePtr(DAG);
2316  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2317  GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2318  PtrVT, GOTReg, TGA);
2319  } else
2320  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2321  SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2322  PtrVT, TGA, GOTPtr);
2323  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2324  }
2325 
2326  if (Model == TLSModel::GeneralDynamic) {
2327  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2328  SDValue GOTPtr;
2329  if (is64bit) {
2330  setUsesTOCBasePtr(DAG);
2331  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2332  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2333  GOTReg, TGA);
2334  } else {
2335  if (picLevel == PICLevel::SmallPIC)
2336  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2337  else
2338  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2339  }
2340  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2341  GOTPtr, TGA, TGA);
2342  }
2343 
2344  if (Model == TLSModel::LocalDynamic) {
2345  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2346  SDValue GOTPtr;
2347  if (is64bit) {
2348  setUsesTOCBasePtr(DAG);
2349  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2350  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2351  GOTReg, TGA);
2352  } else {
2353  if (picLevel == PICLevel::SmallPIC)
2354  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2355  else
2356  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2357  }
2358  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2359  PtrVT, GOTPtr, TGA, TGA);
2360  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2361  PtrVT, TLSAddr, TGA);
2362  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2363  }
2364 
2365  llvm_unreachable("Unknown TLS model!");
2366 }
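// Illustrative sketch of the sequences built above (assuming standard PPC64
// ELF TLS mnemonics; register choices are schematic, not fixed):
//   local-exec:      addis r4, r13, G@tprel@ha
//                    addi  r4, r4, G@tprel@l
//   initial-exec:    addis r4, r2, G@got@tprel@ha
//                    ld    r4, G@got@tprel@l(r4)
//                    add   r4, r4, G@tls
//   general-dynamic: addis r3, r2, G@got@tlsgd@ha
//                    addi  r3, r3, G@got@tlsgd@l
//                    bl    __tls_get_addr(G@tlsgd)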
2367 
2368 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2369  SelectionDAG &DAG) const {
2370  EVT PtrVT = Op.getValueType();
2371  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2372  SDLoc DL(GSDN);
2373  const GlobalValue *GV = GSDN->getGlobal();
2374 
2375  // 64-bit SVR4 ABI code is always position-independent.
2376  // The actual address of the GlobalValue is stored in the TOC.
2377  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2378  setUsesTOCBasePtr(DAG);
2379  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2380  return getTOCEntry(DAG, DL, true, GA);
2381  }
2382 
2383  unsigned MOHiFlag, MOLoFlag;
2384  bool IsPIC = isPositionIndependent();
2385  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2386 
2387  if (IsPIC && Subtarget.isSVR4ABI()) {
2388  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2389  GSDN->getOffset(),
2390  PPCII::MO_PIC_FLAG);
2391  return getTOCEntry(DAG, DL, false, GA);
2392  }
2393 
2394  SDValue GAHi =
2395  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2396  SDValue GALo =
2397  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2398 
2399  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2400 
2401  // If the global reference is actually to a non-lazy-pointer, we have to do an
2402  // extra load to get the address of the global.
2403  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2404  Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2405  return Ptr;
2406 }
2407 
2408 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2409  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2410  SDLoc dl(Op);
2411 
2412  if (Op.getValueType() == MVT::v2i64) {
2413  // When the operands themselves are v2i64 values, we need to do something
2414  // special because VSX has no underlying comparison operations for these.
2415  if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2416  // Equality can be handled by casting to the legal type for Altivec
2417  // comparisons, everything else needs to be expanded.
2418  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2419  return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2420  DAG.getSetCC(dl, MVT::v4i32,
2421  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2422  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2423  CC));
2424  }
2425 
2426  return SDValue();
2427  }
2428 
2429  // We handle most of these in the usual way.
2430  return Op;
2431  }
2432 
2433  // If we're comparing for equality to zero, expose the fact that this is
2434  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2435  // fold the new nodes.
2436  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2437  return V;
2438 
2439  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2440  // Leave comparisons against 0 and -1 alone for now, since they're usually
2441  // optimized. FIXME: revisit this when we can custom lower all setcc
2442  // optimizations.
2443  if (C->isAllOnesValue() || C->isNullValue())
2444  return SDValue();
2445  }
2446 
2447  // If we have an integer seteq/setne, turn it into a compare against zero
2448  // by xor'ing the rhs with the lhs, which is faster than setting a
2449  // condition register, reading it back out, and masking the correct bit. The
2450  // normal approach here uses sub to do this instead of xor. Using xor exposes
2451  // the result to other bit-twiddling opportunities.
2452  EVT LHSVT = Op.getOperand(0).getValueType();
2453  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2454  EVT VT = Op.getValueType();
2455  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2456  Op.getOperand(1));
2457  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2458  }
2459  return SDValue();
2460 }
2461 
2462 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2463  SDNode *Node = Op.getNode();
2464  EVT VT = Node->getValueType(0);
2465  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2466  SDValue InChain = Node->getOperand(0);
2467  SDValue VAListPtr = Node->getOperand(1);
2468  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2469  SDLoc dl(Node);
2470 
2471  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2472 
2473  // gpr_index
2474  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2475  VAListPtr, MachinePointerInfo(SV), MVT::i8);
2476  InChain = GprIndex.getValue(1);
2477 
2478  if (VT == MVT::i64) {
2479  // Check whether GprIndex is odd; i64 args must start at an even index
2480  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2481  DAG.getConstant(1, dl, MVT::i32));
2482  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2483  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2484  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2485  DAG.getConstant(1, dl, MVT::i32));
2486  // Align GprIndex to be even if it isn't
2487  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2488  GprIndex);
2489  }
2490 
2491  // fpr index is 1 byte after gpr
2492  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2493  DAG.getConstant(1, dl, MVT::i32));
2494 
2495  // fpr
2496  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2497  FprPtr, MachinePointerInfo(SV), MVT::i8);
2498  InChain = FprIndex.getValue(1);
2499 
2500  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2501  DAG.getConstant(8, dl, MVT::i32));
2502 
2503  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2504  DAG.getConstant(4, dl, MVT::i32));
2505 
2506  // areas
2507  SDValue OverflowArea =
2508  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2509  InChain = OverflowArea.getValue(1);
2510 
2511  SDValue RegSaveArea =
2512  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2513  InChain = RegSaveArea.getValue(1);
2514 
2515  // select overflow_area if index >= 8
2516  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2517  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2518 
2519  // adjustment constant gpr_index * 4/8
2520  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2521  VT.isInteger() ? GprIndex : FprIndex,
2522  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2523  MVT::i32));
2524 
2525  // OurReg = RegSaveArea + RegConstant
2526  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2527  RegConstant);
2528 
2529  // Floating types are 32 bytes into RegSaveArea
2530  if (VT.isFloatingPoint())
2531  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2532  DAG.getConstant(32, dl, MVT::i32));
2533 
2534  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2535  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2536  VT.isInteger() ? GprIndex : FprIndex,
2537  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2538  MVT::i32));
2539 
2540  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2541  VT.isInteger() ? VAListPtr : FprPtr,
2542  MachinePointerInfo(SV), MVT::i8);
2543 
2544  // determine if we should load from reg_save_area or overflow_area
2545  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2546 
2547  // increase overflow_area by 4/8 if gpr/fpr index >= 8
2548  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2549  DAG.getConstant(VT.isInteger() ? 4 : 8,
2550  dl, MVT::i32));
2551 
2552  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2553  OverflowAreaPlusN);
2554 
2555  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2556  MachinePointerInfo(), MVT::i32);
2557 
2558  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2559 }
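// Rough C sketch of the selection logic above (illustrative only; all names
// are hypothetical). For i64 the index is first rounded up to be even:
//   bool in_regs = index < 8;
//   char *addr = in_regs ? reg_save_area + index * size + (is_fp ? 32 : 0)
//                        : overflow_area;
//   index += is_i64 ? 2 : 1;              // written back unconditionally
//   if (!in_regs) overflow_area += size;  // advanced only on overflow
//   return *(T *)addr;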
2560 
2561 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2562  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2563 
2564  // We have to copy the entire va_list struct:
2565  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2566  return DAG.getMemcpy(Op.getOperand(0), Op,
2567  Op.getOperand(1), Op.getOperand(2),
2568  DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2569  false, MachinePointerInfo(), MachinePointerInfo());
2570 }
2571 
2572 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2573  SelectionDAG &DAG) const {
2574  return Op.getOperand(0);
2575 }
2576 
2577 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2578  SelectionDAG &DAG) const {
2579  SDValue Chain = Op.getOperand(0);
2580  SDValue Trmp = Op.getOperand(1); // trampoline
2581  SDValue FPtr = Op.getOperand(2); // nested function
2582  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2583  SDLoc dl(Op);
2584 
2585  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2586  bool isPPC64 = (PtrVT == MVT::i64);
2587  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2588 
2589  TargetLowering::ArgListTy Args;
2590  TargetLowering::ArgListEntry Entry;
2591 
2592  Entry.Ty = IntPtrTy;
2593  Entry.Node = Trmp; Args.push_back(Entry);
2594 
2595  // TrampSize == (isPPC64 ? 48 : 40);
2596  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2597  isPPC64 ? MVT::i64 : MVT::i32);
2598  Args.push_back(Entry);
2599 
2600  Entry.Node = FPtr; Args.push_back(Entry);
2601  Entry.Node = Nest; Args.push_back(Entry);
2602 
2603  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2604  TargetLowering::CallLoweringInfo CLI(DAG);
2605  CLI.setDebugLoc(dl).setChain(Chain)
2606  .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2607  DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2608  std::move(Args));
2609 
2610  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2611  return CallResult.second;
2612 }
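// In effect this emits a runtime call equivalent to (illustrative):
//   __trampoline_setup(Trmp, isPPC64 ? 48 : 40, FPtr, Nest);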
2613 
2614 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2615  MachineFunction &MF = DAG.getMachineFunction();
2616  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2617  EVT PtrVT = getPointerTy(MF.getDataLayout());
2618 
2619  SDLoc dl(Op);
2620 
2621  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2622  // vastart just stores the address of the VarArgsFrameIndex slot into the
2623  // memory location argument.
2624  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2625  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2626  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2627  MachinePointerInfo(SV));
2628  }
2629 
2630  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2631  // We suppose the given va_list is already allocated.
2632  //
2633  // typedef struct {
2634  // char gpr; /* index into the array of 8 GPRs
2635  // * stored in the register save area
2636  // * gpr=0 corresponds to r3,
2637  // * gpr=1 to r4, etc.
2638  // */
2639  // char fpr; /* index into the array of 8 FPRs
2640  // * stored in the register save area
2641  // * fpr=0 corresponds to f1,
2642  // * fpr=1 to f2, etc.
2643  // */
2644  // char *overflow_arg_area;
2645  // /* location on stack that holds
2646  // * the next overflow argument
2647  // */
2648  // char *reg_save_area;
2649  // /* where r3:r10 and f1:f8 (if saved)
2650  // * are stored
2651  // */
2652  // } va_list[1];
2653 
2654  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2655  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2656  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2657  PtrVT);
2658  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2659  PtrVT);
2660 
2661  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2662  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2663 
2664  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2665  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2666 
2667  uint64_t FPROffset = 1;
2668  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2669 
2670  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2671 
2672  // Store first byte : number of int regs
2673  SDValue firstStore =
2674  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2675  MachinePointerInfo(SV), MVT::i8);
2676  uint64_t nextOffset = FPROffset;
2677  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2678  ConstFPROffset);
2679 
2680  // Store second byte : number of float regs
2681  SDValue secondStore =
2682  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2683  MachinePointerInfo(SV, nextOffset), MVT::i8);
2684  nextOffset += StackOffset;
2685  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2686 
2687  // Store second word : arguments given on stack
2688  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2689  MachinePointerInfo(SV, nextOffset));
2690  nextOffset += FrameOffset;
2691  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2692 
2693  // Store third word : arguments given in registers
2694  return DAG.getStore(thirdStore, dl, FR, nextPtr,
2695  MachinePointerInfo(SV, nextOffset));
2696 }
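// Resulting byte layout of the 32-bit SVR4 va_list written above:
//   offset 0: gpr index (i8), offset 1: fpr index (i8), offsets 2-3: padding,
//   offset 4: overflow_arg_area pointer, offset 8: reg_save_area pointer.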
2697 
2698 #include "PPCGenCallingConv.inc"
2699 
2700 // Function whose sole purpose is to kill compiler warnings
2701 // stemming from unused functions included from PPCGenCallingConv.inc.
2702 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2703  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2704 }
2705 
2706 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2707  CCValAssign::LocInfo &LocInfo,
2708  ISD::ArgFlagsTy &ArgFlags,
2709  CCState &State) {
2710  return true;
2711 }
2712 
2713 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2714  MVT &LocVT,
2715  CCValAssign::LocInfo &LocInfo,
2716  ISD::ArgFlagsTy &ArgFlags,
2717  CCState &State) {
2718  static const MCPhysReg ArgRegs[] = {
2719  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2720  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2721  };
2722  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2723 
2724  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2725 
2726  // Skip one register if the first unallocated register has an even register
2727  // number and there are still argument registers available which have not been
2728  // allocated yet. RegNum is actually an index into ArgRegs, which means we
2729  // need to skip a register if RegNum is odd.
2730  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2731  State.AllocateReg(ArgRegs[RegNum]);
2732  }
2733 
2734  // Always return false here, as this function only makes sure that the first
2735  // unallocated register has an odd register number and does not actually
2736  // allocate a register for the current argument.
2737  return false;
2738 }
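// Worked example (illustrative): for f(int a, long long b) on 32-bit SVR4,
// 'a' lands in R3 (index 0); the first unallocated register is then R4
// (index 1, odd), so R4 is skipped and 'b' is passed in the aligned pair
// R5:R6.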
2739 
2740 bool
2741 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
2742  MVT &LocVT,
2743  CCValAssign::LocInfo &LocInfo,
2744  ISD::ArgFlagsTy &ArgFlags,
2745  CCState &State) {
2746  static const MCPhysReg ArgRegs[] = {
2747  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2748  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2749  };
2750  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2751 
2752  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2753  int RegsLeft = NumArgRegs - RegNum;
2754 
2755  // If there are not enough registers left for the long double type (4 GPRs
2756  // in soft-float mode), skip them and pass the long double on the stack.
2757  if (RegNum != NumArgRegs && RegsLeft < 4) {
2758  for (int i = 0; i < RegsLeft; i++) {
2759  State.AllocateReg(ArgRegs[RegNum + i]);
2760  }
2761  }
2762 
2763  return false;
2764 }
2765 
2766 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2767  MVT &LocVT,
2768  CCValAssign::LocInfo &LocInfo,
2769  ISD::ArgFlagsTy &ArgFlags,
2770  CCState &State) {
2771  static const MCPhysReg ArgRegs[] = {
2772  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2773  PPC::F8
2774  };
2775 
2776  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2777 
2778  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2779 
2780  // If there is only one floating-point register left, we need to put both
2781  // f64 values of a split ppc_fp128 value on the stack.
2782  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2783  State.AllocateReg(ArgRegs[RegNum]);
2784  }
2785 
2786  // Always return false here, as this function only makes sure that the two f64
2787  // values a ppc_fp128 value is split into are both passed in registers or both
2788  // passed on the stack and does not actually allocate a register for the
2789  // current argument.
2790  return false;
2791 }
2792 
2793 /// FPR - The set of FP registers that should be allocated for arguments,
2794 /// on Darwin.
2795 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
2796  PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
2797  PPC::F11, PPC::F12, PPC::F13};
2798 
2799 /// QFPR - The set of QPX registers that should be allocated for arguments.
2800 static const MCPhysReg QFPR[] = {
2801  PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
2802  PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2803 
2804 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2805 /// the stack.
2806 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2807  unsigned PtrByteSize) {
2808  unsigned ArgSize = ArgVT.getStoreSize();
2809  if (Flags.isByVal())
2810  ArgSize = Flags.getByValSize();
2811 
2812  // Round up to multiples of the pointer size, except for array members,
2813  // which are always packed.
2814  if (!Flags.isInConsecutiveRegs())
2815  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2816 
2817  return ArgSize;
2818 }
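// Worked example (illustrative): on PPC64 (PtrByteSize == 8) a float argument
// has store size 4 and is rounded up to an 8-byte slot, unless it is an array
// member (isInConsecutiveRegs), in which case it stays packed at 4 bytes.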
2819 
2820 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2821 /// on the stack.
2822 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2823  ISD::ArgFlagsTy Flags,
2824  unsigned PtrByteSize) {
2825  unsigned Align = PtrByteSize;
2826 
2827  // Altivec parameters are padded to a 16 byte boundary.
2828  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2829  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2830  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2831  ArgVT == MVT::v1i128)
2832  Align = 16;
2833  // QPX vector types stored in double-precision are padded to a 32 byte
2834  // boundary.
2835  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2836  Align = 32;
2837 
2838  // ByVal parameters are aligned as requested.
2839  if (Flags.isByVal()) {
2840  unsigned BVAlign = Flags.getByValAlign();
2841  if (BVAlign > PtrByteSize) {
2842  if (BVAlign % PtrByteSize != 0)
2844  "ByVal alignment is not a multiple of the pointer size");
2845 
2846  Align = BVAlign;
2847  }
2848  }
2849 
2850  // Array members are always packed to their original alignment.
2851  if (Flags.isInConsecutiveRegs()) {
2852  // If the array member was split into multiple registers, the first
2853  // needs to be aligned to the size of the full type. (Except for
2854  // ppcf128, which is only aligned as its f64 components.)
2855  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2856  Align = OrigVT.getStoreSize();
2857  else
2858  Align = ArgVT.getStoreSize();
2859  }
2860 
2861  return Align;
2862 }
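// Worked example (illustrative): an MVT::v4f32 argument is padded to a
// 16-byte boundary, a QPX MVT::v4f64 argument to 32 bytes, and a byval
// aggregate requesting 32-byte alignment keeps that alignment.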
2863 
2864 /// CalculateStackSlotUsed - Return whether this argument will use its
2865 /// stack slot (instead of being passed in registers). ArgOffset,
2866 /// AvailableFPRs, and AvailableVRs must hold the current argument
2867 /// position, and will be updated to account for this argument.
2868 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2869  ISD::ArgFlagsTy Flags,
2870  unsigned PtrByteSize,
2871  unsigned LinkageSize,
2872  unsigned ParamAreaSize,
2873  unsigned &ArgOffset,
2874  unsigned &AvailableFPRs,
2875  unsigned &AvailableVRs, bool HasQPX) {
2876  bool UseMemory = false;
2877 
2878  // Respect alignment of argument on the stack.
2879  unsigned Align =
2880  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2881  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2882  // If there's no space left in the argument save area, we must
2883  // use memory (this check also catches zero-sized arguments).
2884  if (ArgOffset >= LinkageSize + ParamAreaSize)
2885  UseMemory = true;
2886 
2887  // Allocate argument on the stack.
2888  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2889  if (Flags.isInConsecutiveRegsLast())
2890  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2891  // If we overran the argument save area, we must use memory
2892  // (this check catches arguments passed partially in memory)
2893  if (ArgOffset > LinkageSize + ParamAreaSize)
2894  UseMemory = true;
2895 
2896  // However, if the argument is actually passed in an FPR or a VR,
2897  // we don't use memory after all.
2898  if (!Flags.isByVal()) {
2899  if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2900  // QPX registers overlap with the scalar FP registers.
2901  (HasQPX && (ArgVT == MVT::v4f32 ||
2902  ArgVT == MVT::v4f64 ||
2903  ArgVT == MVT::v4i1)))
2904  if (AvailableFPRs > 0) {
2905  --AvailableFPRs;
2906  return false;
2907  }
2908  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2909  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2910  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2911  ArgVT == MVT::v1i128)
2912  if (AvailableVRs > 0) {
2913  --AvailableVRs;
2914  return false;
2915  }
2916  }
2917 
2918  return UseMemory;
2919 }
2920 
2921 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2922 /// ensure minimum alignment required for target.
2923 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2924  unsigned NumBytes) {
2925  unsigned TargetAlign = Lowering->getStackAlignment();
2926  unsigned AlignMask = TargetAlign - 1;
2927  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2928  return NumBytes;
2929 }
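// Worked example (illustrative): with a 16-byte target stack alignment,
// NumBytes == 100 becomes (100 + 15) & ~15 == 112.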
2930 
2931 SDValue PPCTargetLowering::LowerFormalArguments(
2932  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2933  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2934  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2935  if (Subtarget.isSVR4ABI()) {
2936  if (Subtarget.isPPC64())
2937  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2938  dl, DAG, InVals);
2939  else
2940  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2941  dl, DAG, InVals);
2942  } else {
2943  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2944  dl, DAG, InVals);
2945  }
2946 }
2947 
2948 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
2949  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2950  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2951  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2952 
2953  // 32-bit SVR4 ABI Stack Frame Layout:
2954  // +-----------------------------------+
2955  // +--> | Back chain |
2956  // | +-----------------------------------+
2957  // | | Floating-point register save area |
2958  // | +-----------------------------------+
2959  // | | General register save area |
2960  // | +-----------------------------------+
2961  // | | CR save word |
2962  // | +-----------------------------------+
2963  // | | VRSAVE save word |
2964  // | +-----------------------------------+
2965  // | | Alignment padding |
2966  // | +-----------------------------------+
2967  // | | Vector register save area |
2968  // | +-----------------------------------+
2969  // | | Local variable space |
2970  // | +-----------------------------------+
2971  // | | Parameter list area |
2972  // | +-----------------------------------+
2973  // | | LR save word |
2974  // | +-----------------------------------+
2975  // SP--> +--- | Back chain |
2976  // +-----------------------------------+
2977  //
2978  // Specifications:
2979  // System V Application Binary Interface PowerPC Processor Supplement
2980  // AltiVec Technology Programming Interface Manual
2981 
2982  MachineFunction &MF = DAG.getMachineFunction();
2983  MachineFrameInfo &MFI = MF.getFrameInfo();
2984  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2985 
2986  EVT PtrVT = getPointerTy(MF.getDataLayout());
2987  // Potential tail calls could cause overwriting of argument stack slots.
2988  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2989  (CallConv == CallingConv::Fast));
2990  unsigned PtrByteSize = 4;
2991 
2992  // Assign locations to all of the incoming arguments.
2993  SmallVector<CCValAssign, 16> ArgLocs;
2994  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2995  *DAG.getContext());
2996 
2997  // Reserve space for the linkage area on the stack.
2998  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2999  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3000  if (useSoftFloat())
3001  CCInfo.PreAnalyzeFormalArguments(Ins);
3002 
3003  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3004  CCInfo.clearWasPPCF128();
3005 
3006  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3007  CCValAssign &VA = ArgLocs[i];
3008 
3009  // Arguments stored in registers.
3010  if (VA.isRegLoc()) {
3011  const TargetRegisterClass *RC;
3012  EVT ValVT = VA.getValVT();
3013 
3014  switch (ValVT.getSimpleVT().SimpleTy) {
3015  default:
3016  llvm_unreachable("ValVT not supported by formal arguments Lowering");
3017  case MVT::i1:
3018  case MVT::i32:
3019  RC = &PPC::GPRCRegClass;
3020  break;
3021  case MVT::f32:
3022  if (Subtarget.hasP8Vector())
3023  RC = &PPC::VSSRCRegClass;
3024  else
3025  RC = &PPC::F4RCRegClass;
3026  break;
3027  case MVT::f64:
3028  if (Subtarget.hasVSX())
3029  RC = &PPC::VSFRCRegClass;
3030  else
3031  RC = &PPC::F8RCRegClass;
3032  break;
3033  case MVT::v16i8:
3034  case MVT::v8i16:
3035  case MVT::v4i32:
3036  RC = &PPC::VRRCRegClass;
3037  break;
3038  case MVT::v4f32:
3039  RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3040  break;
3041  case MVT::v2f64:
3042  case MVT::v2i64:
3043  RC = &PPC::VRRCRegClass;
3044  break;
3045  case MVT::v4f64:
3046  RC = &PPC::QFRCRegClass;
3047  break;
3048  case MVT::v4i1:
3049  RC = &PPC::QBRCRegClass;
3050  break;
3051  }
3052 
3053  // Transform the arguments stored in physical registers into virtual ones.
3054  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3055  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3056  ValVT == MVT::i1 ? MVT::i32 : ValVT);
3057 
3058  if (ValVT == MVT::i1)
3059  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3060 
3061  InVals.push_back(ArgValue);
3062  } else {
3063  // Argument stored in memory.
3064  assert(VA.isMemLoc());
3065 
3066  unsigned ArgSize = VA.getLocVT().getStoreSize();
3067  int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3068  isImmutable);
3069 
3070  // Create load nodes to retrieve arguments from the stack.
3071  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3072  InVals.push_back(
3073  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3074  }
3075  }
3076 
3077  // Assign locations to all of the incoming aggregate by value arguments.
3078  // Aggregates passed by value are stored in the local variable space of the
3079  // caller's stack frame, right above the parameter list area.
3080  SmallVector<CCValAssign, 16> ByValArgLocs;
3081  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3082  ByValArgLocs, *DAG.getContext());
3083 
3084  // Reserve stack space for the allocations in CCInfo.
3085  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3086 
3087  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3088 
3089  // Area that is at least reserved in the caller of this function.
3090  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3091  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3092 
3093  // Set the size that is at least reserved in caller of this function. Tail
3094  // call optimized function's reserved stack space needs to be aligned so that
3095  // taking the difference between two stack areas will result in an aligned
3096  // stack.
3097  MinReservedArea =
3098  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3099  FuncInfo->setMinReservedArea(MinReservedArea);
3100 
3101  SmallVector<SDValue, 8> MemOps;
3102 
3103  // If the function takes variable number of arguments, make a frame index for
3104  // the start of the first vararg value... for expansion of llvm.va_start.
3105  if (isVarArg) {
3106  static const MCPhysReg GPArgRegs[] = {
3107  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3108  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3109  };
3110  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3111 
3112  static const MCPhysReg FPArgRegs[] = {
3113  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3114  PPC::F8
3115  };
3116  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3117 
3118  if (useSoftFloat())
3119  NumFPArgRegs = 0;
3120 
3121  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3122  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3123 
3124  // Make room for NumGPArgRegs and NumFPArgRegs.
3125  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3126  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3127 
3128  FuncInfo->setVarArgsStackOffset(
3129  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3130  CCInfo.getNextStackOffset(), true));
3131 
3132  FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3133  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3134 
3135  // The fixed integer arguments of a variadic function are stored to the
3136  // VarArgsFrameIndex on the stack so that they may be loaded by
3137  // dereferencing the result of va_next.
3138  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3139  // Get an existing live-in vreg, or add a new one.
3140  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3141  if (!VReg)
3142  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3143 
3144  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3145  SDValue Store =
3146  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3147  MemOps.push_back(Store);
3148  // Increment the address by four for the next argument to store
3149  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3150  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3151  }
3152 
3153  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3154  // is set.
3155  // The double arguments are stored to the VarArgsFrameIndex
3156  // on the stack.
3157  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3158  // Get an existing live-in vreg, or add a new one.
3159  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3160  if (!VReg)
3161  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3162 
3163  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3164  SDValue Store =
3165  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3166  MemOps.push_back(Store);
3167  // Increment the address by eight for the next argument to store
3168  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3169  PtrVT);
3170  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3171  }
3172  }
3173 
3174  if (!MemOps.empty())
3175  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3176 
3177  return Chain;
3178 }
3179 
3180 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3181 // value to MVT::i64 and then truncate to the correct register size.
3182 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3183  EVT ObjectVT, SelectionDAG &DAG,
3184  SDValue ArgVal,
3185  const SDLoc &dl) const {
3186  if (Flags.isSExt())
3187  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3188  DAG.getValueType(ObjectVT));
3189  else if (Flags.isZExt())
3190  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3191  DAG.getValueType(ObjectVT));
3192 
3193  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3194 }
3195 
3196 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3197  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3198  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3199  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3200  // TODO: add description of PPC stack frame format, or at least some docs.
3201  //
3202  bool isELFv2ABI = Subtarget.isELFv2ABI();
3203  bool isLittleEndian = Subtarget.isLittleEndian();
3204  MachineFunction &MF = DAG.getMachineFunction();
3205  MachineFrameInfo &MFI = MF.getFrameInfo();
3206  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3207 
3208  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3209  "fastcc not supported on varargs functions");
3210 
3211  EVT PtrVT = getPointerTy(MF.getDataLayout());
3212  // Potential tail calls could cause overwriting of argument stack slots.
3213  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3214  (CallConv == CallingConv::Fast));
3215  unsigned PtrByteSize = 8;
3216  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3217 
3218  static const MCPhysReg GPR[] = {
3219  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3220  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3221  };
3222  static const MCPhysReg VR[] = {
3223  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3224  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3225  };
3226 
3227  const unsigned Num_GPR_Regs = array_lengthof(GPR);
3228  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3229  const unsigned Num_VR_Regs = array_lengthof(VR);
3230  const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3231 
3232  // Do a first pass over the arguments to determine whether the ABI
3233  // guarantees that our caller has allocated the parameter save area
3234  // on its stack frame. In the ELFv1 ABI, this is always the case;
3235  // in the ELFv2 ABI, it is true if this is a vararg function or if
3236  // any parameter is located in a stack slot.
3237 
3238  bool HasParameterArea = !isELFv2ABI || isVarArg;
3239  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3240  unsigned NumBytes = LinkageSize;
3241  unsigned AvailableFPRs = Num_FPR_Regs;
3242  unsigned AvailableVRs = Num_VR_Regs;
3243  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3244  if (Ins[i].Flags.isNest())
3245  continue;
3246 
3247  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3248  PtrByteSize, LinkageSize, ParamAreaSize,
3249  NumBytes, AvailableFPRs, AvailableVRs,
3250  Subtarget.hasQPX()))
3251  HasParameterArea = true;
3252  }
3253 
3254  // Add DAG nodes to load the arguments or copy them out of registers. On
3255  // entry to a function on PPC, the arguments start after the linkage area,
3256  // although the first ones are often in registers.
3257 
3258  unsigned ArgOffset = LinkageSize;
3259  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3260  unsigned &QFPR_idx = FPR_idx;
3261  SmallVector<SDValue, 8> MemOps;
3262  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3263  unsigned CurArgIdx = 0;
3264  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3265  SDValue ArgVal;
3266  bool needsLoad = false;
3267  EVT ObjectVT = Ins[ArgNo].VT;
3268  EVT OrigVT = Ins[ArgNo].ArgVT;
3269  unsigned ObjSize = ObjectVT.getStoreSize();
3270  unsigned ArgSize = ObjSize;
3271  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3272  if (Ins[ArgNo].isOrigArg()) {
3273  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3274  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3275  }
3276  // We re-align the argument offset for each argument, except under the fast
3277  // calling convention, where we do so only when the argument will actually
3278  // use a stack slot.
3279  unsigned CurArgOffset, Align;
3280  auto ComputeArgOffset = [&]() {
3281  /* Respect alignment of argument on the stack. */
3282  Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3283  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3284  CurArgOffset = ArgOffset;
3285  };
3286 
3287  if (CallConv != CallingConv::Fast) {
3288  ComputeArgOffset();
3289 
3290  /* Compute GPR index associated with argument offset. */
3291  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3292  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3293  }
3294 
3295  // FIXME the codegen can be much improved in some cases.
3296  // We do not have to keep everything in memory.
3297  if (Flags.isByVal()) {
3298  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3299 
3300  if (CallConv == CallingConv::Fast)
3301  ComputeArgOffset();
3302 
3303  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of
3304  // the register size.
3304  ObjSize = Flags.getByValSize();
3305  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3306  // Empty aggregate parameters do not take up registers. Examples:
3307  // struct { } a;
3308  // union { } b;
3309  // int c[0];
3310  // etc. However, we have to provide a place-holder in InVals, so
3311  // pretend we have an 8-byte item at the current address for that
3312  // purpose.
3313  if (!ObjSize) {
3314  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3315  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3316  InVals.push_back(FIN);
3317  continue;
3318  }
3319 
3320  // Create a stack object covering all stack doublewords occupied
3321  // by the argument. If the argument is (fully or partially) on
3322  // the stack, or if the argument is fully in registers but the
3323  // caller has allocated the parameter save anyway, we can refer
3324  // directly to the caller's stack frame. Otherwise, create a
3325  // local copy in our own frame.
3326  int FI;
3327  if (HasParameterArea ||
3328  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3329  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3330  else
3331  FI = MFI.CreateStackObject(ArgSize, Align, false);
3332  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3333 
3334  // Handle aggregates smaller than 8 bytes.
3335  if (ObjSize < PtrByteSize) {
3336  // The value of the object is its address, which differs from the
3337  // address of the enclosing doubleword on big-endian systems.
3338  SDValue Arg = FIN;
3339  if (!isLittleEndian) {
3340  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3341  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3342  }
3343  InVals.push_back(Arg);
3344 
3345  if (GPR_idx != Num_GPR_Regs) {
3346  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3347  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3348  SDValue Store;
3349 
3350  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3351  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3352  (ObjSize == 2 ? MVT::i16 : MVT::i32));
3353  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3354  MachinePointerInfo(&*FuncArg), ObjType);
3355  } else {
3356  // For sizes that don't fit a truncating store (3, 5, 6, 7),
3357  // store the whole register as-is to the parameter save area
3358  // slot.
3359  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3360  MachinePointerInfo(&*FuncArg));
3361  }
3362 
3363  MemOps.push_back(Store);
3364  }
3365  // Whether we copied from a register or not, advance the offset
3366  // into the parameter save area by a full doubleword.
3367  ArgOffset += PtrByteSize;
3368  continue;
3369  }
3370 
3371  // The value of the object is its address, which is the address of
3372  // its first stack doubleword.
3373  InVals.push_back(FIN);
3374 
3375  // Store whatever pieces of the object are in registers to memory.
3376  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3377  if (GPR_idx == Num_GPR_Regs)
3378  break;
3379 
3380  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3381  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3382  SDValue Addr = FIN;
3383  if (j) {
3384  SDValue Off = DAG.getConstant(j, dl, PtrVT);
3385  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3386  }
3387  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3388  MachinePointerInfo(&*FuncArg, j));
3389  MemOps.push_back(Store);
3390  ++GPR_idx;
3391  }
3392  ArgOffset += ArgSize;
3393  continue;
3394  }
3395 
3396  switch (ObjectVT.getSimpleVT().SimpleTy) {
3397  default: llvm_unreachable("Unhandled argument type!");
3398  case MVT::i1:
3399  case MVT::i32:
3400  case MVT::i64:
3401  if (Flags.isNest()) {
3402  // The 'nest' parameter, if any, is passed in R11.
3403  unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3404  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3405 
3406  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3407  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3408 
3409  break;
3410  }
3411 
3412  // These can be scalar arguments or elements of an integer array type
3413  // passed directly. Clang may use those instead of "byval" aggregate
3414  // types to avoid forcing arguments to memory unnecessarily.
3415  if (GPR_idx != Num_GPR_Regs) {
3416  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3417  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3418 
3419  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3420  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3421  // value to MVT::i64 and then truncate to the correct register size.
3422  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3423  } else {
3424  if (CallConv == CallingConv::Fast)
3425  ComputeArgOffset();
3426 
3427  needsLoad = true;
3428  ArgSize = PtrByteSize;
3429  }
3430  if (CallConv != CallingConv::Fast || needsLoad)
3431  ArgOffset += 8;
3432  break;
3433 
3434  case MVT::f32:
3435  case MVT::f64:
3436  // These can be scalar arguments or elements of a float array type
3437  // passed directly. The latter are used to implement ELFv2 homogenous
3438  // float aggregates.
3439  if (FPR_idx != Num_FPR_Regs) {
3440  unsigned VReg;
3441 
3442  if (ObjectVT == MVT::f32)
3443  VReg = MF.addLiveIn(FPR[FPR_idx],
3444  Subtarget.hasP8Vector()
3445  ? &PPC::VSSRCRegClass
3446  : &PPC::F4RCRegClass);
3447  else
3448  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3449  ? &PPC::VSFRCRegClass
3450  : &PPC::F8RCRegClass);
3451 
3452  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3453  ++FPR_idx;
3454  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3455  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3456  // once we support fp <-> gpr moves.
3457 
3458  // This can only ever happen in the presence of f32 array types,
3459  // since otherwise we never run out of FPRs before running out
3460  // of GPRs.
3461  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3462  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3463 
3464  if (ObjectVT == MVT::f32) {
3465  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3466  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3467  DAG.getConstant(32, dl, MVT::i32));
3468  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3469  }
3470 
3471  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3472  } else {
3473  if (CallConv == CallingConv::Fast)
3474  ComputeArgOffset();
3475 
3476  needsLoad = true;
3477  }
3478 
3479  // When passing an array of floats, the array occupies consecutive
3480  // space in the argument area; only round up to the next doubleword
3481  // at the end of the array. Otherwise, each float takes 8 bytes.
3482  if (CallConv != CallingConv::Fast || needsLoad) {
3483  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3484  ArgOffset += ArgSize;
3485  if (Flags.isInConsecutiveRegsLast())
3486  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3487  }
3488  break;
3489  case MVT::v4f32:
3490  case MVT::v4i32:
3491  case MVT::v8i16:
3492  case MVT::v16i8:
3493  case MVT::v2f64:
3494  case MVT::v2i64:
3495  case MVT::v1i128:
3496  if (!Subtarget.hasQPX()) {
3497  // These can be scalar arguments or elements of a vector array type
3498  // passed directly. The latter are used to implement ELFv2 homogenous
3499  // vector aggregates.
3500  if (VR_idx != Num_VR_Regs) {
3501  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3502  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3503  ++VR_idx;
3504  } else {
3505  if (CallConv == CallingConv::Fast)
3506  ComputeArgOffset();
3507 
3508  needsLoad = true;
3509  }
3510  if (CallConv != CallingConv::Fast || needsLoad)
3511  ArgOffset += 16;
3512  break;
3513  } // not QPX
3514 
3515  assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3516  "Invalid QPX parameter type");
3517  /* fall through */
3518 
3519  case MVT::v4f64:
3520  case MVT::v4i1:
3521  // QPX vectors are treated like their scalar floating-point subregisters
3522  // (except that they're larger).
3523  unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3524  if (QFPR_idx != Num_QFPR_Regs) {
3525  const TargetRegisterClass *RC;
3526  switch (ObjectVT.getSimpleVT().SimpleTy) {
3527  case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3528  case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3529  default: RC = &PPC::QBRCRegClass; break;
3530  }
3531 
3532  unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3533  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3534  ++QFPR_idx;
3535  } else {
3536  if (CallConv == CallingConv::Fast)
3537  ComputeArgOffset();
3538  needsLoad = true;
3539  }
3540  if (CallConv != CallingConv::Fast || needsLoad)
3541  ArgOffset += Sz;
3542  break;
3543  }
3544 
3545  // We need to load the argument to a virtual register if we determined
3546  // above that we ran out of physical registers of the appropriate type.
3547  if (needsLoad) {
3548  if (ObjSize < ArgSize && !isLittleEndian)
3549  CurArgOffset += ArgSize - ObjSize;
3550  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3551  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3552  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3553  }
3554 
3555  InVals.push_back(ArgVal);
3556  }
3557 
3558  // Area that is at least reserved in the caller of this function.
3559  unsigned MinReservedArea;
3560  if (HasParameterArea)
3561  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3562  else
3563  MinReservedArea = LinkageSize;
3564 
3565  // Set the size that is at least reserved in caller of this function. Tail
3566  // call optimized functions' reserved stack space needs to be aligned so that
3567  // taking the difference between two stack areas will result in an aligned
3568  // stack.
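// (Both areas are rounded up to the stack alignment, and the difference of
// two multiples of the alignment is itself a multiple of the alignment.)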
3569  MinReservedArea =
3570  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3571  FuncInfo->setMinReservedArea(MinReservedArea);
3572 
3573  // If the function takes variable number of arguments, make a frame index for
3574  // the start of the first vararg value... for expansion of llvm.va_start.
3575  if (isVarArg) {
3576  int Depth = ArgOffset;
3577 
3578  FuncInfo->setVarArgsFrameIndex(
3579  MFI.CreateFixedObject(PtrByteSize, Depth, true));
3580  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3581 
3582  // If this function is vararg, store any remaining integer argument regs
3583  // to their spots on the stack so that they may be loaded by dereferencing
3584  // the result of va_next.
3585  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3586  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3587  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3588  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3589  SDValue Store =
3590  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3591  MemOps.push_back(Store);
3592  // Increment the address by the pointer size for the next argument to store
3593  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3594  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3595  }
3596  }
3597 
3598  if (!MemOps.empty())
3599  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3600 
3601  return Chain;
3602 }
3603 
3604 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
3605  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3606  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3607  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3608  // TODO: add description of PPC stack frame format, or at least some docs.
3609  //
3610  MachineFunction &MF = DAG.getMachineFunction();
3611  MachineFrameInfo &MFI = MF.getFrameInfo();
3612  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3613 
3614  EVT PtrVT = getPointerTy(MF.getDataLayout());
3615  bool isPPC64 = PtrVT == MVT::i64;
3616  // Potential tail calls could cause overwriting of argument stack slots.
3617  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3618  (CallConv == CallingConv::Fast));
3619  unsigned PtrByteSize = isPPC64 ? 8 : 4;
3620  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3621  unsigned ArgOffset = LinkageSize;
3622  // Area that is at least reserved in caller of this function.
3623  unsigned MinReservedArea = ArgOffset;
3624 
3625  static const MCPhysReg GPR_32[] = { // 32-bit registers.
3626  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3627  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3628  };
3629  static const MCPhysReg GPR_64[] = { // 64-bit registers.
3630  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3631  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3632  };
3633  static const MCPhysReg VR[] = {
3634  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3635  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3636  };
3637 
3638  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3639  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3640  const unsigned Num_VR_Regs = array_lengthof( VR);
3641 
3642  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3643 
3644  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3645 
3646  // In 32-bit non-varargs functions, the stack space for vectors is after the
3647  // stack space for non-vectors. We do not use this space unless we have
3648  // too many vectors to fit in registers, something that only occurs in
3649  // constructed examples, but we have to walk the arglist to figure
3650  // that out...for the pathological case, compute VecArgOffset as the
3651  // start of the vector parameter area. Computing VecArgOffset is the
3652  // entire point of the following loop.
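// Illustrative example (assuming a 24-byte linkage area): for
// f(i32, f64, <4 x i32>) this loop leaves VecArgOffset = 24 + 4 + 8 = 36,
// which is then rounded up to 48 before the 12*16-byte skip below.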
3653  unsigned VecArgOffset = ArgOffset;
3654  if (!isVarArg && !isPPC64) {
3655  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3656  ++ArgNo) {
3657  EVT ObjectVT = Ins[ArgNo].VT;
3658  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3659 
3660  if (Flags.isByVal()) {
3661  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
3662  unsigned ObjSize = Flags.getByValSize();
3663  unsigned ArgSize =
3664  ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3665  VecArgOffset += ArgSize;
3666  continue;
3667  }
3668 
3669  switch(ObjectVT.getSimpleVT().SimpleTy) {
3670  default: llvm_unreachable("Unhandled argument type!");
3671  case MVT::i1:
3672  case MVT::i32:
3673  case MVT::f32:
3674  VecArgOffset += 4;
3675  break;
3676  case MVT::i64: // PPC64
3677  case MVT::f64:
3678  // FIXME: We are guaranteed to be !isPPC64 at this point.
3679  // Does MVT::i64 apply?
3680  VecArgOffset += 8;
3681  break;
3682  case MVT::v4f32:
3683  case MVT::v4i32:
3684  case MVT::v8i16:
3685  case MVT::v16i8:
3686  // Nothing to do, we're only looking at non-vector args here.
3687  break;
3688  }
3689  }
3690  }
3691  // We've found where the vector parameter area in memory is. Skip the
3692  // first 12 parameters; these don't use that memory.
3693  VecArgOffset = ((VecArgOffset+15)/16)*16;
3694  VecArgOffset += 12*16;
3695 
3696  // Add DAG nodes to load the arguments or copy them out of registers. On
3697  // entry to a function on PPC, the arguments start after the linkage area,
3698  // although the first ones are often in registers.
3699 
3700  SmallVector<SDValue, 8> MemOps;
3701  unsigned nAltivecParamsAtEnd = 0;
3702  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3703  unsigned CurArgIdx = 0;
3704  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3705  SDValue ArgVal;
3706  bool needsLoad = false;
3707  EVT ObjectVT = Ins[ArgNo].VT;
3708  unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3709  unsigned ArgSize = ObjSize;
3710  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3711  if (Ins[ArgNo].isOrigArg()) {
3712  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3713  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3714  }
3715  unsigned CurArgOffset = ArgOffset;
3716 
3717  // Vararg or 64-bit Altivec parameters are padded to a 16-byte boundary.
3718  if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3719  ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3720  if (isVarArg || isPPC64) {
3721  MinReservedArea = ((MinReservedArea+15)/16)*16;
3722  MinReservedArea += CalculateStackSlotSize(ObjectVT,
3723  Flags,
3724  PtrByteSize);
3725  } else nAltivecParamsAtEnd++;
3726  } else
3727  // Calculate min reserved area.
3728  MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3729  Flags,
3730  PtrByteSize);
3731 
3732  // FIXME the codegen can be much improved in some cases.
3733  // We do not have to keep everything in memory.
3734  if (Flags.isByVal()) {
3735  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3736 
3737  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3738  ObjSize = Flags.getByValSize();
3739  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3740  // Objects of size 1 and 2 are right justified, everything else is
3741  // left justified. This means the memory address is adjusted forwards.
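// For example, a 1-byte byval object begins 3 bytes into its 4-byte slot,
// so its effective address below becomes CurArgOffset + 3.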
3742  if (ObjSize==1 || ObjSize==2) {
3743  CurArgOffset = CurArgOffset + (4 - ObjSize);
3744  }
3745  // The value of the object is its address.
3746  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
3747  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3748  InVals.push_back(FIN);
3749  if (ObjSize==1 || ObjSize==2) {
3750  if (GPR_idx != Num_GPR_Regs) {
3751  unsigned VReg;
3752  if (isPPC64)
3753  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3754  else
3755  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3756  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3757  EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3758  SDValue Store =
3759  DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3760  MachinePointerInfo(&*FuncArg), ObjType);
3761  MemOps.push_back(Store);
3762  ++GPR_idx;
3763  }
3764 
3765  ArgOffset += PtrByteSize;
3766 
3767  continue;
3768  }
3769  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3770  // Store whatever pieces of the object are in registers
3771  // to memory. ArgOffset will be the address of the beginning
3772  // of the object.
3773  if (GPR_idx != Num_GPR_Regs) {
3774  unsigned VReg;
3775  if (isPPC64)
3776  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3777  else
3778  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3779  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3780  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3781  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3782  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3783  MachinePointerInfo(&*FuncArg, j));
3784  MemOps.push_back(Store);
3785  ++GPR_idx;
3786  ArgOffset += PtrByteSize;
3787  } else {
3788  ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3789  break;
3790  }
3791  }
3792  continue;
3793  }
3794 
3795  switch (ObjectVT.getSimpleVT().SimpleTy) {
3796  default: llvm_unreachable("Unhandled argument type!");
3797  case MVT::i1:
3798  case MVT::i32:
3799  if (!isPPC64) {
3800  if (GPR_idx != Num_GPR_Regs) {
3801  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3802  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3803 
3804  if (ObjectVT == MVT::i1)
3805  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3806 
3807  ++GPR_idx;
3808  } else {
3809  needsLoad = true;
3810  ArgSize = PtrByteSize;
3811  }
3812  // All int arguments reserve stack space in the Darwin ABI.
3813  ArgOffset += PtrByteSize;
3814  break;
3815  }
3816  LLVM_FALLTHROUGH;
3817  case MVT::i64: // PPC64
3818  if (GPR_idx != Num_GPR_Regs) {
3819  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3820  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3821 
3822  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3823  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3824  // value to MVT::i64 and then truncate to the correct register size.
3825  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3826 
3827  ++GPR_idx;
3828  } else {
3829  needsLoad = true;
3830  ArgSize = PtrByteSize;
3831  }
3832  // All int arguments reserve stack space in the Darwin ABI.
3833  ArgOffset += 8;
3834  break;
3835 
3836  case MVT::f32:
3837  case MVT::f64:
3838  // Every 4 bytes of argument space consumes one of the GPRs available for
3839  // argument passing.
3840  if (GPR_idx != Num_GPR_Regs) {
3841  ++GPR_idx;
3842  if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3843  ++GPR_idx;
3844  }
3845  if (FPR_idx != Num_FPR_Regs) {
3846  unsigned VReg;
3847 
3848  if (ObjectVT == MVT::f32)
3849  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3850  else
3851  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3852 
3853  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3854  ++FPR_idx;
3855  } else {
3856  needsLoad = true;
3857  }
3858 
3859  // All FP arguments reserve stack space in the Darwin ABI.
3860  ArgOffset += isPPC64 ? 8 : ObjSize;
3861  break;
3862  case MVT::v4f32:
3863  case MVT::v4i32:
3864  case MVT::v8i16:
3865  case MVT::v16i8:
3866  // Note that vector arguments in registers don't reserve stack space,
3867  // except in varargs functions.
3868  if (VR_idx != Num_VR_Regs) {
3869  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3870  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3871  if (isVarArg) {
3872  while ((ArgOffset % 16) != 0) {
3873  ArgOffset += PtrByteSize;
3874  if (GPR_idx != Num_GPR_Regs)
3875  GPR_idx++;
3876  }
3877  ArgOffset += 16;
3878  GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3879  }
3880  ++VR_idx;
3881  } else {
3882  if (!isVarArg && !isPPC64) {
3883  // Vectors go after all the nonvectors.
3884  CurArgOffset = VecArgOffset;
3885  VecArgOffset += 16;
3886  } else {
3887  // Vectors are aligned.
3888  ArgOffset = ((ArgOffset+15)/16)*16;
3889  CurArgOffset = ArgOffset;
3890  ArgOffset += 16;
3891  }
3892  needsLoad = true;
3893  }
3894  break;
3895  }
3896 
3897  // We need to load the argument to a virtual register if we determined above
3898  // that we ran out of physical registers of the appropriate type.
3899  if (needsLoad) {
3900  int FI = MFI.CreateFixedObject(ObjSize,
3901  CurArgOffset + (ArgSize - ObjSize),
3902  isImmutable);
3903  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3904  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3905  }
3906 
3907  InVals.push_back(ArgVal);
3908  }
3909 
3910  // Allow for Altivec parameters at the end, if needed.
3911  if (nAltivecParamsAtEnd) {
3912  MinReservedArea = ((MinReservedArea+15)/16)*16;
3913  MinReservedArea += 16*nAltivecParamsAtEnd;
3914  }
3915 
3916  // Area that is at least reserved in the caller of this function.
3917  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3918 
3919  // Set the size that is at least reserved in caller of this function. Tail
3920  // call optimized functions' reserved stack space needs to be aligned so that
3921  // taking the difference between two stack areas will result in an aligned
3922  // stack.
3923  MinReservedArea =
3924  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3925  FuncInfo->setMinReservedArea(MinReservedArea);
3926 
3927  // If the function takes variable number of arguments, make a frame index for
3928  // the start of the first vararg value... for expansion of llvm.va_start.
3929  if (isVarArg) {
3930  int Depth = ArgOffset;
3931 
3932  FuncInfo->setVarArgsFrameIndex(
3933  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3934  Depth, true));
3935  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3936 
3937  // If this function is vararg, store any remaining integer argument regs
3938  // to their spots on the stack so that they may be loaded by dereferencing
3939  // the result of va_next.
3940  for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3941  unsigned VReg;
3942 
3943  if (isPPC64)
3944  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3945  else
3946  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3947 
3948  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3949  SDValue Store =
3950  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3951  MemOps.push_back(Store);
3952  // Increment the address by the pointer size for the next argument to store
3953  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3954  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3955  }
3956  }
3957 
3958  if (!MemOps.empty())
3959  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3960 
3961  return Chain;
3962 }
3963 
3964 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3965 /// adjusted to accommodate the arguments for the tailcall.
3966 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3967  unsigned ParamSize) {
3968 
3969  if (!isTailCall) return 0;
3970 
3971  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3972  unsigned CallerMinReservedArea = FI->getMinReservedArea();
3973  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
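// A negative SPDiff means the callee needs more argument space than the
// caller reserved; e.g. (illustrative) a 64-byte reserved area with 96
// bytes of parameters gives SPDiff = -32, so the stack must be grown.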
3974  // Remember only if the new adjustment is bigger.
3975  if (SPDiff < FI->getTailCallSPDelta())
3976  FI->setTailCallSPDelta(SPDiff);
3977 
3978  return SPDiff;
3979 }
3980 
3981 static bool isFunctionGlobalAddress(SDValue Callee);
3982 
3983 static bool
3984 resideInSameSection(const Function *Caller, SDValue Callee,
3985  const TargetMachine &TM) {
3986  // If !G, Callee can be an external symbol.
3987  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3988  if (!G)
3989  return false;
3990 
3991  const GlobalValue *GV = G->getGlobal();
3992  if (!GV->isStrongDefinitionForLinker())
3993  return false;
3994 
3995  // Any explicitly-specified sections and section prefixes must also match.
3996  // Also, if we're using -ffunction-sections, then each function is always in
3997  // a different section (the same is true for COMDAT functions).
3998  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
3999  GV->getSection() != Caller->getSection())
4000  return false;
4001  if (const auto *F = dyn_cast<Function>(GV)) {
4002  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4003  return false;
4004  }
4005 
4006  // If the callee might be interposed, then we can't assume the ultimate call
4007  // target will be in the same section. Even in cases where we can assume that
4008  // interposition won't happen, in any case where the linker might insert a
4009  // stub to allow for interposition, we must generate code as though
4010  // interposition might occur. To understand why this matters, consider a
4011  // situation where: a -> b -> c where the arrows indicate calls. b and c are
4012  // in the same section, but a is in a different module (i.e. has a different
4013  // TOC base pointer). If the linker allows for interposition between b and c,
4014  // then it will generate a stub for the call edge between b and c which will
4015  // save the TOC pointer into the designated stack slot allocated by b. If we
4016  // return true here, and therefore allow a tail call between b and c, that
4017  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4018  // pointer into the stack slot allocated by a (where the a -> b stub saved
4019  // a's TOC base pointer). If we're not considering a tail call, but rather,
4020  // whether a nop is needed after the call instruction in b, because the linker
4021  // will insert a stub, it might complain about a missing nop if we omit it
4022  // (although many don't complain in this case).
4023  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4024  return false;
4025 
4026  return true;
4027 }
4028 
4029 static bool
4030 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4031  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4032  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4033 
4034  const unsigned PtrByteSize = 8;
4035  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4036 
4037  static const MCPhysReg GPR[] = {
4038  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4039  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4040  };
4041  static const MCPhysReg VR[] = {
4042  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4043  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4044  };
4045 
4046  const unsigned NumGPRs = array_lengthof(GPR);
4047  const unsigned NumFPRs = 13;
4048  const unsigned NumVRs = array_lengthof(VR);
4049  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4050 
4051  unsigned NumBytes = LinkageSize;
4052  unsigned AvailableFPRs = NumFPRs;
4053  unsigned AvailableVRs = NumVRs;
4054 
4055  for (const ISD::OutputArg& Param : Outs) {
4056  if (Param.Flags.isNest()) continue;
4057 
4058  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4059  PtrByteSize, LinkageSize, ParamAreaSize,
4060  NumBytes, AvailableFPRs, AvailableVRs,
4061  Subtarget.hasQPX()))
4062  return true;
4063  }
4064  return false;
4065 }
4066 
4067 static bool
4068 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4069  if (CS->arg_size() != CallerFn->getArgumentList().size())
4070  return false;
4071 
4072  ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4073  ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4074  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4075 
4076  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4077  const Value* CalleeArg = *CalleeArgIter;
4078  const Value* CallerArg = &(*CallerArgIter);
4079  if (CalleeArg == CallerArg)
4080  continue;
4081 
4082  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4083  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4084  // }
4085  // 1st argument of callee is undef and has the same type as caller.
4086  if (CalleeArg->getType() == CallerArg->getType() &&
4087  isa<UndefValue>(CalleeArg))
4088  continue;
4089 
4090  return false;
4091  }
4092 
4093  return true;
4094 }
4095 
4096 bool
4097 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4098  SDValue Callee,
4099  CallingConv::ID CalleeCC,
4100  ImmutableCallSite *CS,
4101  bool isVarArg,
4102  const SmallVectorImpl<ISD::OutputArg> &Outs,
4103  const SmallVectorImpl<ISD::InputArg> &Ins,
4104  SelectionDAG& DAG) const {
4105  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4106 
4107  if (DisableSCO && !TailCallOpt) return false;
4108 
4109  // Variadic argument functions are not supported.
4110  if (isVarArg) return false;
4111 
4112  MachineFunction &MF = DAG.getMachineFunction();
4113  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4114 
4115  // Tail or sibling call optimization (TCO/SCO) requires that the callee and
4116  // caller have the same calling convention.
4117  if (CallerCC != CalleeCC) return false;
4118 
4119  // SCO is supported only for the C and Fast calling conventions.
4120  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4121  return false;
4122 
4123  // Callers with byval parameters are not supported.
4124  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4125  return false;
4126 
4127  // Callees with byval parameters are not supported either.
4128  // Note: This is a quick work around, because in some cases, e.g.
4129  // caller's stack size > callee's stack size, we are still able to apply
4130  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4131  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4132  return false;
4133 
4134  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4135  if (!isFunctionGlobalAddress(Callee) &&
4136  !isa<ExternalSymbolSDNode>(Callee))
4137  return false;
4138 
4139  // Check if Callee resides in the same section, because for now, the PPC64
4140  // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
4141  // in another section.
4142  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4143  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
4144  return false;
4145 
4146  // TCO allows altering callee ABI, so we don't have to check further.
4147  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4148  return true;
4149 
4150  if (DisableSCO) return false;
4151 
4152  // If the callee uses the same argument list as the caller, we can apply
4153  // SCO in this case. Otherwise, we need to check whether the callee needs
4154  // stack for passing arguments.
4155  if (!hasSameArgumentList(MF.getFunction(), CS) &&
4156  needStackSlotPassParameters(Subtarget, Outs)) {
4157  return false;
4158  }
4159 
4160  return true;
4161 }
4162 
4163 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4164 /// for tail call optimization. Targets which want to do tail call
4165 /// optimization should implement this function.
4166 bool
4167 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4168  CallingConv::ID CalleeCC,
4169  bool isVarArg,
4170  const SmallVectorImpl<ISD::InputArg> &Ins,
4171  SelectionDAG& DAG) const {
4172  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4173  return false;
4174 
4175  // Variable argument functions are not supported.
4176  if (isVarArg)
4177  return false;
4178 
4179  MachineFunction &MF = DAG.getMachineFunction();
4180  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4181  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4182  // Functions containing byval parameters are not supported.
4183  for (unsigned i = 0; i != Ins.size(); i++) {
4184  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4185  if (Flags.isByVal()) return false;
4186  }
4187 
4188  // Non-PIC/GOT tail calls are supported.
4189  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4190  return true;
4191 
4192  // At the moment we can only do local tail calls (in same module, hidden
4193  // or protected) if we are generating PIC.
4194  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4195  return G->getGlobal()->hasHiddenVisibility()
4196  || G->getGlobal()->hasProtectedVisibility();
4197  }
4198 
4199  return false;
4200 }
4201 
4202 /// isBLACompatibleAddress - Return the immediate to use if the specified
4203 /// 32-bit value is representable in the immediate field of a BxA instruction.
4204 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4205  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4206  if (!C) return nullptr;
4207 
4208  int Addr = C->getZExtValue();
4209  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4210  SignExtend32<26>(Addr) != Addr)
4211  return nullptr; // Top 6 bits have to be sext of immediate.
4212 
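// For example (illustrative): Op = 0x100 is word-aligned and sign-extends
// from 26 bits, so the immediate returned below is 0x100 >> 2 = 0x40.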
4213  return DAG
4214  .getConstant(
4215  (int)C->getZExtValue() >> 2, SDLoc(Op),
4216  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4217  .getNode();
4218 }
4219 
4220 namespace {
4221 
4222 struct TailCallArgumentInfo {
4223  SDValue Arg;
4224  SDValue FrameIdxOp;
4225  int FrameIdx;
4226 
4227  TailCallArgumentInfo() : FrameIdx(0) {}
4228 };
4229 }
4230 
4231 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4232 static void StoreTailCallArgumentsToStackSlot(
4233  SelectionDAG &DAG, SDValue Chain,
4234  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4235  SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4236  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4237  SDValue Arg = TailCallArgs[i].Arg;
4238  SDValue FIN = TailCallArgs[i].FrameIdxOp;
4239  int FI = TailCallArgs[i].FrameIdx;
4240  // Store relative to framepointer.
4241  MemOpChains.push_back(DAG.getStore(
4242  Chain, dl, Arg, FIN,
4243  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4244  }
4245 }
4246 
4247 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4248 /// the appropriate stack slot for the tail call optimized function call.
4249 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4250  SDValue OldRetAddr, SDValue OldFP,
4251  int SPDiff, const SDLoc &dl) {
4252  if (SPDiff) {
4253  // Calculate the new stack slot for the return address.
4254  MachineFunction &MF = DAG.getMachineFunction();
4255  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4256  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4257  bool isPPC64 = Subtarget.isPPC64();
4258  int SlotSize = isPPC64 ? 8 : 4;
4259  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4260  int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4261  NewRetAddrLoc, true);
4262  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4263  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4264  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4265  MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4266 
4267  // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4268  // slot as the FP is never overwritten.
4269  if (Subtarget.isDarwinABI()) {
4270  int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4271  int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4272  true);
4273  SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4274  Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4275  MachinePointerInfo::getFixedStack(
4276  DAG.getMachineFunction(), NewFPIdx));
4277  }
4278  }
4279  return Chain;
4280 }
4281 
4282 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4283 /// the position of the argument.
4284 static void
4285 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4286  SDValue Arg, int SPDiff, unsigned ArgOffset,
4287  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4288  int Offset = ArgOffset + SPDiff;
4289  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4290  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4291  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4292  SDValue FIN = DAG.getFrameIndex(FI, VT);
4293  TailCallArgumentInfo Info;
4294  Info.Arg = Arg;
4295  Info.FrameIdxOp = FIN;
4296  Info.FrameIdx = FI;
4297  TailCallArguments.push_back(Info);
4298 }
4299 
4300 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
4301 /// address stack slots. Returns the chain as result and the loaded values in
4302 /// LROpOut/FPOpOut. Used when tail calling.
4303 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4304  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4305  SDValue &FPOpOut, const SDLoc &dl) const {
4306  if (SPDiff) {
4307  // Load the LR and FP stack slot for later adjusting.
4308  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4309  LROpOut = getReturnAddrFrameIndex(DAG);
4310  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4311  Chain = SDValue(LROpOut.getNode(), 1);
4312 
4313  // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4314  // slot as the FP is never overwritten.
4315  if (Subtarget.isDarwinABI()) {
4316  FPOpOut = getFramePointerFrameIndex(DAG);
4317  FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4318  Chain = SDValue(FPOpOut.getNode(), 1);
4319  }
4320  }
4321  return Chain;
4322 }
4323 
4324 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4325 /// by "Src" to address "Dst" of size "Size". Alignment information is
4326 /// specified by the specific parameter attribute. The copy will be passed as
4327 /// a byval function parameter.
4328 /// Sometimes what we are copying is the end of a larger object, the part that
4329 /// does not fit in registers.
4330 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4331  SDValue Chain, ISD::ArgFlagsTy Flags,
4332  SelectionDAG &DAG, const SDLoc &dl) {
4333  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4334  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4335  false, false, false, MachinePointerInfo(),
4336  MachinePointerInfo());
4337 }
4338 
4339 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4340 /// tail calls.
4341 static void LowerMemOpCallTo(
4342  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4343  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4344  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4345  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4346  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4347  if (!isTailCall) {
4348  if (isVector) {
4349  SDValue StackPtr;
4350  if (isPPC64)
4351  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4352  else
4353  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4354  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4355  DAG.getConstant(ArgOffset, dl, PtrVT));
4356  }
4357  MemOpChains.push_back(
4358  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4359  // Calculate and remember argument location.
4360  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4361  TailCallArguments);
4362 }
4363 
4364 static void
4365 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4366  const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4367  SDValue FPOp,
4368  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4369  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4370  // might overwrite each other in case of tail call optimization.
4371  SmallVector<SDValue, 8> MemOpChains2;
4372  // Do not flag preceding copytoreg stuff together with the following stuff.
4373  InFlag = SDValue();
4374  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4375  MemOpChains2, dl);
4376  if (!MemOpChains2.empty())
4377  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4378 
4379  // Store the return address to the appropriate stack slot.
4380  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4381 
4382  // Emit callseq_end just before tailcall node.
4383  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4384  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4385  InFlag = Chain.getValue(1);
4386 }
4387 
4388 // Is this global address that of a function that can be called by name? (as
4389 // opposed to something that must hold a descriptor for an indirect call).
4390 static bool isFunctionGlobalAddress(SDValue Callee) {
4391  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4392  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4393  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4394  return false;
4395 
4396  return G->getGlobal()->getValueType()->isFunctionTy();
4397  }
4398 
4399  return false;
4400 }
4401 
4402 static unsigned
4403 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4404  SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4405  bool isPatchPoint, bool hasNest,
4406  SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4407  SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4408  ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4409 
4410  bool isPPC64 = Subtarget.isPPC64();
4411  bool isSVR4ABI = Subtarget.isSVR4ABI();
4412  bool isELFv2ABI = Subtarget.isELFv2ABI();
4413 
4414  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4415  NodeTys.push_back(MVT::Other); // Returns a chain
4416  NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4417 
4418  unsigned CallOpc = PPCISD::CALL;
4419 
4420  bool needIndirectCall = true;
4421  if (!isSVR4ABI || !isPPC64)
4422  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4423  // If this is an absolute destination address, use the munged value.
4424  Callee = SDValue(Dest, 0);
4425  needIndirectCall = false;
4426  }
4427 
4428  // PC-relative references to external symbols should go through $stub, unless
4429  // we're building with the leopard linker or later, which automatically
4430  // synthesizes these stubs.
4431  const TargetMachine &TM = DAG.getTarget();
4432  const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4433  const GlobalValue *GV = nullptr;
4434  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4435  GV = G->getGlobal();
4436  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4437  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4438 
4439  if (isFunctionGlobalAddress(Callee)) {
4440  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4441  // A call to a TLS address is actually an indirect call to a
4442  // thread-specific pointer.
4443  unsigned OpFlags = 0;
4444  if (UsePlt)
4445  OpFlags = PPCII::MO_PLT;
4446 
4447  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4448  // every direct call is) turn it into a TargetGlobalAddress /
4449  // TargetExternalSymbol node so that legalize doesn't hack it.
4450  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4451  Callee.getValueType(), 0, OpFlags);
4452  needIndirectCall = false;
4453  }
4454 
4455  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4456  unsigned char OpFlags = 0;
4457 
4458  if (UsePlt)
4459  OpFlags = PPCII::MO_PLT;
4460 
4461  Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4462  OpFlags);
4463  needIndirectCall = false;
4464  }
4465 
4466  if (isPatchPoint) {
4467  // We'll form an invalid direct call when lowering a patchpoint; the full
4468  // sequence for an indirect call is complicated, and many of the
4469  // instructions introduced might have side effects (and, thus, can't be
4470  // removed later). The call itself will be removed as soon as the
4471  // argument/return lowering is complete, so the fact that it has the wrong
4472  // kind of operands should not really matter.
4473  needIndirectCall = false;
4474  }
4475 
4476  if (needIndirectCall) {
4477  // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4478  // to do the call, we can't use PPCISD::CALL.
4479  SDValue MTCTROps[] = {Chain, Callee, InFlag};
4480 
4481  if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4482  // Function pointers in the 64-bit SVR4 ABI do not point to the function
4483  // entry point, but to the function descriptor (the function entry point
4484  // address is part of the function descriptor though).
4485  // The function descriptor is a three doubleword structure with the
4486  // following fields: function entry point, TOC base address and
4487  // environment pointer.
4488  // Thus for a call through a function pointer, the following actions need
4489  // to be performed:
4490  // 1. Save the TOC of the caller in the TOC save area of its stack
4491  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4492  // 2. Load the address of the function entry point from the function
4493  // descriptor.
4494  // 3. Load the TOC of the callee from the function descriptor into r2.
4495  // 4. Load the environment pointer from the function descriptor into
4496  // r11.
4497  // 5. Branch to the function entry point address.
4498  // 6. On return of the callee, the TOC of the caller needs to be
4499  // restored (this is done in FinishCall()).
4500  //
4501  // The loads are scheduled at the beginning of the call sequence, and the
4502  // register copies are flagged together to ensure that no other
4503  // operations can be scheduled in between. E.g. without flagging the
4504  // copies together, a TOC access in the caller could be scheduled between
4505  // the assignment of the callee TOC and the branch to the callee, which
4506  // results in the TOC access going through the TOC of the callee instead
4507  // of going through the TOC of the caller, which leads to incorrect code.
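// Conceptually (an illustrative sketch, not a type used in this code):
//   struct FunctionDescriptor {
//     uint64_t EntryPoint; // loaded below from offset 0 into CTR
//     uint64_t TOCBase;    // loaded below from offset 8 into r2
//     uint64_t EnvPtr;     // loaded below from offset 16 into r11
//   };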
4508 
4509  // Load the address of the function entry point from the function
4510  // descriptor.
4511  SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4512  if (LDChain.getValueType() == MVT::Glue)
4513  LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4514 
4515  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4516  ? (MachineMemOperand::MODereferenceable |
4517  MachineMemOperand::MOInvariant)
4518  : MachineMemOperand::MONone;
4519 
4520  MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4521  SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4522  /* Alignment = */ 8, MMOFlags);
4523 
4524  // Load environment pointer into r11.
4525  SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4526  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4527  SDValue LoadEnvPtr =
4528  DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4529  /* Alignment = */ 8, MMOFlags);
4530 
4531  SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4532  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4533  SDValue TOCPtr =
4534  DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4535  /* Alignment = */ 8, MMOFlags);
4536 
4537  setUsesTOCBasePtr(DAG);
4538  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4539  InFlag);
4540  Chain = TOCVal.getValue(0);
4541  InFlag = TOCVal.getValue(1);
4542 
4543  // If the function call has an explicit 'nest' parameter, it takes the
4544  // place of the environment pointer.
4545  if (!hasNest) {
4546  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4547  InFlag);
4548 
4549  Chain = EnvVal.getValue(0);
4550  InFlag = EnvVal.getValue(1);
4551  }
4552 
4553  MTCTROps[0] = Chain;
4554  MTCTROps[1] = LoadFuncPtr;
4555  MTCTROps[2] = InFlag;
4556  }
4557 
4558  Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4559  makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4560  InFlag = Chain.getValue(1);
4561 
4562  NodeTys.clear();
4563  NodeTys.push_back(MVT::Other);
4564  NodeTys.push_back(MVT::Glue);
4565  Ops.push_back(Chain);
4566  CallOpc = PPCISD::BCTRL;
4567  Callee.setNode(nullptr);
4568  // Add use of X11 (holding environment pointer)
4569  if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4570  Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4571  // Add CTR register as callee so a bctr can be emitted later.
4572  if (isTailCall)
4573  Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4574  }
4575 
4576  // If this is a direct call, pass the chain and the callee.
4577  if (Callee.getNode()) {
4578  Ops.push_back(Chain);
4579  Ops.push_back(Callee);
4580  }
4581  // If this is a tail call add stack pointer delta.
4582  if (isTailCall)
4583  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4584 
4585  // Add argument registers to the end of the list so that they are known live
4586  // into the call.
4587  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4588  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4589  RegsToPass[i].second.getValueType()));
4590 
4591  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4592  // into the call.
4593  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4594  setUsesTOCBasePtr(DAG);
4595  Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4596  }
4597 
4598  return CallOpc;
4599 }
4600 
4601 SDValue PPCTargetLowering::LowerCallResult(
4602  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4603  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4604  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4605 
4606  SmallVector<CCValAssign, 16> RVLocs;
4607  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4608  *DAG.getContext());
4609  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4610 
4611  // Copy all of the result registers out of their specified physreg.
4612  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4613  CCValAssign &VA = RVLocs[i];
4614  assert(VA.isRegLoc() && "Can only return in registers!");
4615 
4616  SDValue Val = DAG.getCopyFromReg(Chain, dl,
4617  VA.getLocReg(), VA.getLocVT(), InFlag);
4618  Chain = Val.getValue(1);
4619  InFlag = Val.getValue(2);
4620 
4621  switch (VA.getLocInfo()) {
4622  default: llvm_unreachable("Unknown loc info!");
4623  case CCValAssign::Full: break;
4624  case CCValAssign::AExt:
4625  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4626  break;
4627  case CCValAssign::ZExt:
4628  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4629  DAG.getValueType(VA.getValVT()));
4630  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4631  break;
4632  case CCValAssign::SExt:
4633  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4634  DAG.getValueType(VA.getValVT()));
4635  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4636  break;
4637  }
4638 
4639  InVals.push_back(Val);
4640  }
4641 
4642  return Chain;
4643 }
4644 
4645 SDValue PPCTargetLowering::FinishCall(
4646  CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4647  bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4648  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4649  SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4650  unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4651  SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4652 
4653  std::vector<EVT> NodeTys;
4654  SmallVector<SDValue, 8> Ops;
4655  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4656  SPDiff, isTailCall, isPatchPoint, hasNest,
4657  RegsToPass, Ops, NodeTys, CS, Subtarget);
4658 
4659  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4660  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4661  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4662 
4663  // When performing tail call optimization the callee pops its arguments off
4664  // the stack. Account for this here so these bytes can be pushed back on in
4665  // PPCFrameLowering::eliminateCallFramePseudoInstr.
4666  int BytesCalleePops =
4667  (CallConv == CallingConv::Fast &&
4668  getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4669 
4670  // Add a register mask operand representing the call-preserved registers.
4671  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4672  const uint32_t *Mask =
4673  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4674  assert(Mask && "Missing call preserved mask for calling convention");
4675  Ops.push_back(DAG.getRegisterMask(Mask));
4676 
4677  if (InFlag.getNode())
4678  Ops.push_back(InFlag);
4679 
4680  // Emit tail call.
4681  if (isTailCall) {
4682  assert(((Callee.getOpcode() == ISD::Register &&
4683  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4684  Callee.getOpcode() == ISD::TargetExternalSymbol ||
4685  Callee.getOpcode() == ISD::TargetGlobalAddress ||
4686  isa<ConstantSDNode>(Callee)) &&
4687  "Expecting a global address, external symbol, absolute value or register");
4688 
4689  DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4690  return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4691  }
4692 
4693  // Add a NOP immediately after the branch instruction when using the 64-bit
4694  // SVR4 ABI. At link time, if caller and callee are in a different module and
4695  // thus have a different TOC, the call will be replaced with a call to a stub
4696  // function which saves the current TOC, loads the TOC of the callee and
4697  // branches to the callee. The NOP will be replaced with a load instruction
4698  // which restores the TOC of the caller from the TOC save slot of the current
4699  // stack frame. If caller and callee belong to the same module (and have the
4700  // same TOC), the NOP will remain unchanged.
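// An illustrative linked call sequence (the TOC save slot offset is
// ABI-dependent, e.g. 40(r1) for ELFv1 and 24(r1) for ELFv2):
//   bl callee        # redirected by the linker to a TOC-saving stub
//   ld r2, 40(r1)    # the NOP emitted here is rewritten to this reload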
4701 
4702  MachineFunction &MF = DAG.getMachineFunction();
4703  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
4704  !isPatchPoint) {
4705  if (CallOpc == PPCISD::BCTRL) {
4706  // This is a call through a function pointer.
4707  // Restore the caller TOC from the save area into R2.
4708  // See PrepareCall() for more information about calls through function
4709  // pointers in the 64-bit SVR4 ABI.
4710  // We are using a target-specific load with r2 hard coded, because the
4711  // result of a target-independent load would never go directly into r2,
4712  // since r2 is a reserved register (which prevents the register allocator
4713  // from allocating it), resulting in an additional register being
4714  // allocated and an unnecessary move instruction being generated.
4715  CallOpc = PPCISD::BCTRL_LOAD_TOC;
4716 
4717  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4718  SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4719  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4720  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4721  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4722 
4723  // The address needs to go after the chain input but before the flag (or
4724  // any other variadic arguments).
4725  Ops.insert(std::next(Ops.begin()), AddTOC);
4726  } else if (CallOpc == PPCISD::CALL &&
4727  !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
4728  // Otherwise insert NOP for non-local calls.
4729  CallOpc = PPCISD::CALL_NOP;
4730  }
4731  }
4732 
4733  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4734  InFlag = Chain.getValue(1);
4735 
4736  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4737  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4738  InFlag, dl);
4739  if (!Ins.empty())
4740  InFlag = Chain.getValue(1);
4741 
4742  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4743  Ins, dl, DAG, InVals);
4744 }
4745 
4746 SDValue
4747 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4748  SmallVectorImpl<SDValue> &InVals) const {
4749  SelectionDAG &DAG = CLI.DAG;
4750  SDLoc &dl = CLI.DL;
4751  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4752  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4753  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4754  SDValue Chain = CLI.Chain;
4755  SDValue Callee = CLI.Callee;
4756  bool &isTailCall = CLI.IsTailCall;
4757  CallingConv::ID CallConv = CLI.CallConv;
4758  bool isVarArg = CLI.IsVarArg;
4759  bool isPatchPoint = CLI.IsPatchPoint;
4760  ImmutableCallSite *CS = CLI.CS;
4761 
4762  if (isTailCall) {
4763  if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
4764  isTailCall = false;
4765  else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
4766  isTailCall =
4767  IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
4768  isVarArg, Outs, Ins, DAG);
4769  else
4770  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4771  Ins, DAG);
4772  if (isTailCall) {
4773  ++NumTailCalls;
4774  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4775  ++NumSiblingCalls;
4776 
4777  assert(isa<GlobalAddressSDNode>(Callee) &&
4778  "Callee should be an llvm::Function object.");
4779  DEBUG(
4780  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
4781  const unsigned Width = 80 - strlen("TCO caller: ")
4782  - strlen(", callee linkage: 0, 0");
4783  dbgs() << "TCO caller: "
4784  << left_justify(DAG.getMachineFunction().getName(), Width)
4785  << ", callee linkage: "
4786  << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
4787  );
4788  }
4789  }
4790 
4791  if (!isTailCall && CS && CS->isMustTailCall())
4792  report_fatal_error("failed to perform tail call elimination on a call "
4793  "site marked musttail");
4794 
4795  // When long calls (i.e. indirect calls) are always used, calls are always
4796  // made via function pointer. If we have a function name, first translate it
4797  // into a pointer.
4798  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
4799  !isTailCall)
4800  Callee = LowerGlobalAddress(Callee, DAG);
4801 
4802  if (Subtarget.isSVR4ABI()) {
4803  if (Subtarget.isPPC64())
4804  return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4805  isTailCall, isPatchPoint, Outs, OutVals, Ins,
4806  dl, DAG, InVals, CS);
4807  else
4808  return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4809  isTailCall, isPatchPoint, Outs, OutVals, Ins,
4810  dl, DAG, InVals, CS);
4811  }
4812 
4813  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4814  isTailCall, isPatchPoint, Outs, OutVals, Ins,
4815  dl, DAG, InVals, CS);
4816 }
4817 
4818 SDValue PPCTargetLowering::LowerCall_32SVR4(
4819  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
4820  bool isTailCall, bool isPatchPoint,
4821  const SmallVectorImpl<ISD::OutputArg> &Outs,
4822  const SmallVectorImpl<SDValue> &OutVals,
4823  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4824  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
4825  ImmutableCallSite *CS) const {
4826  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4827  // of the 32-bit SVR4 ABI stack frame layout.
4828 
4829  assert((CallConv == CallingConv::C ||
4830  CallConv == CallingConv::Fast) && "Unknown calling convention!");
4831 
4832  unsigned PtrByteSize = 4;
4833 
4834  MachineFunction &MF = DAG.getMachineFunction();
4835 
4836  // Mark this function as potentially containing a tail call. As a
4837  // consequence, the frame pointer will be used for dynamic stack allocation
4838  // and for restoring the caller's stack pointer in this function's epilogue.
4839  // This is done because the tail-called function might overwrite the value
4840  // in this function's (MF) stack pointer stack slot 0(SP).
4841  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4842  CallConv == CallingConv::Fast)
4843  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4844 
4845  // Count how many bytes are to be pushed on the stack, including the linkage
4846  // area, parameter list area and the part of the local variable space which
4847  // contains copies of aggregates which are passed by value.
4848 
4849  // Assign locations to all of the outgoing arguments.
4850  SmallVector<CCValAssign, 16> ArgLocs;
4851  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4852 
4853  // Reserve space for the linkage area on the stack.
4854  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4855  PtrByteSize);
4856  if (useSoftFloat())
4857  CCInfo.PreAnalyzeCallOperands(Outs);
4858 
4859  if (isVarArg) {
4860  // Handle fixed and variable vector arguments differently.
4861  // Fixed vector arguments go into registers as long as registers are
4862  // available. Variable vector arguments always go into memory.
4863  unsigned NumArgs = Outs.size();
4864 
4865  for (unsigned i = 0; i != NumArgs; ++i) {
4866  MVT ArgVT = Outs[i].VT;
4867  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4868  bool Result;
4869 
4870  if (Outs[i].IsFixed) {
4871  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4872  CCInfo);
4873  } else {
4874  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4875  ArgFlags, CCInfo);
4876  }
4877 
4878  if (Result) {
4879 #ifndef NDEBUG
4880  errs() << "Call operand #" << i << " has unhandled type "
4881  << EVT(ArgVT).getEVTString() << "\n";
4882 #endif
4883  llvm_unreachable(nullptr);
4884  }
4885  }
4886  } else {
4887  // All arguments are treated the same.
4888  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4889  }
4890  CCInfo.clearWasPPCF128();
4891 
4892  // Assign locations to all of the outgoing aggregate by value arguments.
4893  SmallVector<CCValAssign, 16> ByValArgLocs;
4894  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
4895 
4896  // Reserve stack space for the allocations in CCInfo.
4897  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4898 
4899  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4900 
4901  // Size of the linkage area, parameter list area and the part of the local
4902  // variable space where copies of aggregates which are passed by value are
4903  // stored.
4904  unsigned NumBytes = CCByValInfo.getNextStackOffset();
4905 
4906  // Calculate by how many bytes the stack has to be adjusted in case of tail
4907  // call optimization.
4908  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4909 
4910  // Adjust the stack pointer for the new arguments...
4911  // These operations are automatically eliminated by the prolog/epilog pass
4912  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4913  dl);
4914  SDValue CallSeqStart = Chain;
4915 
4916  // Load the return address and frame pointer so they can be moved somewhere
4917  // else later.
4918  SDValue LROp, FPOp;
4919  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
4920 
4921  // Set up a copy of the stack pointer for use in loading and storing any
4922  // arguments that may not fit in the registers available for argument
4923  // passing.
4924  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4925 
4926  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4927  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4928  SmallVector<SDValue, 8> MemOpChains;
4929 
4930  bool seenFloatArg = false;
4931  // Walk the register/memloc assignments, inserting copies/loads.
4932  for (unsigned i = 0, j = 0, e = ArgLocs.size();
4933  i != e;
4934  ++i) {
4935  CCValAssign &VA = ArgLocs[i];
4936  SDValue Arg = OutVals[i];
4937  ISD::ArgFlagsTy Flags = Outs[i].Flags;
4938 
4939  if (Flags.isByVal()) {
4940  // Argument is an aggregate which is passed by value, thus we need to
4941  // create a copy of it in the local variable space of the current stack
4942  // frame (which is the stack frame of the caller) and pass the address of
4943  // this copy to the callee.
4944  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4945  CCValAssign &ByValVA = ByValArgLocs[j++];
4946  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4947 
4948  // Memory reserved in the local variable space of the caller's stack frame.
4949  unsigned LocMemOffset = ByValVA.getLocMemOffset();
4950 
4951  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4952  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4953  StackPtr, PtrOff);
4954 
4955  // Create a copy of the argument in the local area of the current
4956  // stack frame.
4957  SDValue MemcpyCall =
4958  CreateCopyOfByValArgument(Arg, PtrOff,
4959  CallSeqStart.getNode()->getOperand(0),
4960  Flags, DAG, dl);
4961 
4962  // This must go outside the CALLSEQ_START..END.
4963  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4964  CallSeqStart.getNode()->getOperand(1),
4965  SDLoc(MemcpyCall));
4966  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4967  NewCallSeqStart.getNode());
4968  Chain = CallSeqStart = NewCallSeqStart;
4969 
4970  // Pass the address of the aggregate copy on the stack either in a
4971  // physical register or in the parameter list area of the current stack
4972  // frame to the callee.
4973  Arg = PtrOff;
4974  }
4975 
4976  if (VA.isRegLoc()) {
4977  if (Arg.getValueType() == MVT::i1)
4978  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4979 
4980  seenFloatArg |= VA.getLocVT().isFloatingPoint();
4981  // Put argument in a physical register.
4982  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4983  } else {
4984  // Put argument in the parameter list area of the current stack frame.
4985  assert(VA.isMemLoc());
4986  unsigned LocMemOffset = VA.getLocMemOffset();
4987 
4988  if (!isTailCall) {
4989  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4990  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4991  StackPtr, PtrOff);
4992 
4993  MemOpChains.push_back(
4994  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4995  } else {
4996  // Calculate and remember argument location.
4997  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4998  TailCallArguments);
4999  }
5000  }
5001  }
5002 
5003  if (!MemOpChains.empty())
5004  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5005 
5006  // Build a sequence of copy-to-reg nodes chained together with token chain
5007  // and flag operands which copy the outgoing args into the appropriate regs.
5008  SDValue InFlag;
5009  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5010  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5011  RegsToPass[i].second, InFlag);
5012  InFlag = Chain.getValue(1);
5013  }
5014 
5015  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5016  // registers.
5017  if (isVarArg) {
5018  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5019  SDValue Ops[] = { Chain, InFlag };
5020 
5021  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5022  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5023 
5024  InFlag = Chain.getValue(1);
5025  }
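  // Illustration (a sketch, assuming the standard 32-bit SVR4 vararg
  // convention; not from this file): for a caller such as
  //   void f(const char *, ...);
  //   void g(double d, int n) { f("a", d); f("b", n); }
  // the first call passes d in an FPR, so seenFloatArg is true and CR6 is
  // set; the second call passes only integers, so CR6 is cleared.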
5026 
5027  if (isTailCall)
5028  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5029  TailCallArguments);
5030 
5031  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5032  /* unused except on PPC64 ELFv1 */ false, DAG,
5033  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5034  NumBytes, Ins, InVals, CS);
5035 }
5036 
5037 // Copy an argument into memory, being careful to do this outside the
5038 // call sequence for the call to which the argument belongs.
5039 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5040  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5041  SelectionDAG &DAG, const SDLoc &dl) const {
5042  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5043  CallSeqStart.getNode()->getOperand(0),
5044  Flags, DAG, dl);
5045  // The MEMCPY must go outside the CALLSEQ_START..END.
5046  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
5047  CallSeqStart.getNode()->getOperand(1),
5048  SDLoc(MemcpyCall));
5049  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5050  NewCallSeqStart.getNode());
5051  return NewCallSeqStart;
5052 }
5053 
5054 SDValue PPCTargetLowering::LowerCall_64SVR4(
5055  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5056  bool isTailCall, bool isPatchPoint,
5057  const SmallVectorImpl<ISD::OutputArg> &Outs,
5058  const SmallVectorImpl<SDValue> &OutVals,
5059  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5060  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5061  ImmutableCallSite *CS) const {
5062 
5063  bool isELFv2ABI = Subtarget.isELFv2ABI();
5064  bool isLittleEndian = Subtarget.isLittleEndian();
5065  unsigned NumOps = Outs.size();
5066  bool hasNest = false;
5067  bool IsSibCall = false;
5068 
5069  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5070  unsigned PtrByteSize = 8;
5071 
5072  MachineFunction &MF = DAG.getMachineFunction();
5073 
5074  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5075  IsSibCall = true;
5076 
5077  // Mark this function as potentially containing a function that contains a
5078  // tail call. As a consequence, the frame pointer will be used for dynamic
5079  // allocation and for restoring the caller's stack pointer in this function's
5080  // epilog. This is done because the tail-called function might overwrite the
5081  // value in this function's (MF) stack pointer stack slot 0(SP).
5082  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5083  CallConv == CallingConv::Fast)
5084  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5085 
5086  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5087  "fastcc not supported on varargs functions");
5088 
5089  // Count how many bytes are to be pushed on the stack, including the linkage
5090  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5091  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5092  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
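  // A minimal sketch of the two linkage-area layouts described above
  // (illustrative struct and field names; assumes <cstdint> is reachable via
  // the existing includes). The static_asserts mirror the 48- and 32-byte
  // sizes this code relies on.
  struct ELFv1LinkageArea {   // byte offsets from SP
    uint64_t BackChain;       //  0: caller's saved SP
    uint64_t CRSave;          //  8
    uint64_t LRSave;          // 16
    uint64_t Reserved[2];     // 24, 32: unused
    uint64_t TOCSave;         // 40
  };
  struct ELFv2LinkageArea {
    uint64_t BackChain;       //  0
    uint64_t CRSave;          //  8
    uint64_t LRSave;          // 16
    uint64_t TOCSave;         // 24
  };
  static_assert(sizeof(ELFv1LinkageArea) == 48, "ELFv1 linkage area is 48 bytes");
  static_assert(sizeof(ELFv2LinkageArea) == 32, "ELFv2 linkage area is 32 bytes");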
5093  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5094  unsigned NumBytes = LinkageSize;
5095  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5096  unsigned &QFPR_idx = FPR_idx;
5097 
5098  static const MCPhysReg GPR[] = {
5099  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5100  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5101  };
5102  static const MCPhysReg VR[] = {
5103  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5104  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5105  };
5106 
5107  const unsigned NumGPRs = array_lengthof(GPR);
5108  const unsigned NumFPRs = 13;
5109  const unsigned NumVRs = array_lengthof(VR);
5110  const unsigned NumQFPRs = NumFPRs;
5111 
5112  // When using the fast calling convention, we don't provide backing for
5113  // arguments that will be in registers.
5114  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5115 
5116  // Add up all the space actually used.
5117  for (unsigned i = 0; i != NumOps; ++i) {
5118  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5119  EVT ArgVT = Outs[i].VT;
5120  EVT OrigVT = Outs[i].ArgVT;
5121 
5122  if (Flags.isNest())
5123  continue;
5124 
5125  if (CallConv == CallingConv::Fast) {
5126  if (Flags.isByVal())
5127  NumGPRsUsed += (Flags.getByValSize()+7)/8;
5128  else
5129  switch (ArgVT.getSimpleVT().SimpleTy) {
5130  default: llvm_unreachable("Unexpected ValueType for argument!");
5131  case MVT::i1:
5132  case MVT::i32:
5133  case MVT::i64:
5134  if (++NumGPRsUsed <= NumGPRs)
5135  continue;
5136  break;
5137  case MVT::v4i32:
5138  case MVT::v8i16:
5139  case MVT::v16i8:
5140  case MVT::v2f64:
5141  case MVT::v2i64:
5142  case MVT::v1i128:
5143  if (++NumVRsUsed <= NumVRs)
5144  continue;
5145  break;
5146  case MVT::v4f32:
5147  // When using QPX, this is handled like a FP register, otherwise, it
5148  // is an Altivec register.
5149  if (Subtarget.hasQPX()) {
5150  if (++NumFPRsUsed <= NumFPRs)
5151  continue;
5152  } else {
5153  if (++NumVRsUsed <= NumVRs)
5154  continue;
5155  }
5156  break;
5157  case MVT::f32:
5158  case MVT::f64:
5159  case MVT::v4f64: // QPX
5160  case MVT::v4i1: // QPX
5161  if (++NumFPRsUsed <= NumFPRs)
5162  continue;
5163  break;
5164  }
5165  }
5166 
5167  /* Respect alignment of argument on the stack. */
5168  unsigned Align =
5169  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5170  NumBytes = ((NumBytes + Align - 1) / Align) * Align;
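  // For example, NumBytes == 52 with a 16-byte-aligned argument rounds up to
  // ((52 + 15) / 16) * 16 == 64 before the argument's own size is added.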
5171 
5172  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5173  if (Flags.isInConsecutiveRegsLast())
5174  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5175  }
5176 
5177  unsigned NumBytesActuallyUsed = NumBytes;
5178 
5179  // The prolog code of the callee may store up to 8 GPR argument registers to
5180  // the stack, allowing va_start to index over them in memory if it is varargs.
5181  // Because we cannot tell if this is needed on the caller side, we have to
5182  // conservatively assume that it is needed. As such, make sure we have at
5183  // least enough stack space for the caller to store the 8 GPRs.
5184  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
5185  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
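  // For example, an ELFv2 call taking two i64 arguments needs only
  // 32 + 2*8 == 48 bytes so far, but NumBytes is raised here to
  // 32 + 8*8 == 96 to back the full 8-GPR save area.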
5186 
5187  // Tail call needs the stack to be aligned.
5188  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5189  CallConv == CallingConv::Fast)
5190  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5191 
5192  int SPDiff = 0;
5193 
5194  // Calculate by how many bytes the stack has to be adjusted in case of tail
5195  // call optimization.
5196  if (!IsSibCall)
5197  SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5198 
5199  // To protect arguments on the stack from being clobbered in a tail call,
5200  // force all the loads to happen before doing any other lowering.
5201  if (isTailCall)
5202  Chain = DAG.getStackArgumentTokenFactor(Chain);
5203 
5204  // Adjust the stack pointer for the new arguments...
5205  // These operations are automatically eliminated by the prolog/epilog pass
5206  if (!IsSibCall)
5207  Chain = DAG.getCALLSEQ_START(Chain,
5208  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
5209  SDValue CallSeqStart = Chain;
5210 
5211  // Load the return address and frame pointer so they can be moved somewhere else
5212  // later.
5213  SDValue LROp, FPOp;
5214  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5215 
5216  // Set up a copy of the stack pointer for use loading and storing any
5217  // arguments that may not fit in the registers available for argument
5218  // passing.
5219  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5220 
5221  // Figure out which arguments are going to go in registers, and which in
5222  // memory. Also, if this is a vararg function, floating point operations
5223  // must be stored to our stack, and loaded into integer regs as well, if
5224  // any integer regs are available for argument passing.
5225  unsigned ArgOffset = LinkageSize;
5226 
5227  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5228  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5229 
5230  SmallVector<SDValue, 8> MemOpChains;
5231  for (unsigned i = 0; i != NumOps; ++i) {
5232  SDValue Arg = OutVals[i];
5233  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5234  EVT ArgVT = Outs[i].VT;
5235  EVT OrigVT = Outs[i].ArgVT;
5236 
5237  // PtrOff will be used to store the current argument to the stack if a
5238  // register cannot be found for it.
5239  SDValue PtrOff;
5240 
5241  // We re-align the argument offset for each argument, except when using the
5242  // fast calling convention, when we need to make sure we do that only when
5243  // we'll actually use a stack slot.
5244  auto ComputePtrOff = [&]() {
5245  /* Respect alignment of argument on the stack. */
5246  unsigned Align =
5247  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5248  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5249 
5250  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5251 
5252  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5253  };
5254 
5255  if (CallConv != CallingConv::Fast) {
5256  ComputePtrOff();
5257 
5258  /* Compute GPR index associated with argument offset. */
5259  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5260  GPR_idx = std::min(GPR_idx, NumGPRs);
5261  }
5262 
5263  // Promote integers to 64-bit values.
5264  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5265  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5266  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5267  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5268  }
5269 
5270  // FIXME memcpy is used way more than necessary. Correctness first.
5271  // Note: "by value" is code for passing a structure by value, not
5272  // basic types.
5273  if (Flags.isByVal()) {
5274  // Note: Size includes alignment padding, so
5275  // struct x { short a; char b; }
5276  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5277  // These are the proper values we need for right-justifying the
5278  // aggregate in a parameter register.
5279  unsigned Size = Flags.getByValSize();
5280 
5281  // An empty aggregate parameter takes up no storage and no
5282  // registers.
5283  if (Size == 0)
5284  continue;
5285 
5286  if (CallConv == CallingConv::Fast)
5287  ComputePtrOff();
5288 
5289  // All aggregates smaller than 8 bytes must be passed right-justified.
5290  if (Size==1 || Size==2 || Size==4) {
5291  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5292  if (GPR_idx != NumGPRs) {
5293  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5294  MachinePointerInfo(), VT);
5295  MemOpChains.push_back(Load.getValue(1));
5296  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5297 
5298  ArgOffset += PtrByteSize;
5299  continue;
5300  }
5301  }
5302 
5303  if (GPR_idx == NumGPRs && Size < 8) {
5304  SDValue AddPtr = PtrOff;
5305  if (!isLittleEndian) {
5306  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5307  PtrOff.getValueType());
5308  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5309  }
5310  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5311  CallSeqStart,
5312  Flags, DAG, dl);
5313  ArgOffset += PtrByteSize;
5314  continue;
5315  }
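  // For example (big endian, no GPRs left): a 3-byte aggregate is copied to
  // PtrOff + (8 - 3) == PtrOff + 5, leaving it right-justified within its
  // 8-byte parameter slot.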
5316  // Copy entire object into memory. There are cases where gcc-generated
5317  // code assumes it is there, even if it could be put entirely into
5318  // registers. (This is not what the doc says.)
5319 
5320  // FIXME: The above statement is likely due to a misunderstanding of the
5321  // documents. All arguments must be copied into the parameter area BY
5322  // THE CALLEE in the event that the callee takes the address of any
5323  // formal argument. That has not yet been implemented. However, it is
5324  // reasonable to use the stack area as a staging area for the register
5325  // load.
5326 
5327  // Skip this for small aggregates, as we will use the same slot for a
5328  // right-justified copy, below.
5329  if (Size >= 8)
5330  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5331  CallSeqStart,
5332  Flags, DAG, dl);
5333 
5334  // When a register is available, pass a small aggregate right-justified.
5335  if (Size < 8 && GPR_idx != NumGPRs) {
5336  // The easiest way to get this right-justified in a register
5337  // is to copy the structure into the rightmost portion of a
5338  // local variable slot, then load the whole slot into the
5339  // register.
5340  // FIXME: The memcpy seems to produce pretty awful code for
5341  // small aggregates, particularly for packed ones.
5342  // FIXME: It would be preferable to use the slot in the
5343  // parameter save area instead of a new local variable.
5344  SDValue AddPtr = PtrOff;
5345  if (!isLittleEndian) {
5346  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5347  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5348  }
5349  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5350  CallSeqStart,
5351  Flags, DAG, dl);
5352 
5353  // Load the slot into the register.
5354  SDValue Load =
5355  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5356  MemOpChains.push_back(Load.getValue(1));
5357  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5358 
5359  // Done with this argument.
5360  ArgOffset += PtrByteSize;
5361  continue;
5362  }
5363 
5364  // For aggregates larger than PtrByteSize, copy the pieces of the
5365  // object that fit into registers from the parameter save area.
5366  for (unsigned j=0; j<Size; j+=PtrByteSize) {
5367  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5368  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5369  if (GPR_idx != NumGPRs) {
5370  SDValue Load =
5371  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5372  MemOpChains.push_back(Load.getValue(1));
5373  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5374  ArgOffset += PtrByteSize;
5375  } else {
5376  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5377  break;
5378  }
5379  }
5380  continue;
5381  }
5382 
5383  switch (Arg.getSimpleValueType().SimpleTy) {
5384  default: llvm_unreachable("Unexpected ValueType for argument!");
5385  case MVT::i1:
5386  case MVT::i32:
5387  case MVT::i64:
5388  if (Flags.isNest()) {
5389  // The 'nest' parameter, if any, is passed in R11.
5390  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5391  hasNest = true;
5392  break;
5393  }
5394 
5395  // These can be scalar arguments or elements of an integer array type
5396  // passed directly. Clang may use those instead of "byval" aggregate
5397  // types to avoid forcing arguments to memory unnecessarily.
5398  if (GPR_idx != NumGPRs) {
5399  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5400  } else {
5401  if (CallConv == CallingConv::Fast)
5402  ComputePtrOff();
5403 
5404  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5405  true, isTailCall, false, MemOpChains,
5406  TailCallArguments, dl);
5407  if (CallConv == CallingConv::Fast)
5408  ArgOffset += PtrByteSize;
5409  }
5410  if (CallConv != CallingConv::Fast)
5411  ArgOffset += PtrByteSize;
5412  break;
5413  case MVT::f32:
5414  case MVT::f64: {
5415  // These can be scalar arguments or elements of a float array type
5416  // passed directly. The latter are used to implement ELFv2 homogenous
5417  // float aggregates.
5418 
5419  // Named arguments go into FPRs first, and once they overflow, the
5420  // remaining arguments go into GPRs and then the parameter save area.
5421  // Unnamed arguments for vararg functions always go to GPRs and
5422  // then the parameter save area. For now, put all arguments to vararg
5423  // routines always in both locations (FPR *and* GPR or stack slot).
5424  bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5425  bool NeededLoad = false;
5426 
5427  // First load the argument into the next available FPR.
5428  if (FPR_idx != NumFPRs)
5429  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5430 
5431  // Next, load the argument into GPR or stack slot if needed.
5432  if (!NeedGPROrStack)
5433  ;
5434  else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5435  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5436  // once we support fp <-> gpr moves.
5437 
5438  // In the non-vararg case, this can only ever happen in the
5439  // presence of f32 array types, since otherwise we never run
5440  // out of FPRs before running out of GPRs.
5441  SDValue ArgVal;
5442 
5443  // Double values are always passed in a single GPR.
5444  if (Arg.getValueType() != MVT::f32) {
5445  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5446 
5447  // Non-array float values are extended and passed in a GPR.
5448  } else if (!Flags.isInConsecutiveRegs()) {
5449  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5450  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5451 
5452  // If we have an array of floats, we collect every odd element
5453  // together with its predecessor into one GPR.
5454  } else if (ArgOffset % PtrByteSize != 0) {
5455  SDValue Lo, Hi;
5456  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5457  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5458  if (!isLittleEndian)
5459  std::swap(Lo, Hi);
5460  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5461 
5462  // The final element, if even, goes into the first half of a GPR.
5463  } else if (Flags.isInConsecutiveRegsLast()) {
5464  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5465  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5466  if (!isLittleEndian)
5467  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5468  DAG.getConstant(32, dl, MVT::i32));
5469 
5470  // Non-final even elements are skipped; they will be handled
5471  // together with the subsequent argument on the next go-around.
5472  } else
5473  ArgVal = SDValue();
5474 
5475  if (ArgVal.getNode())
5476  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5477  } else {
5478  if (CallConv == CallingConv::Fast)
5479  ComputePtrOff();
5480 
5481  // Single-precision floating-point values are mapped to the
5482  // second (rightmost) word of the stack doubleword.
5483  if (Arg.getValueType() == MVT::f32 &&
5484  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5485  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5486  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5487  }
5488 
5489  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5490  true, isTailCall, false, MemOpChains,
5491  TailCallArguments, dl);
5492 
5493  NeededLoad = true;
5494  }
5495  // When passing an array of floats, the array occupies consecutive
5496  // space in the argument area; only round up to the next doubleword
5497  // at the end of the array. Otherwise, each float takes 8 bytes.
5498  if (CallConv != CallingConv::Fast || NeededLoad) {
5499  ArgOffset += (Arg.getValueType() == MVT::f32 &&
5500  Flags.isInConsecutiveRegs()) ? 4 : 8;
5501  if (Flags.isInConsecutiveRegsLast())
5502  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5503  }
5504  break;
5505  }
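  // For example, for a homogeneous member  float f[2]  starting on a
  // doubleword boundary: f[0] is deferred (ArgOffset % 8 == 0, non-final);
  // then, while processing f[1], both halves are bitcast to i32 and
  // BUILD_PAIR combines them into one i64 GPR, with f[0] in the high half
  // on big-endian targets.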
5506  case MVT::v4f32:
5507  case MVT::v4i32:
5508  case MVT::v8i16:
5509  case MVT::v16i8:
5510  case MVT::v2f64:
5511  case MVT::v2i64:
5512  case MVT::v1i128:
5513  if (!Subtarget.hasQPX()) {
5514  // These can be scalar arguments or elements of a vector array type
5515  // passed directly. The latter are used to implement ELFv2 homogenous
5516  // vector aggregates.
5517 
5518  // For a varargs call, named arguments go into VRs or on the stack as
5519  // usual; unnamed arguments always go to the stack or the corresponding
5520  // GPRs when within range. For now, we always put the value in both
5521  // locations (or even all three).
5522  if (isVarArg) {
5523  // We could elide this store in the case where the object fits
5524  // entirely in R registers. Maybe later.
5525  SDValue Store =
5526  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5527  MemOpChains.push_back(Store);
5528  if (VR_idx != NumVRs) {
5529  SDValue Load =
5530  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5531  MemOpChains.push_back(Load.getValue(1));
5532  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5533  }
5534  ArgOffset += 16;
5535  for (unsigned i=0; i<16; i+=PtrByteSize) {
5536  if (GPR_idx == NumGPRs)
5537  break;
5538  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5539  DAG.getConstant(i, dl, PtrVT));
5540  SDValue Load =
5541  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5542  MemOpChains.push_back(Load.getValue(1));
5543  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5544  }
5545  break;
5546  }
5547 
5548  // Non-varargs Altivec params go into VRs or on the stack.
5549  if (VR_idx != NumVRs) {
5550  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5551  } else {
5552  if (CallConv == CallingConv::Fast)
5553  ComputePtrOff();
5554 
5555  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5556  true, isTailCall, true, MemOpChains,
5557  TailCallArguments, dl);
5558  if (CallConv == CallingConv::Fast)
5559  ArgOffset += 16;
5560  }
5561 
5562  if (CallConv != CallingConv::Fast)
5563  ArgOffset += 16;
5564  break;
5565  } // not QPX
5566 
5567  assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5568  "Invalid QPX parameter type");
5569 
5570  /* fall through */
5571  case MVT::v4f64:
5572  case MVT::v4i1: {
5573  bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5574  if (isVarArg) {
5575  // We could elide this store in the case where the object fits
5576  // entirely in R registers. Maybe later.
5577  SDValue Store =
5578  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5579  MemOpChains.push_back(Store);
5580  if (QFPR_idx != NumQFPRs) {
5581  SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5582  PtrOff, MachinePointerInfo());
5583  MemOpChains.push_back(Load.getValue(1));
5584  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5585  }
5586  ArgOffset += (IsF32 ? 16 : 32);
5587  for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5588  if (GPR_idx == NumGPRs)
5589  break;
5590  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5591  DAG.getConstant(i, dl, PtrVT));
5592  SDValue Load =
5593  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5594  MemOpChains.push_back(Load.getValue(1));
5595  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5596  }
5597  break;
5598  }
5599 
5600  // Non-varargs QPX params go into registers or on the stack.
5601  if (QFPR_idx != NumQFPRs) {
5602  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5603  } else {
5604  if (CallConv == CallingConv::Fast)
5605  ComputePtrOff();
5606 
5607  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5608  true, isTailCall, true, MemOpChains,
5609  TailCallArguments, dl);
5610  if (CallConv == CallingConv::Fast)
5611  ArgOffset += (IsF32 ? 16 : 32);
5612  }
5613 
5614  if (CallConv != CallingConv::Fast)
5615  ArgOffset += (IsF32 ? 16 : 32);
5616  break;
5617  }
5618  }
5619  }
5620 
5621  assert(NumBytesActuallyUsed == ArgOffset);
5622  (void)NumBytesActuallyUsed;
5623 
5624  if (!MemOpChains.empty())
5625  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5626 
5627  // Check if this is an indirect call (MTCTR/BCTRL).
5628  // See PrepareCall() for more information about calls through function
5629  // pointers in the 64-bit SVR4 ABI.
5630  if (!isTailCall && !isPatchPoint &&
5631  !isFunctionGlobalAddress(Callee) &&
5632  !isa<ExternalSymbolSDNode>(Callee)) {
5633  // Load r2 into a virtual register and store it to the TOC save area.
5634  setUsesTOCBasePtr(DAG);
5635  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5636  // TOC save area offset.
5637  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5638  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5639  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5640  Chain = DAG.getStore(
5641  Val.getValue(1), dl, Val, AddPtr,
5642  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5643  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5644  // This does not mean the MTCTR instruction must use R12; it's easier
5645  // to model this as an extra parameter, so do that.
5646  if (isELFv2ABI && !isPatchPoint)
5647  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5648  }
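  // The TOC store above materializes as, e.g.,  std 2, 24(1)  under ELFv2
  // (TOC save offset 24) or  std 2, 40(1)  under ELFv1, so that r2 can be
  // restored after control returns from the indirect callee.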
5649 
5650  // Build a sequence of copy-to-reg nodes chained together with token chain
5651  // and flag operands which copy the outgoing args into the appropriate regs.
5652  SDValue InFlag;
5653  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5654  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5655  RegsToPass[i].second, InFlag);
5656  InFlag = Chain.getValue(1);
5657  }
5658 
5659  if (isTailCall && !IsSibCall)
5660  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5661  TailCallArguments);
5662 
5663  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5664  DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5665  SPDiff, NumBytes, Ins, InVals, CS);
5666 }
5667 
5668 SDValue PPCTargetLowering::LowerCall_Darwin(
5669  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5670  bool isTailCall, bool isPatchPoint,
5671  const SmallVectorImpl<ISD::OutputArg> &Outs,
5672  const SmallVectorImpl<SDValue> &OutVals,
5673  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5674  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5675  ImmutableCallSite *CS) const {
5676 
5677  unsigned NumOps = Outs.size();
5678 
5679  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5680  bool isPPC64 = PtrVT == MVT::i64;
5681  unsigned PtrByteSize = isPPC64 ? 8 : 4;
5682 
5683  MachineFunction &MF = DAG.getMachineFunction();
5684 
5685  // Mark this function as potentially containing a function that contains a
5686  // tail call. As a consequence, the frame pointer will be used for dynamic
5687  // allocation and for restoring the caller's stack pointer in this function's
5688  // epilog. This is done because the tail-called function might overwrite the
5689  // value in this function's (MF) stack pointer stack slot 0(SP).
5690  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5691  CallConv == CallingConv::Fast)
5692  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5693 
5694  // Count how many bytes are to be pushed on the stack, including the linkage
5695  // area, and parameter passing area. We start with 24/48 bytes, which is
5696  // prereserved space for [SP][CR][LR][3 x unused].
5697  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5698  unsigned NumBytes = LinkageSize;
5699 
5700  // Add up all the space actually used.
5701  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5702  // they all go in registers, but we must reserve stack space for them for
5703  // possible use by the caller. In varargs or 64-bit calls, parameters are
5704  // assigned stack space in order, with padding so Altivec parameters are
5705  // 16-byte aligned.
5706  unsigned nAltivecParamsAtEnd = 0;
5707  for (unsigned i = 0; i != NumOps; ++i) {
5708  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5709  EVT ArgVT = Outs[i].VT;
5710  // Varargs Altivec parameters are padded to a 16 byte boundary.
5711  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5712  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5713  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5714  if (!isVarArg && !isPPC64) {
5715  // Non-varargs Altivec parameters go after all the non-Altivec
5716  // parameters; handle those later so we know how much padding we need.
5717  nAltivecParamsAtEnd++;
5718  continue;
5719  }
5720  // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5721  NumBytes = ((NumBytes+15)/16)*16;
5722  }
5723  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5724  }
5725 
5726  // Allow for Altivec parameters at the end, if needed.
5727  if (nAltivecParamsAtEnd) {
5728  NumBytes = ((NumBytes+15)/16)*16;
5729  NumBytes += 16*nAltivecParamsAtEnd;
5730  }
5731 
5732  // The prolog code of the callee may store up to 8 GPR argument registers to
5733  // the stack, allowing va_start to index over them in memory if it is varargs.
5734  // Because we cannot tell if this is needed on the caller side, we have to
5735  // conservatively assume that it is needed. As such, make sure we have at
5736  // least enough stack space for the caller to store the 8 GPRs.
5737  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5738 
5739  // Tail call needs the stack to be aligned.
5740  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5741  CallConv == CallingConv::Fast)
5742  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5743 
5744  // Calculate by how many bytes the stack has to be adjusted in case of tail
5745  // call optimization.
5746  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5747 
5748  // To protect arguments on the stack from being clobbered in a tail call,
5749  // force all the loads to happen before doing any other lowering.
5750  if (isTailCall)
5751  Chain = DAG.getStackArgumentTokenFactor(Chain);
5752 
5753  // Adjust the stack pointer for the new arguments...
5754  // These operations are automatically eliminated by the prolog/epilog pass
5755  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5756  dl);
5757  SDValue CallSeqStart = Chain;
5758 
5759  // Load the return address and frame pointer so they can be moved somewhere else
5760  // later.
5761  SDValue LROp, FPOp;
5762  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5763 
5764  // Set up a copy of the stack pointer for use loading and storing any
5765  // arguments that may not fit in the registers available for argument
5766  // passing.
5767  SDValue StackPtr;
5768  if (isPPC64)
5769  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5770  else
5771  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5772 
5773  // Figure out which arguments are going to go in registers, and which in
5774  // memory. Also, if this is a vararg function, floating point operations
5775  // must be stored to our stack, and loaded into integer regs as well, if
5776  // any integer regs are available for argument passing.
5777  unsigned ArgOffset = LinkageSize;
5778  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5779 
5780  static const MCPhysReg GPR_32[] = { // 32-bit registers.
5781  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5782  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5783  };
5784  static const MCPhysReg GPR_64[] = { // 64-bit registers.
5785  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5786  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5787  };
5788  static const MCPhysReg VR[] = {
5789  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5790  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5791  };
5792  const unsigned NumGPRs = array_lengthof(GPR_32);
5793  const unsigned NumFPRs = 13;
5794  const unsigned NumVRs = array_lengthof(VR);
5795 
5796  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5797 
5798  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5799  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5800 
5801  SmallVector<SDValue, 8> MemOpChains;
5802  for (unsigned i = 0; i != NumOps; ++i) {
5803  SDValue Arg = OutVals[i];
5804  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5805 
5806  // PtrOff will be used to store the current argument to the stack if a
5807  // register cannot be found for it.
5808  SDValue PtrOff;
5809 
5810  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5811 
5812  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5813 
5814  // On PPC64, promote integers to 64-bit values.
5815  if (isPPC64 && Arg.getValueType() == MVT::i32) {
5816  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5817  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5818  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5819  }
5820 
5821  // FIXME memcpy is used way more than necessary. Correctness first.
5822  // Note: "by value" is code for passing a structure by value, not
5823  // basic types.
5824  if (Flags.isByVal()) {
5825  unsigned Size = Flags.getByValSize();
5826  // Very small objects are passed right-justified. Everything else is
5827  // passed left-justified.
5828  if (Size==1 || Size==2) {
5829  EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
5830  if (GPR_idx != NumGPRs) {
5831  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5832  MachinePointerInfo(), VT);
5833  MemOpChains.push_back(Load.getValue(1));
5834  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5835 
5836  ArgOffset += PtrByteSize;
5837  } else {
5838  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5839  PtrOff.getValueType());
5840  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5841  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5842  CallSeqStart,
5843  Flags, DAG, dl);
5844  ArgOffset += PtrByteSize;
5845  }
5846  continue;
5847  }
5848  // Copy entire object into memory. There are cases where gcc-generated
5849  // code assumes it is there, even if it could be put entirely into
5850  // registers. (This is not what the doc says.)
5851  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5852  CallSeqStart,
5853  Flags, DAG, dl);
5854 
5855  // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
5856  // copy the pieces of the object that fit into registers from the
5857  // parameter save area.
5858  for (unsigned j=0; j<Size; j+=PtrByteSize) {
5859  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5860  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5861  if (GPR_idx != NumGPRs) {
5862  SDValue Load =
5863  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5864  MemOpChains.push_back(Load.getValue(1));
5865  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5866  ArgOffset += PtrByteSize;
5867  } else {
5868  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5869  break;
5870  }
5871  }
5872  continue;
5873  }
5874 
5875  switch (Arg.getSimpleValueType().SimpleTy) {
5876  default: llvm_unreachable("Unexpected ValueType for argument!");
5877  case MVT::i1:
5878  case MVT::i32:
5879  case MVT::i64:
5880  if (GPR_idx != NumGPRs) {
5881  if (Arg.getValueType() == MVT::i1)
5882  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
5883 
5884  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5885  } else {
5886  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5887  isPPC64, isTailCall, false, MemOpChains,
5888  TailCallArguments, dl);
5889  }
5890  ArgOffset += PtrByteSize;
5891  break;
5892  case MVT::f32:
5893  case MVT::f64:
5894  if (FPR_idx != NumFPRs) {
5895  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5896 
5897  if (isVarArg) {
5898  SDValue Store =
5899  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5900  MemOpChains.push_back(Store);
5901 
5902  // Float varargs are always shadowed in available integer registers
5903  if (GPR_idx != NumGPRs) {
5904  SDValue Load =
5905  DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5906  MemOpChains.push_back(Load.getValue(1));
5907  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5908  }
5909  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
5910  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5911  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5912  SDValue Load =
5913  DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5914  MemOpChains.push_back(Load.getValue(1));
5915  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5916  }
5917  } else {
5918  // If we have any FPRs remaining, we may also have GPRs remaining.
5919  // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
5920  // GPRs.
5921  if (GPR_idx != NumGPRs)
5922  ++GPR_idx;
5923  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
5924  !isPPC64) // PPC64 has 64-bit GPR's obviously :)
5925  ++GPR_idx;
5926  }
5927  } else
5928  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5929  isPPC64, isTailCall, false, MemOpChains,
5930  TailCallArguments, dl);
5931  if (isPPC64)
5932  ArgOffset += 8;
5933  else
5934  ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5935  break;
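  // For example, on 32-bit Darwin a non-vararg f64 argument in an FPR also
  // consumes two GPR slots (say r5 and r6), so the next integer argument is
  // assigned r7 even though r5 and r6 are never actually written.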
5936  case MVT::v4f32:
5937  case MVT::v4i32:
5938  case MVT::v8i16:
5939  case MVT::v16i8:
5940  if (isVarArg) {
5941  // These go aligned on the stack, or in the corresponding R registers
5942  // when within range. The Darwin PPC ABI doc claims they also go in
5943  // V registers; in fact gcc does this only for arguments that are
5944  // prototyped, not for those that match the ellipsis (...). We do it for
5945  // all arguments, which seems to work.
5946  while (ArgOffset % 16 !=0) {
5947  ArgOffset += PtrByteSize;
5948  if (GPR_idx != NumGPRs)
5949  GPR_idx++;
5950  }
5951  // We could elide this store in the case where the object fits
5952  // entirely in R registers. Maybe later.
5953  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5954  DAG.getConstant(ArgOffset, dl, PtrVT));
5955  SDValue Store =
5956  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5957  MemOpChains.push_back(Store);
5958  if (VR_idx != NumVRs) {
5959  SDValue Load =
5960  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5961  MemOpChains.push_back(Load.getValue(1));
5962  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5963  }
5964  ArgOffset += 16;
5965  for (unsigned i=0; i<16; i+=PtrByteSize) {
5966  if (GPR_idx == NumGPRs)
5967  break;
5968  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5969  DAG.getConstant(i, dl, PtrVT));
5970  SDValue Load =
5971  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5972  MemOpChains.push_back(Load.getValue(1));
5973  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5974  }
5975  break;
5976  }
5977 
5978  // Non-varargs Altivec params generally go in registers, but have
5979  // stack space allocated at the end.
5980  if (VR_idx != NumVRs) {
5981  // Doesn't have GPR space allocated.
5982  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5983  } else if (nAltivecParamsAtEnd==0) {
5984  // We are emitting Altivec params in order.
5985  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5986  isPPC64, isTailCall, true, MemOpChains,
5987  TailCallArguments, dl);
5988  ArgOffset += 16;
5989  }
5990  break;
5991  }
5992  }
5993  // If all Altivec parameters fit in registers, as they usually do,
5994  // they get stack space following the non-Altivec parameters. We
5995  // don't track this here because nobody below needs it.
5996  // If there are more Altivec parameters than fit in registers, emit
5997  // the stores here.
5998  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5999  unsigned j = 0;
6000  // Offset is aligned; skip 1st 12 params which go in V registers.
6001  ArgOffset = ((ArgOffset+15)/16)*16;
6002  ArgOffset += 12*16;
6003  for (unsigned i = 0; i != NumOps; ++i) {
6004  SDValue Arg = OutVals[i];
6005  EVT ArgType = Outs[i].VT;
6006  if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6007  ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6008  if (++j > NumVRs) {
6009  SDValue PtrOff;
6010  // We are emitting Altivec params in order.
6011  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6012  isPPC64, isTailCall, true, MemOpChains,
6013  TailCallArguments, dl);
6014  ArgOffset += 16;
6015  }
6016  }
6017  }
6018  }
6019 
6020  if (!MemOpChains.empty())
6021  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6022 
6023  // On Darwin, R12 must contain the address of an indirect callee. This does
6024  // not mean the MTCTR instruction must use R12; it's easier to model this as
6025  // an extra parameter, so do that.
6026  if (!isTailCall &&
6027  !isFunctionGlobalAddress(Callee) &&
6028  !isa<ExternalSymbolSDNode>(Callee) &&
6029  !isBLACompatibleAddress(Callee, DAG))
6030  RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6031  PPC::R12), Callee));
6032 
6033  // Build a sequence of copy-to-reg nodes chained together with token chain
6034  // and flag operands which copy the outgoing args into the appropriate regs.
6035  SDValue InFlag;
6036  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6037  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6038  RegsToPass[i].second, InFlag);
6039  InFlag = Chain.getValue(1);
6040  }
6041 
6042  if (isTailCall)
6043  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6044  TailCallArguments);
6045 
6046  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6047  /* unused except on PPC64 ELFv1 */ false, DAG,
6048  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6049  NumBytes, Ins, InVals, CS);
6050 }
6051 
6052 bool
6053 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6054  MachineFunction &MF, bool isVarArg,
6055  const SmallVectorImpl<ISD::OutputArg> &Outs,
6056  LLVMContext &Context) const {
6057  SmallVector<CCValAssign, 16> RVLocs;
6058  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6059  return CCInfo.CheckReturn(Outs, RetCC_PPC);
6060 }
6061 
6062 SDValue
6063 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6064  bool isVarArg,
6065  const SmallVectorImpl<ISD::OutputArg> &Outs,
6066  const SmallVectorImpl<SDValue> &OutVals,
6067  const SDLoc &dl, SelectionDAG &DAG) const {
6068 
6069  SmallVector<CCValAssign, 16> RVLocs;
6070  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6071  *DAG.getContext());
6072  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6073 
6074  SDValue Flag;
6075  SmallVector<SDValue, 4> RetOps(1, Chain);
6076 
6077  // Copy the result values into the output registers.
6078  for (unsigned i = 0; i != RVLocs.size(); ++i) {
6079  CCValAssign &VA = RVLocs[i];
6080  assert(VA.isRegLoc() && "Can only return in registers!");
6081 
6082  SDValue Arg = OutVals[i];
6083 
6084  switch (VA.getLocInfo()) {
6085  default: llvm_unreachable("Unknown loc info!");
6086  case CCValAssign::Full: break;
6087  case CCValAssign::AExt:
6088  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6089  break;
6090  case CCValAssign::ZExt:
6091  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6092  break;
6093  case CCValAssign::SExt:
6094  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6095  break;
6096  }
6097 
6098  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6099  Flag = Chain.getValue(1);
6100  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6101  }
6102 
6103  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6104  const MCPhysReg *I =
6105  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6106  if (I) {
6107  for (; *I; ++I) {
6108 
6109  if (PPC::G8RCRegClass.contains(*I))
6110  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6111  else if (PPC::F8RCRegClass.contains(*I))
6112  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6113  else if (PPC::CRRCRegClass.contains(*I))
6114  RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6115  else if (PPC::VRRCRegClass.contains(*I))
6116  RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6117  else
6118  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6119  }
6120  }
6121 
6122  RetOps[0] = Chain; // Update chain.
6123 
6124  // Add the flag if we have it.
6125  if (Flag.getNode())
6126  RetOps.push_back(Flag);
6127 
6128  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6129 }
6130 
6131 SDValue
6132 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6133  SelectionDAG &DAG) const {
6134  SDLoc dl(Op);
6135 
6136  // Get the correct type for integers.
6137  EVT IntVT = Op.getValueType();
6138 
6139  // Get the inputs.
6140  SDValue Chain = Op.getOperand(0);
6141  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6142  // Build a DYNAREAOFFSET node.
6143  SDValue Ops[2] = {Chain, FPSIdx};
6144  SDVTList VTs = DAG.getVTList(IntVT);
6145  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6146 }
6147 
6148 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6149  SelectionDAG &DAG) const {
6150  // When we pop the dynamic allocation we need to restore the SP link.
6151  SDLoc dl(Op);
6152 
6153  // Get the correct type for pointers.
6154  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6155 
6156  // Construct the stack pointer operand.
6157  bool isPPC64 = Subtarget.isPPC64();
6158  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6159  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6160 
6161  // Get the operands for the STACKRESTORE.
6162  SDValue Chain = Op.getOperand(0);
6163  SDValue SaveSP = Op.getOperand(1);
6164 
6165  // Load the old link SP.
6166  SDValue LoadLinkSP =
6167  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6168 
6169  // Restore the stack pointer.
6170  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6171 
6172  // Store the old link SP.
6173  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6174 }
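// Roughly, for a 32-bit target the nodes above correspond to:
//   lwz r0, 0(r1)    ; load the old back-chain link
//   mr  r1, rSave    ; restore the stack pointer (rSave is hypothetical)
//   stw r0, 0(r1)    ; store the link at the new top of stack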
6175 
6176 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6177  MachineFunction &MF = DAG.getMachineFunction();
6178  bool isPPC64 = Subtarget.isPPC64();
6179  EVT PtrVT = getPointerTy(MF.getDataLayout());
6180 
6181  // Get the current return address save index. The users of this index
6182  // are primarily RETURNADDR lowering and tail-call lowering.
6183  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6184  int RASI = FI->getReturnAddrSaveIndex();
6185 
6186  // If the return address save index hasn't been defined yet.
6187  if (!RASI) {
6188  // Find out the fixed offset of the return address save area.
6189  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6190  // Allocate the frame index for the return address save area.
6191  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6192  // Save the result.
6193  FI->setReturnAddrSaveIndex(RASI);
6194  }
6195  return DAG.getFrameIndex(RASI, PtrVT);
6196 }
6197 
6198 SDValue
6199 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6200  MachineFunction &MF = DAG.getMachineFunction();
6201  bool isPPC64 = Subtarget.isPPC64();
6202  EVT PtrVT = getPointerTy(MF.getDataLayout());
6203 
6204  // Get current frame pointer save index. The users of this index will be
6205  // primarily DYNALLOC instructions.
6206  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6207  int FPSI = FI->getFramePointerSaveIndex();
6208 
6209  // If the frame pointer save index hasn't been defined yet.
6210  if (!FPSI) {
6211  // Find out what the fix offset of the frame pointer save area.
6212  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6213  // Allocate the frame index for frame pointer save area.
6214  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6215  // Save the result.
6216  FI->setFramePointerSaveIndex(FPSI);
6217  }
6218  return DAG.getFrameIndex(FPSI, PtrVT);
6219 }
6220 
6221 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6222  SelectionDAG &DAG) const {
6223  // Get the inputs.
6224  SDValue Chain = Op.getOperand(0);
6225  SDValue Size = Op.getOperand(1);
6226  SDLoc dl(Op);
6227 
6228  // Get the correct type for pointers.
6229  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6230  // Negate the size.
6231  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6232  DAG.getConstant(0, dl, PtrVT), Size);
6233  // Construct a node for the frame pointer save index.
6234  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6235  // Build a DYNALLOC node.
6236  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6237  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6238  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6239 }
6240 
6241 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6242  SelectionDAG &DAG) const {
6243  MachineFunction &MF = DAG.getMachineFunction();
6244 
6245  bool isPPC64 = Subtarget.isPPC64();
6246  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6247 
6248  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6249  return DAG.getFrameIndex(FI, PtrVT);
6250 }
6251 
6252 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6253  SelectionDAG &DAG) const {
6254  SDLoc DL(Op);
6255  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6256  DAG.getVTList(MVT::i32, MVT::Other),
6257  Op.getOperand(0), Op.getOperand(1));
6258 }
6259 
6260 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6261  SelectionDAG &DAG) const {
6262  SDLoc DL(Op);
6263  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6264  Op.getOperand(0), Op.getOperand(1));
6265 }
6266 
6267 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6268  if (Op.getValueType().isVector())
6269  return LowerVectorLoad(Op, DAG);
6270 
6271  assert(Op.getValueType() == MVT::i1 &&
6272  "Custom lowering only for i1 loads");
6273 
6274  // First, load 8 bits into 32 bits, then truncate to 1 bit.
6275 
6276  SDLoc dl(Op);
6277  LoadSDNode *LD = cast<LoadSDNode>(Op);
6278 
6279  SDValue Chain = LD->getChain();
6280  SDValue BasePtr = LD->getBasePtr();
6281  MachineMemOperand *MMO = LD->getMemOperand();
6282 
6283  SDValue NewLD =
6284  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6285  BasePtr, MVT::i8, MMO);
6286  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6287 
6288  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6289  return DAG.getMergeValues(Ops, dl);
6290 }
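// Schematically, an i1 load becomes
//   t1: i32,ch = EXTLOAD<(load 1)> Chain, BasePtr   (i64 on 64-bit targets)
//   t2: i1     = truncate t1
// i.e. a byte-sized extending load followed by a truncate, since there is
// no i1-typed load on PPC.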
6291 
6292 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6293  if (Op.getOperand(1).getValueType().isVector())
6294  return LowerVectorStore(Op, DAG);
6295 
6296  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6297  "Custom lowering only for i1 stores");
6298 
6299  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6300 
6301  SDLoc dl(Op);
6302  StoreSDNode *ST = cast<StoreSDNode>(Op);
6303 
6304  SDValue Chain = ST->getChain();
6305  SDValue BasePtr = ST->getBasePtr();
6306  SDValue Value = ST->getValue();
6307  MachineMemOperand *MMO = ST->getMemOperand();
6308 
6309  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6310  Value);
6311  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6312 }
6313 
6314 // FIXME: Remove this once the ANDI glue bug is fixed:
6315 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6316  assert(Op.getValueType() == MVT::i1 &&
6317  "Custom lowering only for i1 results");
6318 
6319  SDLoc DL(Op);
6320  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6321  Op.getOperand(0));
6322 }
6323 
6324 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
6325 /// possible.
6326 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
6327  // Not FP? Not a fsel.
6328  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6329  !Op.getOperand(2).getValueType().isFloatingPoint())
6330  return Op;
6331 
6332  // We might be able to do better than this under some circumstances, but in
6333  // general, fsel-based lowering of select is a finite-math-only optimization.
6334  // For more information, see section F.3 of the 2.06 ISA specification.
6335  if (!DAG.getTarget().Options.NoInfsFPMath ||
6336  !DAG.getTarget().Options.NoNaNsFPMath)
6337  return Op;
6338  // TODO: Propagate flags from the select rather than global settings.
6339  SDNodeFlags Flags;
6340  Flags.setNoInfs(true);
6341  Flags.setNoNaNs(true);
6342 
6343  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6344 
6345  EVT ResVT = Op.getValueType();
6346  EVT CmpVT = Op.getOperand(0).getValueType();
6347  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6348  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
6349  SDLoc dl(Op);
6350 
6351  // If the RHS of the comparison is a 0.0, we don't need to do the
6352  // subtraction at all.
6353  SDValue Sel1;
6354  if (isFloatingPointZero(RHS))
6355  switch (CC) {
6356  default: break; // SETUO etc aren't handled by fsel.
6357  case ISD::SETNE:
6358  std::swap(TV, FV);
6359  case ISD::SETEQ:
6360  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6361  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6362  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6363  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6364  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6365  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6366  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6367  case ISD::SETULT:
6368  case ISD::SETLT:
6369  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6370  case ISD::SETOGE:
6371  case ISD::SETGE:
6372  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6373  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6374  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6375  case ISD::SETUGT:
6376  case ISD::SETGT:
6377  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6378  case ISD::SETOLE:
6379  case ISD::SETLE:
6380  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6381  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6382  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6383  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6384  }
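  // For example, with a zero RHS:
  //   select_cc (setge f, 0.0), T, F --> fsel f, T, F
  //   select_cc (setlt f, 0.0), T, F --> fsel f, F, T   (operands swapped)
  // Otherwise, the difference LHS - RHS is materialized below and tested
  // against zero the same way.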
6385 
6386  SDValue Cmp;
6387  switch (CC) {
6388  default: break; // SETUO etc aren't handled by fsel.
6389  case ISD::SETNE:
6390  std::swap(TV, FV);
6391  case ISD::SETEQ:
6392  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6393  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6394  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6395  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6396  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6397  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6398  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6399  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6400  case ISD::SETULT:
6401  case ISD::SETLT:
6402  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6403  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6404  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6405  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6406  case ISD::SETOGE:
6407  case ISD::SETGE:
6408  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6409  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6410  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6411  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6412  case ISD::SETUGT:
6413  case ISD::SETGT:
6414  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6415  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6416  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6417  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6418  case ISD::SETOLE:
6419  case ISD::SETLE:
6420  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6421  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6422  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6423  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6424  }
6425  return Op;
6426 }
6427 
6428 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6429  SelectionDAG &DAG,
6430  const SDLoc &dl) const {
6432  SDValue Src = Op.getOperand(0);
6433  if (Src.getValueType() == MVT::f32)
6434  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6435 
6436  SDValue Tmp;
6437  switch (Op.getSimpleValueType().SimpleTy) {
6438  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6439  case MVT::i32:
6440  Tmp = DAG.getNode(
6441  Op.getOpcode() == ISD::FP_TO_SINT
6442  ? PPCISD::FCTIWZ
6443  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6444  dl, MVT::f64, Src);
6445  break;
6446  case MVT::i64:
6447  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6448  "i64 FP_TO_UINT is supported only with FPCVT");
6449  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6450  PPCISD::FCTIDUZ,
6451  dl, MVT::f64, Src);
6452  break;
6453  }
6454 
6455  // Convert the FP value to an int value through memory.
6456  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6457  (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6458  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6459  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6460  MachinePointerInfo MPI =
6461  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6462 
6463  // Emit a store to the stack slot.
6464  SDValue Chain;
6465  if (i32Stack) {
6466  MachineFunction &MF = DAG.getMachineFunction();
6467  MachineMemOperand *MMO =
6468  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6469  SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6470  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6471  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6472  } else
6473  Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
6474 
6475  // Result is a load from the stack slot. If loading 4 bytes, make sure to
6476  // add in a bias on big endian.
6477  if (Op.getValueType() == MVT::i32 && !i32Stack) {
6478  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6479  DAG.getConstant(4, dl, FIPtr.getValueType()));
6480  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6481  }
6482 
6483  RLI.Chain = Chain;
6484  RLI.Ptr = FIPtr;
6485  RLI.MPI = MPI;
6486 }
6487 
6488 /// \brief Custom lowers floating point to integer conversions to use
6489 /// the direct move instructions available in ISA 2.07 to avoid the
6490 /// need for load/store combinations.
6491 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6492  SelectionDAG &DAG,
6493  const SDLoc &dl) const {
6495  SDValue Src = Op.getOperand(0);
6496 
6497  if (Src.getValueType() == MVT::f32)
6498  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6499 
6500  SDValue Tmp;
6501  switch (Op.getSimpleValueType().SimpleTy) {
6502  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6503  case MVT::i32:
6504  Tmp = DAG.getNode(
6505  Op.getOpcode() == ISD::FP_TO_SINT
6506  ? PPCISD::FCTIWZ
6507  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6508  dl, MVT::f64, Src);
6509  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6510  break;
6511  case MVT::i64:
6512  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6513  "i64 FP_TO_UINT is supported only with FPCVT");
6514  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6515  PPCISD::FCTIDUZ,
6516  dl, MVT::f64, Src);
6517  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6518  break;
6519  }
6520  return Tmp;
6521 }
6522 
6523 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6524  const SDLoc &dl) const {
6525  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6526  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6527 
6528  ReuseLoadInfo RLI;
6529  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6530 
6531  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6532  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6533 }
6534 
6535 // We're trying to insert a regular store, S, and then a load, L. If the
6536 // incoming value, O, is a load, we might just be able to have our load use the
6537 // address used by O. However, we don't know if anything else will store to
6538 // that address before we can load from it. To prevent this situation, we need
6539 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6540 // the same chain operand as O, we create a token factor from the chain results
6541 // of O and L, and we replace all uses of O's chain result with that token
6542 // factor (see spliceIntoChain below for this last part).
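 // Schematically (a sketch of the rewiring, not additional code):
 //   before:  Ch -> O -> (users of O's chain result)
 //   after:   Ch -> O -> TF(O, L) -> (users of O's chain result)
 //            Ch -> L -> TF(O, L)
 // so anything that was ordered after O is now also ordered after L.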
6543 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6544  ReuseLoadInfo &RLI,
6545  SelectionDAG &DAG,
6546  ISD::LoadExtType ET) const {
6547  SDLoc dl(Op);
6548  if (ET == ISD::NON_EXTLOAD &&
6549  (Op.getOpcode() == ISD::FP_TO_UINT ||
6550  Op.getOpcode() == ISD::FP_TO_SINT) &&
6551  isOperationLegalOrCustom(Op.getOpcode(),
6552  Op.getOperand(0).getValueType())) {
6553 
6554  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6555  return true;
6556  }
6557 
6558  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6559  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6560  LD->isNonTemporal())
6561  return false;
6562  if (LD->getMemoryVT() != MemVT)
6563  return false;
6564 
6565  RLI.Ptr = LD->getBasePtr();
6566  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6567  assert(LD->getAddressingMode() == ISD::PRE_INC &&
6568  "Non-pre-inc AM on PPC?");
6569  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6570  LD->getOffset());
6571  }
6572 
6573  RLI.Chain = LD->getChain();
6574  RLI.MPI = LD->getPointerInfo();
6575  RLI.IsDereferenceable = LD->isDereferenceable();
6576  RLI.IsInvariant = LD->isInvariant();
6577  RLI.Alignment = LD->getAlignment();
6578  RLI.AAInfo = LD->getAAInfo();
6579  RLI.Ranges = LD->getRanges();
6580 
6581  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6582  return true;
6583 }
6584 
6585 // Given the head of the old chain, ResChain, insert a token factor containing
6586 // it and NewResChain, and make users of ResChain now be users of that token
6587 // factor.
6588 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6589  SDValue NewResChain,
6590  SelectionDAG &DAG) const {
6591  if (!ResChain)
6592  return;
6593 
6594  SDLoc dl(NewResChain);
6595 
6596  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6597  NewResChain, DAG.getUNDEF(MVT::Other));
6598  assert(TF.getNode() != NewResChain.getNode() &&
6599  "A new TF really is required here");
6600 
6601  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6602  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6603 }
6604 
6605 /// \brief Analyze profitability of direct move
6606 /// prefer float load to int load plus direct move
6607 /// when there is no integer use of int load
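 // For example (illustrative): an i32 load whose only user is a sint_to_fp
 // is better served by a single floating-point-side load (e.g. lfiwax)
 // than by an integer load plus a direct move, so we return false for it.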
6608 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
6609  SDNode *Origin = Op.getOperand(0).getNode();
6610  if (Origin->getOpcode() != ISD::LOAD)
6611  return true;
6612 
6613  // If there is no LXSIBZX/LXSIHZX (as on Power8 and earlier),
6614  // prefer the direct move if the memory access is 1 or 2 bytes.
6615  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
6616  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6617  return true;
6618 
6619  for (SDNode::use_iterator UI = Origin->use_begin(),
6620  UE = Origin->use_end();
6621  UI != UE; ++UI) {
6622 
6623  // Only look at the users of the loaded value.
6624  if (UI.getUse().get().getResNo() != 0)
6625  continue;
6626 
6627  if (UI->getOpcode() != ISD::SINT_TO_FP &&
6628  UI->getOpcode() != ISD::UINT_TO_FP)
6629  return true;
6630  }
6631 
6632  return false;
6633 }
6634 
6635 /// \brief Custom lowers integer to floating point conversions to use
6636 /// the direct move instructions available in ISA 2.07 to avoid the
6637 /// need for load/store combinations.
6638 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6639  SelectionDAG &DAG,
6640  const SDLoc &dl) const {
6641  assert((Op.getValueType() == MVT::f32 ||
6642  Op.getValueType() == MVT::f64) &&
6643  "Invalid floating point type as target of conversion");
6644  assert(Subtarget.hasFPCVT() &&
6645  "Int to FP conversions with direct moves require FPCVT");
6646  SDValue FP;
6647  SDValue Src = Op.getOperand(0);
6648  bool SinglePrec = Op.getValueType() == MVT::f32;
6649  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6650  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6651  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6652  (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6653 
6654  if (WordInt) {
6655  FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6656  dl, MVT::f64, Src);
6657  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6658  }
6659  else {
6660  FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6661  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6662  }
6663 
6664  return FP;
6665 }
6666 
6667 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6668  SelectionDAG &DAG) const {
6669  SDLoc dl(Op);
6670 
6671  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6672  if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6673  return SDValue();
6674 
6675  SDValue Value = Op.getOperand(0);
6676  // The values are now known to be -1 (false) or 1 (true). To convert this
6677  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6678  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
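 // Checking the arithmetic: V = -1 gives 0.5*(-1) + 0.5 = 0.0, and V = 1
 // gives 0.5*1 + 0.5 = 1.0, as intended.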
6679  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6680 
6681  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6682 
6683  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6684 
6685  if (Op.getValueType() != MVT::v4f64)
6686  Value = DAG.getNode(ISD::FP_ROUND, dl,
6687  Op.getValueType(), Value,
6688  DAG.getIntPtrConstant(1, dl));
6689  return Value;
6690  }
6691 
6692  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6693  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6694  return SDValue();
6695 
6696  if (Op.getOperand(0).getValueType() == MVT::i1)
6697  return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6698  DAG.getConstantFP(1.0, dl, Op.getValueType()),
6699  DAG.getConstantFP(0.0, dl, Op.getValueType()));
6700 
6701  // If we have direct moves, we can do the entire conversion and skip the
6702  // store/load; however, without FPCVT we can't do most conversions.
6703  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
6704  Subtarget.isPPC64() && Subtarget.hasFPCVT())
6705  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6706 
6707  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6708  "UINT_TO_FP is supported only with FPCVT");
6709 
6710  // If we have FCFIDS, then use it when converting to single-precision.
6711  // Otherwise, convert to double-precision and then round.
6712  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6713  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
6714  : PPCISD::FCFIDS)
6715  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
6716  : PPCISD::FCFID);
6717  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6718  ? MVT::f32
6719  : MVT::f64;
6720 
6721  if (Op.getOperand(0).getValueType() == MVT::i64) {
6722  SDValue SINT = Op.getOperand(0);
6723  // When converting to single-precision, we actually need to convert
6724  // to double-precision first and then round to single-precision.
6725  // To avoid double-rounding effects during that operation, we have
6726  // to prepare the input operand. Bits that might be truncated when
6727  // converting to double-precision are replaced by a bit that won't
6728  // be lost at this stage, but is below the single-precision rounding
6729  // position.
6730  //
6731  // However, if -enable-unsafe-fp-math is in effect, accept double
6732  // rounding to avoid the extra overhead.
6733  if (Op.getValueType() == MVT::f32 &&
6734  !Subtarget.hasFPCVT() &&
6735  !DAG.getTarget().Options.UnsafeFPMath) {
6736 
6737  // Twiddle input to make sure the low 11 bits are zero. (If this
6738  // is the case, we are guaranteed the value will fit into the 53 bit
6739  // mantissa of an IEEE double-precision value without rounding.)
6740  // If any of those low 11 bits were not zero originally, make sure
6741  // bit 12 (value 2048) is set instead, so that the final rounding
6742  // to single-precision gets the correct result.
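 // Worked example (illustrative): for SINT = 0x1003 the low 11 bits are
 // 0x003, so:
 //   Round = (0x1003 & 2047) + 2047 = 0x802   (bit 11 now set)
 //   Round |= 0x1003   ->  0x1803
 //   Round &= ~2047    ->  0x1800
 // i.e. the low 11 bits collapse into the single sticky bit 0x800.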
6743  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6744  SINT, DAG.getConstant(2047, dl, MVT::i64));
6745  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6746  Round, DAG.getConstant(2047, dl, MVT::i64));
6747  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6748  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6749  Round, DAG.getConstant(-2048, dl, MVT::i64));
6750 
6751  // However, we cannot use that value unconditionally: if the magnitude
6752  // of the input value is small, the bit-twiddling we did above might
6753  // end up visibly changing the output. Fortunately, in that case, we
6754  // don't need to twiddle bits since the original input will convert
6755  // exactly to double-precision floating-point already. Therefore,
6756  // construct a conditional to use the original value if the top 11
6757  // bits are all sign-bit copies, and use the rounded value computed
6758  // above otherwise.
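 // For instance (illustrative): any SINT that fits in 54 bits has all of
 // its top 11 bits equal to the sign bit, so SRA by 53 yields 0 or -1,
 // Cond becomes 1 or 0, the unsigned > 1 test fails, and the original
 // (exactly representable) value is used unchanged.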
6759  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
6760  SINT, DAG.getConstant(53, dl, MVT::i32));
6761  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
6762  Cond, DAG.getConstant(1, dl, MVT::i64));
6763  Cond = DAG.getSetCC(dl, MVT::i32,
6764  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
6765 
6766  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
6767  }
6768 
6769  ReuseLoadInfo RLI;
6770  SDValue Bits;
6771 
6772  MachineFunction &MF = DAG.getMachineFunction();
6773  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
6774  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6775  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6776  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6777  } else if (Subtarget.hasLFIWAX() &&
6778  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
6779  MachineMemOperand *MMO =
6780  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6781  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6782  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6783  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
6784  DAG.getVTList(MVT::f64, MVT::Other),
6785  Ops, MVT::i32, MMO);
6786  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6787  } else if (Subtarget.hasFPCVT() &&
6788  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
6789  MachineMemOperand *MMO =
6790  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6791  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6792  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6793  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
6794  DAG.getVTList(MVT::f64, MVT::Other),
6795  Ops, MVT::i32, MMO);
6796  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6797  } else if (((Subtarget.hasLFIWAX() &&
6798  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
6799  (Subtarget.hasFPCVT() &&
6800  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
6801  SINT.getOperand(0).getValueType() == MVT::i32) {
6802  MachineFrameInfo &MFI = MF.getFrameInfo();
6803  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6804 
6805  int FrameIdx = MFI.CreateStackObject(4, 4, false);
6806  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6807 
6808  SDValue Store =
6809  DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
6810  MachinePointerInfo::getFixedStack(
6811  DAG.getMachineFunction(), FrameIdx));
6812 
6813  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6814  "Expected an i32 store");
6815 
6816  RLI.Ptr = FIdx;
6817  RLI.Chain = Store;
6818  RLI.MPI =
6819  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6820  RLI.Alignment = 4;
6821 
6822  MachineMemOperand *MMO =
6823  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6824  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6825  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6826  Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
6827  PPCISD::LFIWZX : PPCISD::LFIWAX,
6828  dl, DAG.getVTList(MVT::f64, MVT::Other),
6829  Ops, MVT::i32, MMO);
6830  } else
6831  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
6832 
6833  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
6834 
6835  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6836  FP = DAG.getNode(ISD::FP_ROUND, dl,
6837  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
6838  return FP;
6839  }
6840 
6841  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
6842  "Unhandled INT_TO_FP type in custom expander!");
6843  // Since we only generate this in 64-bit mode, we can take advantage of
6844  // 64-bit registers. In particular, sign extend the input value into the
6845  // 64-bit register with extsw, store the whole 64-bit value onto the stack,
6846  // then lfd it and fcfid it.
6847  MachineFunction &MF = DAG.getMachineFunction();
6848  MachineFrameInfo &MFI = MF.getFrameInfo();
6849  EVT PtrVT = getPointerTy(MF.getDataLayout());
6850 
6851  SDValue Ld;
6852  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
6853  ReuseLoadInfo RLI;
6854  bool ReusingLoad;
6855  if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
6856  DAG))) {
6857  int FrameIdx = MFI.CreateStackObject(4, 4, false);
6858  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6859 
6860  SDValue Store =
6861  DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
6862  MachinePointerInfo::getFixedStack(
6863  DAG.getMachineFunction(), FrameIdx));
6864 
6865  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6866  "Expected an i32 store");
6867 
6868  RLI.Ptr = FIdx;
6869  RLI.Chain = Store;
6870  RLI.MPI =
6871  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6872  RLI.Alignment = 4;
6873  }
6874 
6875  MachineMemOperand *MMO =
6876  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6877  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6878  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6879  Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
6880  PPCISD::LFIWZX : PPCISD::LFIWAX,
6881  dl, DAG.getVTList(MVT::f64, MVT::Other),
6882  Ops, MVT::i32, MMO);
6883  if (ReusingLoad)
6884  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
6885  } else {
6886  assert(Subtarget.isPPC64() &&
6887  "i32->FP without LFIWAX supported only on PPC64");
6888 
6889  int FrameIdx = MFI.CreateStackObject(8, 8, false);
6890  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6891 
6892  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
6893  Op.getOperand(0));
6894 
6895  // STD the extended value into the stack slot.
6896  SDValue Store = DAG.getStore(
6897  DAG.getEntryNode(), dl, Ext64, FIdx,
6898  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6899 
6900  // Load the value as a double.
6901  Ld = DAG.getLoad(
6902  MVT::f64, dl, Store, FIdx,
6903  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6904  }
6905 
6906  // FCFID it and return it.
6907  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6908  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6909  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6910  DAG.getIntPtrConstant(0, dl));
6911  return FP;
6912 }
6913 
6914 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6915  SelectionDAG &DAG) const {
6916  SDLoc dl(Op);
6917  /*
6918  The rounding mode is in bits 30:31 of the FPSCR, and has the following
6919  settings:
6920  00 Round to nearest
6921  01 Round to 0
6922  10 Round to +inf
6923  11 Round to -inf
6924 
6925  FLT_ROUNDS, on the other hand, expects the following:
6926  -1 Undefined
6927  0 Round to 0
6928  1 Round to nearest
6929  2 Round to +inf
6930  3 Round to -inf
6931 
6932  To perform the conversion, we do:
6933  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
6934  */
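 // A quick check of the formula against all four FPSCR[RN] settings:
 //   RN=00 (nearest): 0 ^ (3 >> 1) = 1
 //   RN=01 (zero):    1 ^ (2 >> 1) = 0
 //   RN=10 (+inf):    2 ^ (1 >> 1) = 2
 //   RN=11 (-inf):    3 ^ (0 >> 1) = 3
 // which matches the FLT_ROUNDS encoding listed above.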
6935 
6936  MachineFunction &MF = DAG.getMachineFunction();
6937  EVT VT = Op.getValueType();
6938  EVT PtrVT = getPointerTy(MF.getDataLayout());
6939 
6940  // Save FP Control Word to register
6941  EVT NodeTys[] = {
6942  MVT::f64, // return register
6943  MVT::Glue // unused in this context
6944  };
6945  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6946 
6947  // Save FP register to stack slot
6948  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
6949  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6950  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
6951  MachinePointerInfo());
6952 
6953  // Load FP Control Word from low 32 bits of stack slot.
6954  SDValue Four = DAG.getConstant(4, dl, PtrVT);
6955  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6956  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
6957 
6958  // Transform as necessary
6959  SDValue CWD1 =
6960  DAG.getNode(ISD::AND, dl, MVT::i32,
6961  CWD, DAG.getConstant(3, dl, MVT::i32));
6962  SDValue CWD2 =
6963  DAG.getNode(ISD::SRL, dl, MVT::i32,
6964  DAG.getNode(ISD::AND, dl, MVT::i32,
6965  DAG.getNode(ISD::XOR, dl, MVT::i32,
6966  CWD, DAG.getConstant(3, dl, MVT::i32)),
6967  DAG.getConstant(3, dl, MVT::i32)),
6968  DAG.getConstant(1, dl, MVT::i32));
6969 
6970  SDValue RetVal =
6971  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6972 
6973  return DAG.getNode((VT.getSizeInBits() < 16 ?
6974  ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6975 }
6976 
6977 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6978  EVT VT = Op.getValueType();
6979  unsigned BitWidth = VT.getSizeInBits();
6980  SDLoc dl(Op);
6981  assert(Op.getNumOperands() == 3 &&
6982  VT == Op.getOperand(1).getValueType() &&
6983  "Unexpected SHL!");
6984 
6985  // Expand into a bunch of logical ops. Note that these ops
6986  // depend on the PPC behavior for oversized shift amounts.
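 // Worked example (illustrative), BitWidth = 32 and Amt = 40:
 //   Tmp1 = 32 - 40 = -8 is an oversized shift amount, so Tmp3 = Lo >> Tmp1
 //   yields 0, as does Tmp2 = Hi << 40.
 //   Tmp5 = 40 - 32 = 8, so Tmp6 = Lo << 8.
 //   Result: OutHi = Lo << 8 and OutLo = 0, exactly (Hi:Lo) << 40.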
6987  SDValue Lo = Op.getOperand(0);
6988  SDValue Hi = Op.getOperand(1);
6989  SDValue Amt = Op.getOperand(2);
6990  EVT AmtVT = Amt.getValueType();
6991 
6992  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6993  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6994  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6995  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6996  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6997  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6998  DAG.getConstant(-BitWidth, dl, AmtVT));
6999  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7000  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7001  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7002  SDValue OutOps[] = { OutLo, OutHi };
7003  return DAG.getMergeValues(OutOps, dl);
7004 }
7005 
7006 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7007  EVT VT = Op.getValueType();
7008  SDLoc dl(Op);
7009  unsigned BitWidth = VT.getSizeInBits();
7010  assert(Op.getNumOperands() == 3 &&
7011  VT == Op.getOperand(1).getValueType() &&
7012  "Unexpected SRL!");
7013 
7014  // Expand into a bunch of logical ops. Note that these ops
7015  // depend on the PPC behavior for oversized shift amounts.
7016  SDValue Lo = Op.getOperand(0);
7017  SDValue Hi = Op.getOperand(1);
7018  SDValue Amt = Op.getOperand(2);
7019  EVT AmtVT = Amt.getValueType();
7020 
7021  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7022  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7023  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7024  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7025  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7026  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7027  DAG.getConstant(-BitWidth, dl, AmtVT));
7028  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7029  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7030  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7031  SDValue OutOps[] = { OutLo, OutHi };
7032  return DAG.getMergeValues(OutOps, dl);
7033 }
7034 
7035 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7036  SDLoc dl(Op);
7037  EVT VT = Op.getValueType();
7038  unsigned BitWidth = VT.getSizeInBits();
7039  assert(Op.getNumOperands() == 3 &&
7040  VT == Op.getOperand(1).getValueType() &&
7041  "Unexpected SRA!");
7042 
7043  // Expand into a bunch of logical ops, followed by a select_cc.
7044  SDValue Lo = Op.getOperand(0);
7045  SDValue Hi = Op.getOperand(1);
7046  SDValue Amt = Op.getOperand(2);
7047  EVT AmtVT = Amt.getValueType();
7048 
7049  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7050  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7051  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7052  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7053  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7054  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7055  DAG.getConstant(-BitWidth, dl, AmtVT));
7056  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7057  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7058  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7059  Tmp4, Tmp6, ISD::SETLE);
7060  SDValue OutOps[] = { OutLo, OutHi };
7061  return DAG.getMergeValues(OutOps, dl);
7062 }
7063 
7064 //===----------------------------------------------------------------------===//
7065 // Vector related lowering.
7066 //
7067 
7068 /// BuildSplatI - Build a canonical splati of Val with an element size of
7069 /// SplatSize. Cast the result to VT.
7070 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7071  SelectionDAG &DAG, const SDLoc &dl) {
7072  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7073 
7074  static const MVT VTys[] = { // canonical VT to use for each size.
7075  MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7076  };
7077 
7078  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7079 
7080  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7081  if (Val == -1)
7082  SplatSize = 1;
7083 
7084  EVT CanonicalVT = VTys[SplatSize-1];
7085 
7086  // Build a canonical splat for this value.
7087  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7088 }
7089 
7090 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7091 /// specified intrinsic ID.
7092 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7093  const SDLoc &dl, EVT DestVT = MVT::Other) {
7094  if (DestVT == MVT::Other) DestVT = Op.getValueType();
7095  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7096  DAG.getConstant(IID, dl, MVT::i32), Op);
7097 }
7098 
7099 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7100 /// specified intrinsic ID.
7101 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7102  SelectionDAG &DAG, const SDLoc &dl,
7103  EVT DestVT = MVT::Other) {
7104  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7105  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7106  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7107 }
7108 
7109 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7110 /// specified intrinsic ID.
7111 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7112  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7113  EVT DestVT = MVT::Other) {
7114  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7115  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7116  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7117 }
7118 
7119 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7120 /// amount. The result has the specified value type.
7121 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7122  SelectionDAG &DAG, const SDLoc &dl) {
7123  // Force LHS/RHS to be the right type.
7124  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7125  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7126 
7127  int Ops[16];
7128  for (unsigned i = 0; i != 16; ++i)
7129  Ops[i] = i + Amt;
7130  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7131  return DAG.getNode(ISD::BITCAST, dl, VT, T);
7132 }
7133 
7134 /// Do we have an efficient pattern in a .td file for this node?
7135 ///
7136 /// \param V - pointer to the BuildVectorSDNode being matched
7137 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7138 ///
7139 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7140 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7141 /// the opposite is true (expansion is beneficial) are:
7142 /// - The node builds a vector out of integers that are not 32 or 64-bits
7143 /// - The node builds a vector out of constants
7144 /// - The node is a "load-and-splat"
7145 /// In all other cases, we will choose to keep the BUILD_VECTOR.
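/// For example (illustrative): a v2f64 BUILD_VECTOR whose two operands are
/// the same f64 load is a "load-and-splat", so expanding it (to a single
/// load-and-splat instruction) beats keeping the BUILD_VECTOR node.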
7146  static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7147  bool HasDirectMove) {
7148  EVT VecVT = V->getValueType(0);
7149  bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
7150  (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7151  if (!RightType)
7152  return false;
7153 
7154  bool IsSplat = true;
7155  bool IsLoad = false;
7156  SDValue Op0 = V->getOperand(0);
7157 
7158  // This function is called in a block that confirms the node is not a constant
7159  // splat. So a constant BUILD_VECTOR here means the vector is built out of
7160  // different constants.
7161  if (V->isConstant())
7162  return false;
7163  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7164  if (V->getOperand(i).isUndef())
7165  return false;
7166  // We want to expand nodes that represent load-and-splat even if the
7167  // loaded value is a floating point truncation or conversion to int.
7168  if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7169  (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7170  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7171  (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7172  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7173  (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7174  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7175  IsLoad = true;
7176  // If the operands are different or the input is not a load and has more
7177  // uses than just this BV node, then it isn't a splat.
7178  if (V->getOperand(i) != Op0 ||
7179  (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7180  IsSplat = false;
7181  }
7182  return !(IsSplat && IsLoad);
7183 }
7184 
7185 // If this is a case we can't handle, return null and let the default
7186 // expansion code take care of it. If we CAN select this case, and if it
7187 // selects to a single instruction, return Op. Otherwise, if we can codegen
7188 // this case more efficiently than a constant pool load, lower it to the
7189 // sequence of ops that should be used.
7190 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7191  SelectionDAG &DAG) const {
7192  SDLoc dl(Op);
7193  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7194  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7195 
7196  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7197  // We first build an i32 vector, load it into a QPX register,
7198  // then convert it to a floating-point vector and compare it
7199  // to a zero vector to get the boolean result.
7200  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7201  int FrameIdx = MFI.CreateStackObject(16, 16, false);
7202  MachinePointerInfo PtrInfo =
7203  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7204  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7205  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7206 
7207  assert(BVN->getNumOperands() == 4 &&
7208  "BUILD_VECTOR for v4i1 does not have 4 operands");
7209 
7210  bool IsConst = true;
7211  for (unsigned i = 0; i < 4; ++i) {
7212  if (BVN->getOperand(i).isUndef()) continue;
7213  if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7214  IsConst = false;
7215  break;
7216  }
7217  }
7218 
7219  if (IsConst) {
7220  Constant *One =
7221  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7222  Constant *NegOne =
7223  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7224 
7225  Constant *CV[4];
7226  for (unsigned i = 0; i < 4; ++i) {
7227  if (BVN->getOperand(i).isUndef())
7228  CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7229  else if (isNullConstant(BVN->getOperand(i)))
7230  CV[i] = NegOne;
7231  else
7232  CV[i] = One;
7233  }
7234 
7235  Constant *CP = ConstantVector::get(CV);
7236  SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7237  16 /* alignment */);
7238 
7239  SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7240  SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7241  return DAG.getMemIntrinsicNode(
7242  PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7243  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7244  }
7245 
7246  SmallVector<SDValue, 4> Stores;
7247  for (unsigned i = 0; i < 4; ++i) {
7248  if (BVN->getOperand(i).isUndef()) continue;
7249 
7250  unsigned Offset = 4*i;
7251  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7252  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7253 
7254  unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7255  if (StoreSize > 4) {
7256  Stores.push_back(
7257  DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7258  PtrInfo.getWithOffset(Offset), MVT::i32));
7259  } else {
7260  SDValue StoreValue = BVN->getOperand(i);
7261  if (StoreSize < 4)
7262  StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7263 
7264  Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7265  PtrInfo.getWithOffset(Offset)));
7266  }
7267  }
7268 
7269  SDValue StoreChain;
7270  if (!Stores.empty())
7271  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7272  else
7273  StoreChain = DAG.getEntryNode();
7274 
7275  // Now load from v4i32 into the QPX register; this will extend it to
7276  // v4i64 but not yet convert it to a floating point. Nevertheless, this
7277  // is typed as v4f64 because the QPX register integer states are not
7278  // explicitly represented.
7279 
7280  SDValue Ops[] = {StoreChain,
7281  DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7282  FIdx};
7283  SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7284 
7285  SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7286  dl, VTs, Ops, MVT::v4i32, PtrInfo);
7287  LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7288  DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7289  LoadedVect);
7290 
7291  SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7292 
7293  return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7294  }
7295 
7296  // All other QPX vectors are handled by generic code.
7297  if (Subtarget.hasQPX())
7298  return SDValue();
7299 
7300  // Check if this is a splat of a constant value.
7301  APInt APSplatBits, APSplatUndef;
7302  unsigned SplatBitSize;
7303  bool HasAnyUndefs;
7304  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7305  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7306  SplatBitSize > 32) {
7307  // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
7308  // lowered to VSX instructions under certain conditions.
7309  // Without VSX, there is no pattern more efficient than expanding the node.
7310  if (Subtarget.hasVSX() &&
7311  haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
7312  return Op;
7313  return SDValue();
7314  }
7315 
7316  unsigned SplatBits = APSplatBits.getZExtValue();
7317  unsigned SplatUndef = APSplatUndef.getZExtValue();
7318  unsigned SplatSize = SplatBitSize / 8;
7319 
7320  // First, handle single instruction cases.
7321 
7322  // All zeros?
7323  if (SplatBits == 0) {
7324  // Canonicalize all zero vectors to be v4i32.
7325  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7326  SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7327  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7328  }
7329  return Op;
7330  }
7331 
7332  // We have XXSPLTIB for constant splats one byte wide
7333  if (Subtarget.hasP9Vector() && SplatSize == 1) {
7334  // This is a splat of 1-byte elements with some elements potentially undef.
7335  // Rather than trying to match undef in the SDAG patterns, ensure that all
7336  // elements are the same constant.
7337  if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7338  SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
7339  dl, MVT::i32));
7340  SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
7341  if (Op.getValueType() != MVT::v16i8)
7342  return DAG.getBitcast(Op.getValueType(), NewBV);
7343  return NewBV;
7344  }
7345  return Op;
7346  }
7347 
7348  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7349  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7350  (32-SplatBitSize));
7351  if (SextVal >= -16 && SextVal <= 15)
7352  return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7353 
7354  // Two instruction sequences.
7355 
7356  // If this value is in the range [-32,30] and is even, use:
7357  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7358  // If this value is in the range [17,31] and is odd, use:
7359  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7360  // If this value is in the range [-31,-17] and is odd, use:
7361  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7362  // Note the last two are three-instruction sequences.
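 // Worked example (illustrative): SextVal = 30 (even) becomes
 // VSPLTI(15) + VSPLTI(15), and SextVal = 27 (odd, in [17,31]) becomes
 // VSPLTI(11) - VSPLTI(-16), since 11 - (-16) = 27.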
7363  if (SextVal >= -32 && SextVal <= 31) {
7364  // To avoid having these optimizations undone by constant folding,
7365  // we convert to a pseudo that will be expanded later into one of
7366  // the above forms.
7367  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7368  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7369  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7370  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7371  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7372  if (VT == Op.getValueType())
7373  return RetVal;
7374  else
7375  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7376  }
7377 
7378  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
7379  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
7380  // for fneg/fabs.
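 // This works because vspltisw -1 yields 0xFFFFFFFF in each word and vslw
 // uses only the low 5 bits of each shift amount (here 31), so each
 // element becomes 0xFFFFFFFF << 31 = 0x8000_0000.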
7381  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7382  // Make -1 and vspltisw -1:
7383  SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7384 
7385  // Make the VSLW intrinsic, computing 0x8000_0000.
7386  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7387  OnesV, DAG, dl);
7388 
7389  // xor by OnesV to invert it.
7390  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7391  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7392  }
7393 
7394  // Check to see if this is a wide variety of vsplti*, binop self cases.
7395  static const signed char SplatCsts[] = {
7396  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7397  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7398  };
7399 
7400  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7401  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7402  // cases which are ambiguous (e.g. formation of 0x8000_0000).
7403  int i = SplatCsts[idx];
7404 
7405  // Figure out what shift amount will be used by altivec if shifted by i in
7406  // this splat size.
7407  unsigned TypeShiftAmt = i & (SplatBitSize-1);
7408 
7409  // vsplti + shl self.
7410  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7411  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7412  static const unsigned IIDs[] = { // Intrinsic to use for each size.
7413  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7414  Intrinsic::ppc_altivec_vslw
7415  };
7416  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7417  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7418  }
7419 
7420  // vsplti + srl self.
7421  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7422  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7423  static const unsigned IIDs[] = { // Intrinsic to use for each size.
7424  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7425  Intrinsic::ppc_altivec_vsrw
7426  };
7427  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7428  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7429  }
7430 
7431  // vsplti + sra self.
7432  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7433  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7434  static const unsigned IIDs[] = { // Intrinsic to use for each size.
7435  Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7436  Intrinsic::ppc_altivec_vsraw
7437  };
7438  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7439  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7440  }
7441 
7442  // vsplti + rol self.
7443  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7444  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7445  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7446  static const unsigned IIDs[] = { // Intrinsic to use for each size.
7447  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7448  Intrinsic::ppc_altivec_vrlw
7449  };
7450  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7451  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7452  }
7453 
7454  // t = vsplti c, result = vsldoi t, t, 1
7455  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7456  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7457  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7458  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7459  }
7460  // t = vsplti c, result = vsldoi t, t, 2
7461  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7462  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7463  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7464  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7465  }
7466  // t = vsplti c, result = vsldoi t, t, 3
7467  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7468  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7469  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7470  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7471  }
7472  }
7473 
7474  return SDValue();
7475 }
7476 
7477 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7478 /// the specified operations to build the shuffle.
7479 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7480  SDValue RHS, SelectionDAG &DAG,
7481  const SDLoc &dl) {
7482  unsigned OpNum = (PFEntry >> 26) & 0x0F;
7483  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7484  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
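 // PFEntry layout (as decoded above): bits 31:30 hold the cost, 29:26 the
 // operation, and 25:13 / 12:0 the LHS/RHS IDs. Each ID is a base-9
 // encoding of four element selectors (0-7, with 8 meaning undef); e.g.
 // 102 = <0,1,2,3> and 3382 = <4,5,6,7>, the two OP_COPY cases below.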
7485 
7486  enum {
7487  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7488  OP_VMRGHW,
7489  OP_VMRGLW,
7490  OP_VSPLTISW0,
7491  OP_VSPLTISW1,
7492  OP_VSPLTISW2,
7493  OP_VSPLTISW3,
7494  OP_VSLDOI4,
7495  OP_VSLDOI8,
7496  OP_VSLDOI12
7497  };
7498 
7499  if (OpNum == OP_COPY) {
7500  if (LHSID == (1*9+2)*9+3) return LHS;
7501  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7502  return RHS;
7503  }
7504 
7505  SDValue OpLHS, OpRHS;
7506  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7507  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7508 
7509  int ShufIdxs[16];
7510  switch (OpNum) {
7511  default: llvm_unreachable("Unknown i32 permute!");
7512  case OP_VMRGHW:
7513  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
7514  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7515  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
7516  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7517  break;
7518  case OP_VMRGLW:
7519  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7520  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7521  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7522  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7523  break;
7524  case OP_VSPLTISW0:
7525  for (unsigned i = 0; i != 16; ++i)
7526  ShufIdxs[i] = (i&3)+0;
7527  break;
7528  case OP_VSPLTISW1:
7529  for (unsigned i = 0; i != 16; ++i)
7530  ShufIdxs[i] = (i&3)+4;
7531  break;
7532  case OP_VSPLTISW2:
7533  for (unsigned i = 0; i != 16; ++i)
7534  ShufIdxs[i] = (i&3)+8;
7535  break;
7536  case OP_VSPLTISW3:
7537  for (unsigned i = 0; i != 16; ++i)
7538  ShufIdxs[i] = (i&3)+12;
7539  break;
7540  case OP_VSLDOI4:
7541  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7542  case OP_VSLDOI8:
7543  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7544  case OP_VSLDOI12:
7545  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7546  }
7547  EVT VT = OpLHS.getValueType();
7548  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7549  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7550  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7551  return DAG.getNode(ISD::BITCAST, dl, VT, T);
7552 }
7553 
7554 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
7555 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
7556 /// return the code it can be lowered into. Worst case, it can always be
7557 /// lowered into a vperm.
7558 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7559  SelectionDAG &DAG) const {
7560  SDLoc dl(Op);
7561  SDValue V1 = Op.getOperand(0);
7562  SDValue V2 = Op.getOperand(1);
7563  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7564  EVT VT = Op.getValueType();
7565  bool isLittleEndian = Subtarget.isLittleEndian();
7566 
7567  unsigned ShiftElts, InsertAtByte;
7568  bool Swap;
7569  if (Subtarget.hasP9Vector() &&
7570  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7571  isLittleEndian)) {
7572  if (Swap)
7573  std::swap(V1, V2);
7574  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7575  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7576  if (ShiftElts) {
7577  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7578  DAG.getConstant(ShiftElts, dl, MVT::i32));
7579  SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7580  DAG.getConstant(InsertAtByte, dl, MVT::i32));
7581  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7582  }
7583  SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7584  DAG.getConstant(InsertAtByte, dl, MVT::i32));
7585  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7586  }
7587 
7588  if (Subtarget.hasVSX()) {
7589  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7590  int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7591 
7592  // If the source for the shuffle is a scalar_to_vector that came from a
7593  // 32-bit load, it will have used LXVWSX so we don't need to splat again.
7594  if (Subtarget.hasP9Vector() &&
7595  ((isLittleEndian && SplatIdx == 3) ||
7596  (!isLittleEndian && SplatIdx == 0))) {
7597  SDValue Src = V1.getOperand(0);
7598  if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
7599  Src.getOperand(0).getOpcode() == ISD::LOAD &&
7600  Src.getOperand(0).hasOneUse())
7601  return V1;
7602  }
7603  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7604  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7605  DAG.getConstant(SplatIdx, dl, MVT::i32));
7606  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7607  }
7608 
7609  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7610  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7611  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7612  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7613  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7614  }
7615 
7616  }
7617 
7618  if (Subtarget.hasQPX()) {
7619  if (VT.getVectorNumElements() != 4)
7620  return SDValue();
7621 
7622  if (V2.isUndef()) V2 = V1;
7623 
7624  int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7625  if (AlignIdx != -1) {
7626  return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7627  DAG.getConstant(AlignIdx, dl, MVT::i32));
7628  } else if (SVOp->isSplat()) {
7629  int SplatIdx = SVOp->getSplatIndex();
7630  if (SplatIdx >= 4) {
7631  std::swap(V1, V2);
7632  SplatIdx -= 4;
7633  }
7634 
7635  return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7636  DAG.getConstant(SplatIdx, dl, MVT::i32));
7637  }
7638 
7639  // Lower this into a qvgpci/qvfperm pair.
7640 
7641  // Compute the qvgpci literal
7642  unsigned idx = 0;
7643  for (unsigned i = 0; i < 4; ++i) {
7644  int m = SVOp->getMaskElt(i);
7645  unsigned mm = m >= 0 ? (unsigned) m : i;
7646  idx |= mm << (3-i)*3;
7647  }
7648 
7649  SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7650  DAG.getConstant(idx, dl, MVT::i32));
7651  return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7652  }
7653 
7654  // Cases that are handled by instructions that take permute immediates
7655  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7656  // selected by the instruction selector.
7657  if (V2.isUndef()) {
7658  if (PPC::isSplatShuffleMask(SVOp, 1) ||
7659  PPC::isSplatShuffleMask(SVOp, 2) ||
7660  PPC::isSplatShuffleMask(SVOp, 4) ||
7661  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7662  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7663  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7664  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7665  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7666  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7667  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7668  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7669  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
7670  (Subtarget.hasP8Altivec() && (
7671  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7672  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
7673  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
7674  return Op;
7675  }
7676  }
7677 
7678  // Altivec has a variety of "shuffle immediates" that take two vector inputs
7679  // and produce a fixed permutation. If any of these match, do not lower to
7680  // VPERM.
7681  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
7682  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7683  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7684  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7685  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7686  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7687  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7688  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7689  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7690  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7691  (Subtarget.hasP8Altivec() && (
7692  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7693  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
7694  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
7695  return Op;
7696 
7697  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
7698  // perfect shuffle table to emit an optimal matching sequence.
7699  ArrayRef<int> PermMask = SVOp->getMask();
7700 
7701  unsigned PFIndexes[4];
7702  bool isFourElementShuffle = true;
7703  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7704  unsigned EltNo = 8; // Start out undef.
7705  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
7706  if (PermMask[i*4+j] < 0)
7707  continue; // Undef, ignore it.
7708 
7709  unsigned ByteSource = PermMask[i*4+j];
7710  if ((ByteSource & 3) != j) {
7711  isFourElementShuffle = false;
7712  break;
7713  }
7714 
7715  if (EltNo == 8) {
7716  EltNo = ByteSource/4;
7717  } else if (EltNo != ByteSource/4) {
7718  isFourElementShuffle = false;
7719  break;
7720  }
7721  }
7722  PFIndexes[i] = EltNo;
7723  }
7724 
7725  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7726  // perfect shuffle vector to determine if it is cost effective to do this as
7727  // discrete instructions, or whether we should use a vperm.
7728  // For now, we skip this for little endian until such time as we have a
7729  // little-endian perfect shuffle table.
7730  if (isFourElementShuffle && !isLittleEndian) {
7731  // Compute the index in the perfect shuffle table.
7732  unsigned PFTableIndex =
7733  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7734 
7735  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7736  unsigned Cost = (PFEntry >> 30);
7737 
7738  // Determining when to avoid vperm is tricky. Many things affect the cost
7739  // of vperm, particularly how many times the perm mask needs to be computed.
7740  // For example, if the perm mask can be hoisted out of a loop or is already
7741  // used (perhaps because there are multiple permutes with the same shuffle
7742  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
7743  // the loop requires an extra register.
7744  //
7745  // As a compromise, we only emit discrete instructions if the shuffle can be
7746  // generated in 3 or fewer operations. When we have loop information
7747  // available, if this block is within a loop, we should avoid using vperm
7748  // for 3-operation perms and use a constant pool load instead.
7749  if (Cost < 3)
7750  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7751  }
7752 
7753  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7754  // vector that will get spilled to the constant pool.
7755  if (V2.isUndef()) V2 = V1;
7756 
7757  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
7758  // that it is in input element units, not in bytes. Convert now.
7759 
7760  // For little endian, the order of the input vectors is reversed, and
7761  // the permutation mask is complemented with respect to 31. This is
7762  // necessary to produce proper semantics with the big-endian-biased vperm
7763  // instruction.
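 // Worked example (illustrative): on little endian, splatting element 0 of
 // a v4i32 vector emits, for each result element, mask bytes
 // 31 - (0*4 + j) for j = 0..3, i.e. 31,30,29,28, and the inputs are
 // passed as (V2, V1) so that vperm's big-endian byte numbering still
 // selects the intended bytes.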
7764  EVT EltVT = V1.getValueType().getVectorElementType();
7765  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7766 
7767  SmallVector<SDValue, 16> ResultMask;
7768  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7769  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7770 
7771  for (unsigned j = 0; j != BytesPerElement; ++j)
7772  if (isLittleEndian)
7773  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
7774  dl, MVT::i32));
7775  else
7776  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
7777  MVT::i32));
7778  }
7779 
7780  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
7781  if (isLittleEndian)
7782  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7783  V2, V1, VPermMask);
7784  else
7785  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7786  V1, V2, VPermMask);
7787 }
7788 
7789 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
7790  /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
7791 /// information about the intrinsic.
7792 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
7793  bool &isDot, const PPCSubtarget &Subtarget) {
7794  unsigned IntrinsicID =
7795  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7796  CompareOpc = -1;
7797  isDot = false;
7798  switch (IntrinsicID) {
7799  default: return false;
7800  // Comparison predicates.
7801  case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
7802  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7803  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
7804  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
7805  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7806  case Intrinsic::ppc_altivec_vcmpequd_p:
7807  if (Subtarget.hasP8Altivec()) {
7808  CompareOpc = 199;
7809  isDot = 1;
7810  } else
7811  return false;
7812 
7813  break;
7814  case Intrinsic::ppc_altivec_vcmpneb_p:
7815  case Intrinsic::ppc_altivec_vcmpneh_p:
7816  case Intrinsic::ppc_altivec_vcmpnew_p:
7817  case Intrinsic::ppc_altivec_vcmpnezb_p:
7818  case Intrinsic::ppc_altivec_vcmpnezh_p:
7819  case Intrinsic::ppc_altivec_vcmpnezw_p:
7820  if (Subtarget.hasP9Altivec()) {
7821  switch(IntrinsicID) {
7822  default: llvm_unreachable("Unknown comparison intrinsic.");
7823  case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
7824  case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
7825  case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
7826  case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
7827  case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
7828  case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
7829  }
7830  isDot = 1;
7831  } else
7832  return false;
7833 
7834  break;
7835  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7836  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7837  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7838  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7839  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7840  case Intrinsic::ppc_altivec_vcmpgtsd_p:
7841  if (Subtarget.hasP8Altivec()) {
7842  CompareOpc = 967;
7843  isDot = 1;
7844  } else
7845  return false;
7846 
7847  break;
7848  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7849  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7850  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7851  case Intrinsic::ppc_altivec_vcmpgtud_p:
7852  if (Subtarget.hasP8Altivec()) {
7853  CompareOpc = 711;
7854  isDot = 1;
7855  } else
7856  return false;
7857 
7858  break;
7859  // VSX predicate comparisons use the same infrastructure
7860  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
7861  case Intrinsic::ppc_vsx_xvcmpgedp_p:
7862  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
7863  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
7864  case Intrinsic::ppc_vsx_xvcmpgesp_p:
7865  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
7866  if (Subtarget.hasVSX()) {
7867  switch (IntrinsicID) {
7868  case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
7869  case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
7870  case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
7871  case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
7872  case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
7873  case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
7874  }
7875  isDot = 1;
7876  }
7877  else
7878  return false;
7879 
7880  break;
7881 
7882  // Normal Comparisons.
7883  case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
7884  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
7885  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
7886  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
7887  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
7888  case Intrinsic::ppc_altivec_vcmpequd:
7889  if (Subtarget.hasP8Altivec()) {
7890  CompareOpc = 199;
7891  isDot = 0;
7892  } else
7893  return false;
7894 
7895  break;
7896  case Intrinsic::ppc_altivec_vcmpneb:
7897  case Intrinsic::ppc_altivec_vcmpneh:
7898  case Intrinsic::ppc_altivec_vcmpnew:
7899  case Intrinsic::ppc_altivec_vcmpnezb:
7900  case Intrinsic::ppc_altivec_vcmpnezh:
7901  case Intrinsic::ppc_altivec_vcmpnezw:
7902  if (Subtarget.hasP9Altivec()) {
7903  switch (IntrinsicID) {
7904  default: llvm_unreachable("Unknown comparison intrinsic.");
7905  case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
7906  case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
7907  case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
7908  case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
7909  case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
7910  case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
7911  }
7912  isDot = 0;
7913  } else
7914  return false;
7915  break;
7916  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
7917  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
7918  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
7919  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
7920  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
7921  case Intrinsic::ppc_altivec_vcmpgtsd:
7922  if (Subtarget.hasP8Altivec()) {
7923  CompareOpc = 967;
7924  isDot = 0;
7925  } else
7926  return false;
7927 
7928  break;
7929  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
7930  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
7931  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
7932  case Intrinsic::ppc_altivec_vcmpgtud:
7933  if (Subtarget.hasP8Altivec()) {
7934  CompareOpc = 711;
7935  isDot = 0;
7936  } else
7937  return false;
7938 
7939  break;
7940  }
7941  return true;
7942 }
7943 
7944 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7945 /// lower, do it, otherwise return null.
7946 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7947  SelectionDAG &DAG) const {
7948  unsigned IntrinsicID =
7949  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7950 
7951  if (IntrinsicID == Intrinsic::thread_pointer) {
7952  // Reads the thread pointer register, used for __builtin_thread_pointer.
7953  bool is64bit = Subtarget.isPPC64();
7954  return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
7955  is64bit ? MVT::i64 : MVT::i32);
7956  }
7957 
7958  // If this is a lowered altivec predicate compare, CompareOpc is set to the
7959  // opcode number of the comparison.
7960  SDLoc dl(Op);
7961  int CompareOpc;
7962  bool isDot;
7963  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
7964  return SDValue(); // Don't custom lower most intrinsics.
7965 
7966  // If this is a non-dot comparison, make the VCMP node and we are done.
7967  if (!isDot) {
7968  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7969  Op.getOperand(1), Op.getOperand(2),
7970  DAG.getConstant(CompareOpc, dl, MVT::i32));
7971  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7972  }
7973 
7974  // Create the PPCISD altivec 'dot' comparison node.
7975  SDValue Ops[] = {
7976  Op.getOperand(2), // LHS
7977  Op.getOperand(3), // RHS
7978  DAG.getConstant(CompareOpc, dl, MVT::i32)
7979  };
7980  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7981  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7982 
7983  // Now that we have the comparison, emit a copy from the CR to a GPR.
7984  // This is flagged to the above dot comparison.
7985  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7986  DAG.getRegister(PPC::CR6, MVT::i32),
7987  CompNode.getValue(1));
7988 
7989  // Unpack the result based on how the target uses it.
7990  unsigned BitNo; // Bit # of CR6.
7991  bool InvertBit; // Invert result?
7992  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7993  default: // Can't happen, don't crash on invalid number though.
7994  case 0: // Return the value of the EQ bit of CR6.
7995  BitNo = 0; InvertBit = false;
7996  break;
7997  case 1: // Return the inverted value of the EQ bit of CR6.
7998  BitNo = 0; InvertBit = true;
7999  break;
8000  case 2: // Return the value of the LT bit of CR6.
8001  BitNo = 2; InvertBit = false;
8002  break;
8003  case 3: // Return the inverted value of the LT bit of CR6.
8004  BitNo = 2; InvertBit = true;
8005  break;
8006  }
8007 
8008  // Shift the bit into the low position.
8009  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8010  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8011  // Isolate the bit.
8012  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8013  DAG.getConstant(1, dl, MVT::i32));
8014 
8015  // If we are supposed to, toggle the bit.
8016  if (InvertBit)
8017  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8018  DAG.getConstant(1, dl, MVT::i32));
8019  return Flags;
8020 }
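// Illustration (annotation, not in the original file): a sketch of the shift
// amount used above, assuming the usual MFOCRF layout in which the CR6 field
// occupies bits 7..4 of the 32-bit result (LT at bit 7, GT at 6, EQ at 5,
// SO at 4):
//
//   shift = 8 - (3 - BitNo);
//   // BitNo == 0 (EQ) -> srl by 5;  BitNo == 2 (LT) -> srl by 7
//   bit = (Flags >> shift) & 1;      // then optionally XOR'd with 1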
8021 
8022 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8023  SelectionDAG &DAG) const {
8024  SDLoc dl(Op);
8025  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
8026  // instructions), but for smaller types, we need to first extend up to v2i32
8027  // before going further.
8028  if (Op.getValueType() == MVT::v2i64) {
8029  EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8030  if (ExtVT != MVT::v2i32) {
8031  Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8032  Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8033  DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8034  ExtVT.getVectorElementType(), 4)));
8035  Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8036  Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8037  DAG.getValueType(MVT::v2i32));
8038  }
8039 
8040  return Op;
8041  }
8042 
8043  return SDValue();
8044 }
8045 
8046 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8047  SelectionDAG &DAG) const {
8048  SDLoc dl(Op);
8049  // Create a stack slot that is 16-byte aligned.
8050  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8051  int FrameIdx = MFI.CreateStackObject(16, 16, false);
8052  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8053  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8054 
8055  // Store the input value into Value#0 of the stack slot.
8056  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8057  MachinePointerInfo());
8058  // Load it out.
8059  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
8060 }
8061 
8062 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8063  SelectionDAG &DAG) const {
8065  "Should only be called for ISD::INSERT_VECTOR_ELT");
8067  // We have legal lowering for constant indices but not for variable ones.
8068  if (C)
8069  return Op;
8070  return SDValue();
8071 }
8072 
8073 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
8074  SelectionDAG &DAG) const {
8075  SDLoc dl(Op);
8076  SDNode *N = Op.getNode();
8077 
8078  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
8079  "Unknown extract_vector_elt type");
8080 
8081  SDValue Value = N->getOperand(0);
8082 
8083  // The first part of this is like the store lowering except that we don't
8084  // need to track the chain.
8085 
8086  // The values are now known to be -1 (false) or 1 (true). To convert this
8087  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8088  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8089  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8090 
8091  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8092  // understand how to form the extending load.
8093  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8094 
8095  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8096 
8097  // Now convert to an integer and store.
8098  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8099  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8100  Value);
8101 
8102  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8103  int FrameIdx = MFI.CreateStackObject(16, 16, false);
8104  MachinePointerInfo PtrInfo =
8105  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8106  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8107  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8108 
8109  SDValue StoreChain = DAG.getEntryNode();
8110  SDValue Ops[] = {StoreChain,
8111  DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8112  Value, FIdx};
8113  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8114 
8115  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8116  dl, VTs, Ops, MVT::v4i32, PtrInfo);
8117 
8118  // Extract the value requested.
8119  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8120  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8121  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8122 
8123  SDValue IntVal =
8124  DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
8125 
8126  if (!Subtarget.useCRBits())
8127  return IntVal;
8128 
8129  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
8130 }
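// Illustration (annotation, not in the original file): a worked example of
// the FMA trick used above. QPX booleans arrive as -1.0 (false) or +1.0
// (true), and fma(V, 0.5, 0.5) == 0.5*V + 0.5 maps them onto {0.0, 1.0}:
//
//   fma(-1.0, 0.5, 0.5) == 0.0   // false
//   fma(+1.0, 0.5, 0.5) == 1.0   // true
//
// The result is then converted to an unsigned integer (qvfctiwu) so the
// requested lane can be read back out of the stack slot as an i32.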
8131 
8132 /// Lowering for QPX v4i1 loads
8133 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
8134  SelectionDAG &DAG) const {
8135  SDLoc dl(Op);
8136  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
8137  SDValue LoadChain = LN->getChain();
8138  SDValue BasePtr = LN->getBasePtr();
8139 
8140  if (Op.getValueType() == MVT::v4f64 ||
8141  Op.getValueType() == MVT::v4f32) {
8142  EVT MemVT = LN->getMemoryVT();
8143  unsigned Alignment = LN->getAlignment();
8144 
8145  // If this load is properly aligned, then it is legal.
8146  if (Alignment >= MemVT.getStoreSize())
8147  return Op;
8148 
8149  EVT ScalarVT = Op.getValueType().getScalarType(),
8150  ScalarMemVT = MemVT.getScalarType();
8151  unsigned Stride = ScalarMemVT.getStoreSize();
8152 
8153  SDValue Vals[4], LoadChains[4];
8154  for (unsigned Idx = 0; Idx < 4; ++Idx) {
8155  SDValue Load;
8156  if (ScalarVT != ScalarMemVT)
8157  Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
8158  BasePtr,
8159  LN->getPointerInfo().getWithOffset(Idx * Stride),
8160  ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8161  LN->getMemOperand()->getFlags(), LN->getAAInfo());
8162  else
8163  Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
8164  LN->getPointerInfo().getWithOffset(Idx * Stride),
8165  MinAlign(Alignment, Idx * Stride),
8166  LN->getMemOperand()->getFlags(), LN->getAAInfo());
8167 
8168  if (Idx == 0 && LN->isIndexed()) {
8170  "Unknown addressing mode on vector load");
8171  Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
8172  LN->getAddressingMode());
8173  }
8174 
8175  Vals[Idx] = Load;
8176  LoadChains[Idx] = Load.getValue(1);
8177 
8178  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8179  DAG.getConstant(Stride, dl,
8180  BasePtr.getValueType()));
8181  }
8182 
8183  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8184  SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
8185 
8186  if (LN->isIndexed()) {
8187  SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
8188  return DAG.getMergeValues(RetOps, dl);
8189  }
8190 
8191  SDValue RetOps[] = { Value, TF };
8192  return DAG.getMergeValues(RetOps, dl);
8193  }
8194 
8195  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
8196  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
8197 
8198  // To lower v4i1 from a byte array, we load the byte elements of the
8199  // vector and then reuse the BUILD_VECTOR logic.
8200 
8201  SDValue VectElmts[4], VectElmtChains[4];
8202  for (unsigned i = 0; i < 4; ++i) {
8203  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8204  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8205 
8206  VectElmts[i] = DAG.getExtLoad(
8207  ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
8208  LN->getPointerInfo().getWithOffset(i), MVT::i8,
8209  /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
8210  VectElmtChains[i] = VectElmts[i].getValue(1);
8211  }
8212 
8213  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
8214  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
8215 
8216  SDValue RVals[] = { Value, LoadChain };
8217  return DAG.getMergeValues(RVals, dl);
8218 }
8219 
8220 /// Lowering for QPX v4i1 stores
8221 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
8222  SelectionDAG &DAG) const {
8223  SDLoc dl(Op);
8224  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
8225  SDValue StoreChain = SN->getChain();
8226  SDValue BasePtr = SN->getBasePtr();
8227  SDValue Value = SN->getValue();
8228 
8229  if (Value.getValueType() == MVT::v4f64 ||
8230  Value.getValueType() == MVT::v4f32) {
8231  EVT MemVT = SN->getMemoryVT();
8232  unsigned Alignment = SN->getAlignment();
8233 
8234  // If this store is properly aligned, then it is legal.
8235  if (Alignment >= MemVT.getStoreSize())
8236  return Op;
8237 
8238  EVT ScalarVT = Value.getValueType().getScalarType(),
8239  ScalarMemVT = MemVT.getScalarType();
8240  unsigned Stride = ScalarMemVT.getStoreSize();
8241 
8242  SDValue Stores[4];
8243  for (unsigned Idx = 0; Idx < 4; ++Idx) {
8244  SDValue Ex = DAG.getNode(
8245  ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
8246  DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
8247  SDValue Store;
8248  if (ScalarVT != ScalarMemVT)
8249  Store =
8250  DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
8251  SN->getPointerInfo().getWithOffset(Idx * Stride),
8252  ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8253  SN->getMemOperand()->getFlags(), SN->getAAInfo());
8254  else
8255  Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
8256  SN->getPointerInfo().getWithOffset(Idx * Stride),
8257  MinAlign(Alignment, Idx * Stride),
8258  SN->getMemOperand()->getFlags(), SN->getAAInfo());
8259 
8260  if (Idx == 0 && SN->isIndexed()) {
8262  "Unknown addressing mode on vector store");
8263  Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
8264  SN->getAddressingMode());
8265  }
8266 
8267  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8268  DAG.getConstant(Stride, dl,
8269  BasePtr.getValueType()));
8270  Stores[Idx] = Store;
8271  }
8272 
8273  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8274 
8275  if (SN->isIndexed()) {
8276  SDValue RetOps[] = { TF, Stores[0].getValue(1) };
8277  return DAG.getMergeValues(RetOps, dl);
8278  }
8279 
8280  return TF;
8281  }
8282 
8283  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
8284  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
8285 
8286  // The values are now known to be -1 (false) or 1 (true). To convert this
8287  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8288  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8289  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8290 
8291  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8292  // understand how to form the extending load.
8293  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8294 
8295  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8296 
8297  // Now convert to an integer and store.
8298  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8299  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8300  Value);
8301 
8302  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8303  int FrameIdx = MFI.CreateStackObject(16, 16, false);
8304  MachinePointerInfo PtrInfo =
8305  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8306  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8307  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8308 
8309  SDValue Ops[] = {StoreChain,
8310  DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8311  Value, FIdx};
8312  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8313 
8314  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8315  dl, VTs, Ops, MVT::v4i32, PtrInfo);
8316 
8317  // Move data into the byte array.
8318  SDValue Loads[4], LoadChains[4];
8319  for (unsigned i = 0; i < 4; ++i) {
8320  unsigned Offset = 4*i;
8321  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8322  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8323 
8324  Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
8325  PtrInfo.getWithOffset(Offset));
8326  LoadChains[i] = Loads[i].getValue(1);
8327  }
8328 
8329  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8330 
8331  SDValue Stores[4];
8332  for (unsigned i = 0; i < 4; ++i) {
8333  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8334  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8335 
8336  Stores[i] = DAG.getTruncStore(
8337  StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
8338  MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
8339  SN->getAAInfo());
8340  }
8341 
8342  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8343 
8344  return StoreChain;
8345 }
8346 
8347 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
8348  SDLoc dl(Op);
8349  if (Op.getValueType() == MVT::v4i32) {
8350  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8351 
8352  SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
8353  SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
8354 
8355  SDValue RHSSwap = // = vrlw RHS, 16
8356  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
8357 
8358  // Shrinkify inputs to v8i16.
8359  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
8360  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
8361  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
8362 
8363  // Low parts multiplied together, generating 32-bit results (we ignore the
8364  // top parts).
8365  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
8366  LHS, RHS, DAG, dl, MVT::v4i32);
8367 
8368  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
8369  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
8370  // Shift the high parts up 16 bits.
8371  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
8372  Neg16, DAG, dl);
8373  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
8374  } else if (Op.getValueType() == MVT::v8i16) {
8375  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8376 
8377  SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
8378 
8379  return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
8380  LHS, RHS, Zero, DAG, dl);
8381  } else if (Op.getValueType() == MVT::v16i8) {
8382  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8383  bool isLittleEndian = Subtarget.isLittleEndian();
8384 
8385  // Multiply the even 8-bit parts, producing 16-bit sums.
8386  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
8387  LHS, RHS, DAG, dl, MVT::v8i16);
8388  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
8389 
8390  // Multiply the odd 8-bit parts, producing 16-bit sums.
8391  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
8392  LHS, RHS, DAG, dl, MVT::v8i16);
8393  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
8394 
8395  // Merge the results together. Because vmuleub and vmuloub are
8396  // instructions with a big-endian bias, we must reverse the
8397  // element numbering and reverse the meaning of "odd" and "even"
8398  // when generating little endian code.
8399  int Ops[16];
8400  for (unsigned i = 0; i != 8; ++i) {
8401  if (isLittleEndian) {
8402  Ops[i*2 ] = 2*i;
8403  Ops[i*2+1] = 2*i+16;
8404  } else {
8405  Ops[i*2 ] = 2*i+1;
8406  Ops[i*2+1] = 2*i+1+16;
8407  }
8408  }
8409  if (isLittleEndian)
8410  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
8411  else
8412  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
8413  } else {
8414  llvm_unreachable("Unknown mul to lower!");
8415  }
8416 }
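// Illustration (annotation, not in the original file): a worked example of
// the 16-bit decomposition used for v4i32 above. Writing each 32-bit lane as
// a = ah*2^16 + al and b = bh*2^16 + bl, then modulo 2^32:
//
//   a*b = al*bl + ((al*bh + ah*bl) << 16)
//
// which is exactly LoProd (vmulouh) plus HiProd (vmsumuhm against the
// halfword-rotated RHS) shifted up by 16. For example, with a = 0x00020003
// and b = 0x00040005:
//
//   al*bl = 15,  al*bh + ah*bl = 12 + 10 = 22
//   a*b mod 2^32 = 15 + (22 << 16) = 0x0016000F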
8417 
8418 /// LowerOperation - Provide custom lowering hooks for some operations.
8419 ///
8420  SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8421  switch (Op.getOpcode()) {
8422  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8423  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
8424  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
8425  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
8426  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
8427  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
8428  case ISD::SETCC: return LowerSETCC(Op, DAG);
8429  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
8430  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
8431  case ISD::VASTART:
8432  return LowerVASTART(Op, DAG);
8433 
8434  case ISD::VAARG:
8435  return LowerVAARG(Op, DAG);
8436 
8437  case ISD::VACOPY:
8438  return LowerVACOPY(Op, DAG);
8439 
8440  case ISD::STACKRESTORE:
8441  return LowerSTACKRESTORE(Op, DAG);
8442 
8443  case ISD::DYNAMIC_STACKALLOC:
8444  return LowerDYNAMIC_STACKALLOC(Op, DAG);
8445 
8446  case ISD::GET_DYNAMIC_AREA_OFFSET:
8447  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8448 
8449  case ISD::EH_DWARF_CFA:
8450  return LowerEH_DWARF_CFA(Op, DAG);
8451 
8452  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
8453  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
8454 
8455  case ISD::LOAD: return LowerLOAD(Op, DAG);
8456  case ISD::STORE: return LowerSTORE(Op, DAG);
8457  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
8458  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
8459  case ISD::FP_TO_UINT:
8460  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
8461  SDLoc(Op));
8462  case ISD::UINT_TO_FP:
8463  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
8464  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
8465 
8466  // Lower 64-bit shifts.
8467  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
8468  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
8469  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
8470 
8471  // Vector-related lowering.
8472  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
8473  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
8474  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8475  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
8476  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
8477  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8478  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
8479  case ISD::MUL: return LowerMUL(Op, DAG);
8480 
8481  // For counter-based loop handling.
8482  case ISD::INTRINSIC_W_CHAIN: return SDValue();
8483 
8484  // Frame & Return address.
8485  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
8486  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
8487  }
8488 }
8489 
8490  void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
8491  SmallVectorImpl<SDValue> &Results,
8492  SelectionDAG &DAG) const {
8493  SDLoc dl(N);
8494  switch (N->getOpcode()) {
8495  default:
8496  llvm_unreachable("Do not know how to custom type legalize this operation!");
8497  case ISD::READCYCLECOUNTER: {
8498  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
8499  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
8500 
8501  Results.push_back(RTB);
8502  Results.push_back(RTB.getValue(1));
8503  Results.push_back(RTB.getValue(2));
8504  break;
8505  }
8506  case ISD::INTRINSIC_W_CHAIN: {
8507  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
8508  Intrinsic::ppc_is_decremented_ctr_nonzero)
8509  break;
8510 
8511  assert(N->getValueType(0) == MVT::i1 &&
8512  "Unexpected result type for CTR decrement intrinsic");
8513  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
8514  N->getValueType(0));
8515  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
8516  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
8517  N->getOperand(1));
8518 
8519  Results.push_back(NewInt);
8520  Results.push_back(NewInt.getValue(1));
8521  break;
8522  }
8523  case ISD::VAARG: {
8524  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
8525  return;
8526 
8527  EVT VT = N->getValueType(0);
8528 
8529  if (VT == MVT::i64) {
8530  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
8531 
8532  Results.push_back(NewNode);
8533  Results.push_back(NewNode.getValue(1));
8534  }
8535  return;
8536  }
8537  case ISD::FP_ROUND_INREG: {
8538  assert(N->getValueType(0) == MVT::ppcf128);
8539  assert(N->getOperand(0).getValueType() == MVT::ppcf128);
8540  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8541  MVT::f64, N->getOperand(0),
8542  DAG.getIntPtrConstant(0, dl));
8543  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8544  MVT::f64, N->getOperand(0),
8545  DAG.getIntPtrConstant(1, dl));
8546 
8547  // Add the two halves of the long double in round-to-zero mode.
8548  SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8549 
8550  // We know the low half is about to be thrown away, so just use something
8551  // convenient.
8552  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
8553  FPreg, FPreg));
8554  return;
8555  }
8556  case ISD::FP_TO_SINT:
8557  case ISD::FP_TO_UINT:
8558  // LowerFP_TO_INT() can only handle f32 and f64.
8559  if (N->getOperand(0).getValueType() == MVT::ppcf128)
8560  return;
8561  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8562  return;
8563  }
8564 }
8565 
8566 //===----------------------------------------------------------------------===//
8567 // Other Lowering Code
8568 //===----------------------------------------------------------------------===//
8569 
8570  static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8571  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8572  Function *Func = Intrinsic::getDeclaration(M, Id);
8573  return Builder.CreateCall(Func, {});
8574 }
8575 
8576 // The mappings for emitLeading/TrailingFence are taken from
8577 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8578  Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8579  AtomicOrdering Ord, bool IsStore,
8580  bool IsLoad) const {
8581  if (Ord == AtomicOrdering::SequentiallyConsistent)
8582  return callIntrinsic(Builder, Intrinsic::ppc_sync);
8583  if (isReleaseOrStronger(Ord))
8584  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8585  return nullptr;
8586 }
8587 
8588  Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8589  AtomicOrdering Ord, bool IsStore,
8590  bool IsLoad) const {
8591  if (IsLoad && isAcquireOrStronger(Ord))
8592  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8593  // FIXME: this is too conservative, a dependent branch + isync is enough.
8594  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8595  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8596  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8597  return nullptr;
8598 }
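// Illustration (annotation, not in the original file): a summary of the fence
// placement the two hooks above produce, assuming the C++11 mapping cited:
//
//   seq_cst access   : sync   before (and, for loads, lwsync after)
//   release store    : lwsync before
//   acquire load     : lwsync after
//   weaker orderings : no fence in either position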
8599 
8600  MachineBasicBlock *
8601  PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8602  unsigned AtomicSize,
8603  unsigned BinOpcode,
8604  unsigned CmpOpcode,
8605  unsigned CmpPred) const {
8606  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8607  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8608 
8609  auto LoadMnemonic = PPC::LDARX;
8610  auto StoreMnemonic = PPC::STDCX;
8611  switch (AtomicSize) {
8612  default:
8613  llvm_unreachable("Unexpected size of atomic entity");
8614  case 1:
8615  LoadMnemonic = PPC::LBARX;
8616  StoreMnemonic = PPC::STBCX;
8617  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
8618  break;
8619  case 2:
8620  LoadMnemonic = PPC::LHARX;
8621  StoreMnemonic = PPC::STHCX;
8622  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
8623  break;
8624  case 4:
8625  LoadMnemonic = PPC::LWARX;
8626  StoreMnemonic = PPC::STWCX;
8627  break;
8628  case 8:
8629  LoadMnemonic = PPC::LDARX;
8630  StoreMnemonic = PPC::STDCX;
8631  break;
8632  }
8633 
8634  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8635  MachineFunction *F = BB->getParent();
8636  MachineFunction::iterator It = ++BB->getIterator();
8637 
8638  unsigned dest = MI.getOperand(0).getReg();
8639  unsigned ptrA = MI.getOperand(1).getReg();
8640  unsigned ptrB = MI.getOperand(2).getReg();
8641  unsigned incr = MI.getOperand(3).getReg();
8642  DebugLoc dl = MI.getDebugLoc();
8643 
8644  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8645  MachineBasicBlock *loop2MBB =
8646  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8647  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8648  F->insert(It, loopMBB);
8649  if (CmpOpcode)
8650  F->insert(It, loop2MBB);
8651  F->insert(It, exitMBB);
8652  exitMBB->splice(exitMBB->begin(), BB,
8653  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8654  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8655 
8656  MachineRegisterInfo &RegInfo = F->getRegInfo();
8657  unsigned TmpReg = (!BinOpcode) ? incr :
8658  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
8659  : &PPC::GPRCRegClass);
8660 
8661  // thisMBB:
8662  // ...
8663  // fallthrough --> loopMBB
8664  BB->addSuccessor(loopMBB);
8665 
8666  // loopMBB:
8667  // l[wd]arx dest, ptr
8668  // add r0, dest, incr
8669  // st[wd]cx. r0, ptr
8670  // bne- loopMBB
8671  // fallthrough --> exitMBB
8672 
8673  // For max/min...
8674  // loopMBB:
8675  // l[wd]arx dest, ptr
8676  // cmpl?[wd] incr, dest
8677  // bgt exitMBB
8678  // loop2MBB:
8679  // st[wd]cx. dest, ptr
8680  // bne- loopMBB
8681  // fallthrough --> exitMBB
8682 
8683  BB = loopMBB;
8684  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
8685  .addReg(ptrA).addReg(ptrB);
8686  if (BinOpcode)
8687  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
8688  if (CmpOpcode) {
8689  // Signed comparisons of byte or halfword values must be sign-extended.
8690  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
8691  unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8692  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
8693  ExtReg).addReg(dest);
8694  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8695  .addReg(incr).addReg(ExtReg);
8696  } else
8697  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8698  .addReg(incr).addReg(dest);
8699 
8700  BuildMI(BB, dl, TII->get(PPC::BCC))
8701  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8702  BB->addSuccessor(loop2MBB);
8703  BB->addSuccessor(exitMBB);
8704  BB = loop2MBB;
8705  }
8706  BuildMI(BB, dl, TII->get(StoreMnemonic))
8707  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
8708  BuildMI(BB, dl, TII->get(PPC::BCC))
8709  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8710  BB->addSuccessor(loopMBB);
8711  BB->addSuccessor(exitMBB);
8712 
8713  // exitMBB:
8714  // ...
8715  BB = exitMBB;
8716  return BB;
8717 }
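// Illustration (annotation, not in the original file): a C-like sketch of the
// block structure built above, with load_reserve and store_conditional
// standing in for l[bhwd]arx / st[bhwd]cx.:
//
//   do {
//     old = load_reserve(ptr);                 // l[bhwd]arx dest, ptr
//     if (CmpOpcode && cmp(incr, old) satisfies CmpPred)
//       break;                                 // min/max: old value already wins
//     tmp = BinOpcode ? BinOp(incr, old) : incr;
//   } while (!store_conditional(ptr, tmp));    // st[bhwd]cx. + bne- loopMBB
//   // dest still holds the previously loaded value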
8718 
8719  MachineBasicBlock *
8720  PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
8721  MachineBasicBlock *BB,
8722  bool is8bit, // operation
8723  unsigned BinOpcode,
8724  unsigned CmpOpcode,
8725  unsigned CmpPred) const {
8726  // If we support part-word atomic mnemonics, just use them
8727  if (Subtarget.hasPartwordAtomics())
8728  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
8729  CmpOpcode, CmpPred);
8730 
8731  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8732  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8733  // In 64 bit mode we have to use 64 bits for addresses, even though the
8734  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
8735  // registers without caring whether they're 32 or 64, but here we're
8736  // doing actual arithmetic on the addresses.
8737  bool is64bit = Subtarget.isPPC64();
8738  bool isLittleEndian = Subtarget.isLittleEndian();
8739  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8740 
8741  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8742  MachineFunction *F = BB->getParent();
8743  MachineFunction::iterator It = ++BB->getIterator();
8744 
8745  unsigned dest = MI.getOperand(0).getReg();
8746  unsigned ptrA = MI.getOperand(1).getReg();
8747  unsigned ptrB = MI.getOperand(2).getReg();
8748  unsigned incr = MI.getOperand(3).getReg();
8749  DebugLoc dl = MI.getDebugLoc();
8750 
8751  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8752  MachineBasicBlock *loop2MBB =
8753  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8754  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8755  F->insert(It, loopMBB);
8756  if (CmpOpcode)
8757  F->insert(It, loop2MBB);
8758  F->insert(It, exitMBB);
8759  exitMBB->splice(exitMBB->begin(), BB,
8760  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8761  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8762 
8763  MachineRegisterInfo &RegInfo = F->getRegInfo();
8764  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8765  : &PPC::GPRCRegClass;
8766  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8767  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8768  unsigned ShiftReg =
8769  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
8770  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
8771  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8772  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8773  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8774  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8775  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
8776  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8777  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8778  unsigned Ptr1Reg;
8779  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
8780 
8781  // thisMBB:
8782  // ...
8783  // fallthrough --> loopMBB
8784  BB->addSuccessor(loopMBB);
8785 
8786  // The 4-byte load must be aligned, while a char or short may be
8787  // anywhere in the word. Hence all this nasty bookkeeping code.
8788  // add ptr1, ptrA, ptrB [copy if ptrA==0]
8789  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8790  // xori shift, shift1, 24 [16]
8791  // rlwinm ptr, ptr1, 0, 0, 29
8792  // slw incr2, incr, shift
8793  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8794  // slw mask, mask2, shift
8795  // loopMBB:
8796  // lwarx tmpDest, ptr
8797  // add tmp, tmpDest, incr2
8798  // andc tmp2, tmpDest, mask
8799  // and tmp3, tmp, mask
8800  // or tmp4, tmp3, tmp2
8801  // stwcx. tmp4, ptr
8802  // bne- loopMBB
8803  // fallthrough --> exitMBB
8804  // srw dest, tmpDest, shift
8805  if (ptrA != ZeroReg) {
8806  Ptr1Reg = RegInfo.createVirtualRegister(RC);
8807  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8808  .addReg(ptrA).addReg(ptrB);
8809  } else {
8810  Ptr1Reg = ptrB;
8811  }
8812  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8813  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8814  if (!isLittleEndian)
8815  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8816  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8817  if (is64bit)
8818  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8819  .addReg(Ptr1Reg).addImm(0).addImm(61);
8820  else
8821  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8822  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8823  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
8824  .addReg(incr).addReg(ShiftReg);
8825  if (is8bit)
8826  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8827  else {
8828  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8829  BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
8830  }
8831  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8832  .addReg(Mask2Reg).addReg(ShiftReg);
8833 
8834  BB = loopMBB;
8835  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8836  .addReg(ZeroReg).addReg(PtrReg);
8837  if (BinOpcode)
8838  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
8839  .addReg(Incr2Reg).addReg(TmpDestReg);
8840  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
8841  .addReg(TmpDestReg).addReg(MaskReg);
8842  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
8843  .addReg(TmpReg).addReg(MaskReg);
8844  if (CmpOpcode) {
8845  // For unsigned comparisons, we can directly compare the shifted values.
8846  // For signed comparisons we shift and sign extend.
8847  unsigned SReg = RegInfo.createVirtualRegister(RC);
8848  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
8849  .addReg(TmpDestReg).addReg(MaskReg);
8850  unsigned ValueReg = SReg;
8851  unsigned CmpReg = Incr2Reg;
8852  if (CmpOpcode == PPC::CMPW) {
8853  ValueReg = RegInfo.createVirtualRegister(RC);
8854  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
8855  .addReg(SReg).addReg(ShiftReg);
8856  unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
8857  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
8858  .addReg(ValueReg);
8859  ValueReg = ValueSReg;
8860  CmpReg = incr;
8861  }
8862  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8863  .addReg(CmpReg).addReg(ValueReg);
8864  BuildMI(BB, dl, TII->get(PPC::BCC))
8865  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8866  BB->addSuccessor(loop2MBB);
8867  BB->addSuccessor(exitMBB);
8868  BB = loop2MBB;
8869  }
8870  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
8871  .addReg(Tmp3Reg).addReg(Tmp2Reg);
8872  BuildMI(BB, dl, TII->get(PPC::STWCX))
8873  .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
8874  BuildMI(BB, dl, TII->get(PPC::BCC))
8875  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8876  BB->addSuccessor(loopMBB);
8877  BB->addSuccessor(exitMBB);
8878 
8879  // exitMBB:
8880  // ...
8881  BB = exitMBB;
8882  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
8883  .addReg(ShiftReg);
8884  return BB;
8885 }
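// Illustration (annotation, not in the original file): an illustrative trace
// of the subword shift/mask arithmetic above for an atomic byte op on a byte
// at address A:
//
//   word  = A & ~3;              // rlwinm ptr, ptr1, 0, 0, 29
//   shift = (A & 3) * 8;         // rlwinm shift1, ptr1, 3, 27, 28
//   if (big endian) shift ^= 24; // xori: byte 3 is the LSB on BE
//   mask  = 0xFF << shift;       // li + slw
//
// e.g. A = 0x1003: little endian gives shift 24, mask 0xFF000000, while big
// endian gives shift 0, mask 0xFF - both naming the same byte of the word.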
8886 
8887  MachineBasicBlock *
8888  PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
8889  MachineBasicBlock *MBB) const {
8890  DebugLoc DL = MI.getDebugLoc();
8891  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8892 
8893  MachineFunction *MF = MBB->getParent();
8894  MachineRegisterInfo &MRI = MF->getRegInfo();
8895 
8896  const BasicBlock *BB = MBB->getBasicBlock();
8897  MachineFunction::iterator I = ++MBB->getIterator();
8898 
8899  // Memory Reference
8900  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
8901  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
8902 
8903  unsigned DstReg = MI.getOperand(0).getReg();
8904  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
8905  assert(RC->hasType(MVT::i32) && "Invalid destination!");
8906  unsigned mainDstReg = MRI.createVirtualRegister(RC);
8907  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
8908 
8909  MVT PVT = getPointerTy(MF->getDataLayout());
8910  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8911  "Invalid Pointer Size!");
8912  // For v = setjmp(buf), we generate
8913  //
8914  // thisMBB:
8915  // SjLjSetup mainMBB
8916  // bl mainMBB
8917  // v_restore = 1
8918  // b sinkMBB
8919  //
8920  // mainMBB:
8921  // buf[LabelOffset] = LR
8922  // v_main = 0
8923  //
8924  // sinkMBB:
8925  // v = phi(main, restore)
8926  //
8927 
8928  MachineBasicBlock *thisMBB = MBB;
8929  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
8930  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
8931  MF->insert(I, mainMBB);
8932  MF->insert(I, sinkMBB);
8933 
8934  MachineInstrBuilder MIB;
8935 
8936  // Transfer the remainder of BB and its successor edges to sinkMBB.
8937  sinkMBB->splice(sinkMBB->begin(), MBB,
8938  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
8939  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8940 
8941  // Note that the structure of the jmp_buf used here is not compatible
8942  // with that used by libc, and is not designed to be. Specifically, it
8943  // stores only those 'reserved' registers that LLVM does not otherwise
8944  // understand how to spill. Also, by convention, by the time this
8945  // intrinsic is called, Clang has already stored the frame address in the
8946  // first slot of the buffer and stack address in the third. Following the
8947  // X86 target code, we'll store the jump address in the second slot. We also
8948  // need to save the TOC pointer (R2) to handle jumps between shared
8949  // libraries, and that will be stored in the fourth slot. The thread
8950  // identifier (R13) is not affected.
8951 
8952  // thisMBB:
8953  const int64_t LabelOffset = 1 * PVT.getStoreSize();
8954  const int64_t TOCOffset = 3 * PVT.getStoreSize();
8955  const int64_t BPOffset = 4 * PVT.getStoreSize();
8956 
8957  // Prepare the IP in a register.
8958  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
8959  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
8960  unsigned BufReg = MI.getOperand(1).getReg();
8961 
8962  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
8963  setUsesTOCBasePtr(*MBB->getParent());
8964  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
8965  .addReg(PPC::X2)
8966  .addImm(TOCOffset)
8967  .addReg(BufReg);
8968  MIB.setMemRefs(MMOBegin, MMOEnd);
8969  }
8970 
8971  // Naked functions never have a base pointer, and so we use r1. For all
8972  // other functions, this decision must be delayed until PEI.
8973  unsigned BaseReg;
8974  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
8975  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
8976  else
8977  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
8978 
8979  MIB = BuildMI(*thisMBB, MI, DL,
8980  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
8981  .addReg(BaseReg)
8982  .addImm(BPOffset)
8983  .addReg(BufReg);
8984  MIB.setMemRefs(MMOBegin, MMOEnd);
8985 
8986  // Setup
8987  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
8988  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
8989  MIB.addRegMask(TRI->getNoPreservedMask());
8990 
8991  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
8992 
8993  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
8994  .addMBB(mainMBB);
8995  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
8996 
8997  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
8998  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
8999 
9000  // mainMBB:
9001  // mainDstReg = 0
9002  MIB =
9003  BuildMI(mainMBB, DL,
9004  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9005 
9006  // Store IP
9007  if (Subtarget.isPPC64()) {
9008  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
9009  .addReg(LabelReg)
9010  .addImm(LabelOffset)
9011  .addReg(BufReg);
9012  } else {
9013  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
9014  .addReg(LabelReg)
9015  .addImm(LabelOffset)
9016  .addReg(BufReg);
9017  }
9018 
9019  MIB.setMemRefs(MMOBegin, MMOEnd);
9020 
9021  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
9022  mainMBB->addSuccessor(sinkMBB);
9023 
9024  // sinkMBB:
9025  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
9026  TII->get(PPC::PHI), DstReg)
9027  .addReg(mainDstReg).addMBB(mainMBB)
9028  .addReg(restoreDstReg).addMBB(thisMBB);
9029 
9030  MI.eraseFromParent();
9031  return sinkMBB;
9032 }
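// Illustration (annotation, not in the original file): the buffer layout
// implied by the offsets above, one pointer-sized slot per entry (and, as the
// comments note, deliberately not libc-compatible):
//
//   slot 0 : frame address  (stored by Clang before this intrinsic runs)
//   slot 1 : jump address   (LR, stored at LabelOffset)
//   slot 2 : stack pointer  (stored by Clang; reloaded at SPOffset in longjmp)
//   slot 3 : TOC pointer R2 (TOCOffset, 64-bit ELF only)
//   slot 4 : base pointer   (BPOffset)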
9033 
9034  MachineBasicBlock *
9035  PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
9036  MachineBasicBlock *MBB) const {
9037  DebugLoc DL = MI.getDebugLoc();
9038  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9039 
9040  MachineFunction *MF = MBB->getParent();
9041  MachineRegisterInfo &MRI = MF->getRegInfo();
9042 
9043  // Memory Reference
9044  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9045  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9046 
9047  MVT PVT = getPointerTy(MF->getDataLayout());
9048  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9049  "Invalid Pointer Size!");
9050 
9051  const TargetRegisterClass *RC =
9052  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9053  unsigned Tmp = MRI.createVirtualRegister(RC);
9054  // Since FP is only updated here but NOT referenced, it's treated as GPR.
9055  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9056  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9057  unsigned BP =
9058  (PVT == MVT::i64)
9059  ? PPC::X30
9060  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9061  : PPC::R30);
9062 
9063  MachineInstrBuilder MIB;
9064 
9065  const int64_t LabelOffset = 1 * PVT.getStoreSize();
9066  const int64_t SPOffset = 2 * PVT.getStoreSize();
9067  const int64_t TOCOffset = 3 * PVT.getStoreSize();
9068  const int64_t BPOffset = 4 * PVT.getStoreSize();
9069 
9070  unsigned BufReg = MI.getOperand(0).getReg();
9071 
9072  // Reload FP (the jumped-to function may not have had a
9073  // frame pointer, and if so, its r31 will be restored
9074  // as necessary).
9075  if (PVT == MVT::i64) {
9076  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
9077  .addImm(0)
9078  .addReg(BufReg);
9079  } else {
9080  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
9081  .addImm(0)
9082  .addReg(BufReg);
9083  }
9084  MIB.setMemRefs(MMOBegin, MMOEnd);
9085 
9086  // Reload IP
9087  if (PVT == MVT::i64) {
9088  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
9089  .addImm(LabelOffset)
9090  .addReg(BufReg);
9091  } else {
9092  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
9093  .addImm(LabelOffset)
9094  .addReg(BufReg);
9095  }
9096  MIB.setMemRefs(MMOBegin, MMOEnd);
9097 
9098  // Reload SP
9099  if (PVT == MVT::i64) {
9100  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
9101  .addImm(SPOffset)
9102  .addReg(BufReg);
9103  } else {
9104  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9105  .addImm(SPOffset)
9106  .addReg(BufReg);
9107  }
9108  MIB.setMemRefs(MMOBegin, MMOEnd);
9109 
9110  // Reload BP
9111  if (PVT == MVT::i64) {
9112  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9113  .addImm(BPOffset)
9114  .addReg(BufReg);
9115  } else {
9116  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9117  .addImm(BPOffset)
9118  .addReg(BufReg);
9119  }
9120  MIB.setMemRefs(MMOBegin, MMOEnd);
9121 
9122  // Reload TOC
9123  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9124  setUsesTOCBasePtr(*MBB->getParent());
9125  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9126  .addImm(TOCOffset)
9127  .addReg(BufReg);
9128 
9129  MIB.setMemRefs(MMOBegin, MMOEnd);
9130  }
9131 
9132  // Jump
9133  BuildMI(*MBB, MI, DL,
9134  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9135  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9136 
9137  MI.eraseFromParent();
9138  return MBB;
9139 }
9140 
9141  MachineBasicBlock *
9142  PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9143  MachineBasicBlock *BB) const {
9144  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9145  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9146  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9147  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9148  // Call lowering should have added an r2 operand to indicate a dependence
9149  // on the TOC base pointer value. It can't, however, because there is no
9150  // way to mark the dependence as implicit there, and so the stackmap code
9151  // will confuse it with a regular operand. Instead, add the dependence
9152  // here.
9153  setUsesTOCBasePtr(*BB->getParent());
9154  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9155  }
9156 
9157  return emitPatchPoint(MI, BB);
9158  }
9159 
9160  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9161  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9162  return emitEHSjLjSetJmp(MI, BB);
9163  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9164  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9165  return emitEHSjLjLongJmp(MI, BB);
9166  }
9167 
9168  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9169 
9170  // To "insert" these instructions we actually have to insert their
9171  // control-flow patterns.
9172  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9173  MachineFunction::iterator It = ++BB->getIterator();
9174 
9175  MachineFunction *F = BB->getParent();
9176 
9177  if (Subtarget.hasISEL() &&
9178  (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9179  MI.getOpcode() == PPC::SELECT_CC_I8 ||
9180  MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
9181  SmallVector<MachineOperand, 2> Cond;
9182  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9183  MI.getOpcode() == PPC::SELECT_CC_I8)
9184  Cond.push_back(MI.getOperand(4));
9185  else
9186  Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9187  Cond.push_back(MI.getOperand(1));
9188 
9189  DebugLoc dl = MI.getDebugLoc();
9190  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9191  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9192  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9193  MI.getOpcode() == PPC::SELECT_CC_I8 ||
9194  MI.getOpcode() == PPC::SELECT_CC_F4 ||
9195  MI.getOpcode() == PPC::SELECT_CC_F8 ||
9196  MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9197  MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9198  MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9199  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9200  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9201  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9202  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9203  MI.getOpcode() == PPC::SELECT_I4 ||
9204  MI.getOpcode() == PPC::SELECT_I8 ||
9205  MI.getOpcode() == PPC::SELECT_F4 ||
9206  MI.getOpcode() == PPC::SELECT_F8 ||
9207  MI.getOpcode() == PPC::SELECT_QFRC ||
9208  MI.getOpcode() == PPC::SELECT_QSRC ||
9209  MI.getOpcode() == PPC::SELECT_QBRC ||
9210  MI.getOpcode() == PPC::SELECT_VRRC ||
9211  MI.getOpcode() == PPC::SELECT_VSFRC ||
9212  MI.getOpcode() == PPC::SELECT_VSSRC ||
9213  MI.getOpcode() == PPC::SELECT_VSRC) {
9214  // The incoming instruction knows the destination vreg to set, the
9215  // condition code register to branch on, the true/false values to
9216  // select between, and a branch opcode to use.
9217 
9218  // thisMBB:
9219  // ...
9220  // TrueVal = ...
9221  // cmpTY ccX, r1, r2
9222  // bCC copy1MBB
9223  // fallthrough --> copy0MBB
9224  MachineBasicBlock *thisMBB = BB;
9225  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9226  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9227  DebugLoc dl = MI.getDebugLoc();
9228  F->insert(It, copy0MBB);
9229  F->insert(It, sinkMBB);
9230 
9231  // Transfer the remainder of BB and its successor edges to sinkMBB.
9232  sinkMBB->splice(sinkMBB->begin(), BB,
9233  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9234  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9235 
9236  // Next, add the true and fallthrough blocks as its successors.
9237  BB->addSuccessor(copy0MBB);
9238  BB->addSuccessor(sinkMBB);
9239 
9240  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9241  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9242  MI.getOpcode() == PPC::SELECT_QFRC ||
9243  MI.getOpcode() == PPC::SELECT_QSRC ||
9244  MI.getOpcode() == PPC::SELECT_QBRC ||
9245  MI.getOpcode() == PPC::SELECT_VRRC ||
9246  MI.getOpcode() == PPC::SELECT_VSFRC ||
9247  MI.getOpcode() == PPC::SELECT_VSSRC ||
9248  MI.getOpcode() == PPC::SELECT_VSRC) {
9249  BuildMI(BB, dl, TII->get(PPC::BC))
9250  .addReg(MI.getOperand(1).getReg())
9251  .addMBB(sinkMBB);
9252  } else {
9253  unsigned SelectPred = MI.getOperand(4).getImm();
9254  BuildMI(BB, dl, TII->get(PPC::BCC))
9255  .addImm(SelectPred)
9256  .addReg(MI.getOperand(1).getReg())
9257  .addMBB(sinkMBB);
9258  }
9259 
9260  // copy0MBB:
9261  // %FalseValue = ...
9262  // # fallthrough to sinkMBB
9263  BB = copy0MBB;
9264 
9265  // Update machine-CFG edges
9266  BB->addSuccessor(sinkMBB);
9267 
9268  // sinkMBB:
9269  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9270  // ...
9271  BB = sinkMBB;
9272  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9273  .addReg(MI.getOperand(3).getReg())
9274  .addMBB(copy0MBB)
9275  .addReg(MI.getOperand(2).getReg())
9276  .addMBB(thisMBB);
9277  } else if (MI.getOpcode() == PPC::ReadTB) {
9278  // To read the 64-bit time-base register on a 32-bit target, we read the
9279  // two halves. Should the counter have wrapped while it was being read, we
9280  // need to try again.
9281  // ...
9282  // readLoop:
9283  // mfspr Rx,TBU # load from TBU
9284  // mfspr Ry,TB # load from TB
9285  // mfspr Rz,TBU # load from TBU
9286  // cmpw crX,Rx,Rz # check if 'old'='new'
9287  // bne readLoop # branch if they're not equal
9288  // ...
9289 
9290  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9291  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9292  DebugLoc dl = MI.getDebugLoc();
9293  F->insert(It, readMBB);
9294  F->insert(It, sinkMBB);
9295 
9296  // Transfer the remainder of BB and its successor edges to sinkMBB.
9297  sinkMBB->splice(sinkMBB->begin(), BB,
9298  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9299  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9300 
9301  BB->addSuccessor(readMBB);
9302  BB = readMBB;
9303 
9304  MachineRegisterInfo &RegInfo = F->getRegInfo();
9305  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9306  unsigned LoReg = MI.getOperand(0).getReg();
9307  unsigned HiReg = MI.getOperand(1).getReg();
9308 
9309  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9310  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9311  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9312 
9313  unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9314 
9315  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9316  .addReg(HiReg).addReg(ReadAgainReg);
9317  BuildMI(BB, dl, TII->get(PPC::BCC))
9318  .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9319 
9320  BB->addSuccessor(readMBB);
9321  BB->addSuccessor(sinkMBB);
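// Illustration (annotation, not in the original file): a C-like sketch of the
// read loop emitted above; SPR 269 is TBU and SPR 268 is the lower time base:
//
//   do {
//     hi  = mfspr(269);   // upper half
//     lo  = mfspr(268);   // lower half
//     hi2 = mfspr(269);   // upper half again
//   } while (hi != hi2);  // retry if the low half wrapped into TBU mid-read
//   result = ((uint64_t)hi << 32) | lo;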
9322  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9323  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9324  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9325  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9326  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9327  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9328  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9329  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9330 
9331  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9332  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9333  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9334  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9335  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9336  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9337  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9338  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9339 
9340  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9341  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9342  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9343  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9344  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9345  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9346  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9347  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9348 
9349  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9350  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9351  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9352  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9353  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9354  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9355  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9356  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9357 
9358  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9359  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9360  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9361  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9362  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9363  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9364  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9365  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9366 
9367  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9368  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9369  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9370  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9371  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9372  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9373  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9374  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9375 
9376  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9377  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9378  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9379  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9380  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9381  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9382  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9383  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9384 
9385  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9386  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9387  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9388  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9389  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9390  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9391  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9392  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9393 
9394  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9395  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9396  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9397  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9398  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9399  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9400  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9401  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9402 
9403  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9404  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9405  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9406  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9407  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9408  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9409  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9410  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9411 
9412  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9413  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9414  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9415  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9416  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9417  BB = EmitAtomicBinary(MI, BB, 4, 0);
9418  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9419  BB = EmitAtomicBinary(MI, BB, 8, 0);
9420 
9421  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9422  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9423  (Subtarget.hasPartwordAtomics() &&
9424  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9425  (Subtarget.hasPartwordAtomics() &&
9426  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9427  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9428 
9429  auto LoadMnemonic = PPC::LDARX;
9430  auto StoreMnemonic = PPC::STDCX;
9431  switch (MI.getOpcode()) {
9432  default:
9433  llvm_unreachable("Compare and swap of unknown size");
9434  case PPC::ATOMIC_CMP_SWAP_I8:
9435  LoadMnemonic = PPC::LBARX;
9436  StoreMnemonic = PPC::STBCX;
9437  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9438  break;
9439  case PPC::ATOMIC_CMP_SWAP_I16:
9440  LoadMnemonic = PPC::LHARX;
9441  StoreMnemonic = PPC::STHCX;
9442  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9443  break;
9444  case PPC::ATOMIC_CMP_SWAP_I32:
9445  LoadMnemonic = PPC::LWARX;
9446  StoreMnemonic = PPC::STWCX;
9447  break;
9448  case PPC::ATOMIC_CMP_SWAP_I64:
9449  LoadMnemonic = PPC::LDARX;
9450  StoreMnemonic = PPC::STDCX;
9451  break;
9452  }
9453  unsigned dest = MI.getOperand(0).getReg();
9454  unsigned ptrA = MI.getOperand(1).getReg();
9455  unsigned ptrB = MI.getOperand(2).getReg();
9456  unsigned oldval = MI.getOperand(3).getReg();
9457  unsigned newval = MI.getOperand(4).getReg();
9458  DebugLoc dl = MI.getDebugLoc();
9459 
9460  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9461  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9462  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9463  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9464  F->insert(It, loop1MBB);
9465  F->insert(It, loop2MBB);
9466  F->insert(It, midMBB);
9467  F->insert(It, exitMBB);
9468  exitMBB->splice(exitMBB->begin(), BB,
9469  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9470  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9471 
9472  // thisMBB:
9473  // ...
9474  // fallthrough --> loopMBB
9475  BB->addSuccessor(loop1MBB);
9476 
9477  // loop1MBB:
9478  // l[bhwd]arx dest, ptr
9479  // cmp[wd] dest, oldval
9480  // bne- midMBB
9481  // loop2MBB:
9482  // st[bhwd]cx. newval, ptr
9483  // bne- loopMBB
9484  // b exitBB
9485  // midMBB:
9486  // st[bhwd]cx. dest, ptr
9487  // exitBB:
9488  BB = loop1MBB;
9489  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9490  .addReg(ptrA).addReg(ptrB);
9491  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9492  .addReg(oldval).addReg(dest);
9493  BuildMI(BB, dl, TII->get(PPC::BCC))
9494  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9495  BB->addSuccessor(loop2MBB);
9496  BB->addSuccessor(midMBB);
9497 
9498  BB = loop2MBB;
9499  BuildMI(BB, dl, TII->get(StoreMnemonic))
9500  .addReg(newval).addReg(ptrA).addReg(ptrB);
9501  BuildMI(BB, dl, TII->get(PPC::BCC))
9502  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9503  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9504  BB->addSuccessor(loop1MBB);
9505  BB->addSuccessor(exitMBB);
9506 
9507  BB = midMBB;
9508  BuildMI(BB, dl, TII->get(StoreMnemonic))
9509  .addReg(dest).addReg(ptrA).addReg(ptrB);
9510  BB->addSuccessor(exitMBB);
9511 
9512  // exitMBB:
9513  // ...
9514  BB = exitMBB;
9515  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9516  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9517  // We must use 64-bit registers for addresses when targeting 64-bit,
9518  // since we're actually doing arithmetic on them. Other registers
9519  // can be 32-bit.
9520  bool is64bit = Subtarget.isPPC64();
9521  bool isLittleEndian = Subtarget.isLittleEndian();
9522  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9523 
9524  unsigned dest = MI.getOperand(0).getReg();
9525  unsigned ptrA = MI.getOperand(1).getReg();
9526  unsigned ptrB = MI.getOperand(2).getReg();
9527  unsigned oldval = MI.getOperand(3).getReg();
9528  unsigned newval = MI.getOperand(4).getReg();
9529  DebugLoc dl = MI.getDebugLoc();
9530 
9531  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9532  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9533  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9534  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9535  F->insert(It, loop1MBB);
9536  F->insert(It, loop2MBB);
9537  F->insert(It, midMBB);
9538  F->insert(It, exitMBB);
9539  exitMBB->splice(exitMBB->begin(), BB,
9540  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9541  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9542 
9543  MachineRegisterInfo &RegInfo = F->getRegInfo();
9544  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9545  : &PPC::GPRCRegClass;
9546  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9547  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9548  unsigned ShiftReg =
9549  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9550  unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
9551  unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
9552  unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
9553  unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
9554  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9555  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9556  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9557  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9558  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9559  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9560  unsigned Ptr1Reg;
9561  unsigned TmpReg = RegInfo.createVirtualRegister(RC);
9562  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9563  // thisMBB:
9564  // ...
9565  // fallthrough --> loopMBB
9566  BB->addSuccessor(loop1MBB);
9567 
9568  // The 4-byte load must be aligned, while a char or short may be
9569  // anywhere in the word. Hence all this nasty bookkeeping code.
9570  // add ptr1, ptrA, ptrB [copy if ptrA==0]
9571  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9572  // xori shift, shift1, 24 [16]
9573  // rlwinm ptr, ptr1, 0, 0, 29
9574  // slw newval2, newval, shift
9575  // slw oldval2, oldval,shift
9576  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9577  // slw mask, mask2, shift
9578  // and newval3, newval2, mask
9579  // and oldval3, oldval2, mask
9580  // loop1MBB:
9581  // lwarx tmpDest, ptr
9582  // and tmp, tmpDest, mask
9583  // cmpw tmp, oldval3
9584  // bne- midMBB
9585  // loop2MBB:
9586  // andc tmp2, tmpDest, mask
9587  // or tmp4, tmp2, newval3
9588  // stwcx. tmp4, ptr
9589  // bne- loop1MBB
9590  // b exitBB
9591  // midMBB:
9592  // stwcx. tmpDest, ptr
9593  // exitBB:
9594  // srw dest, tmpDest, shift
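  //
  // A worked instance of the bookkeeping above (illustrative only): assuming
  // a big-endian target and a byte operand at (addr & 3) == 1,
  //   rlwinm shift1, ptr1, 3, 27, 28   =>  shift1 = (addr & 3) << 3 = 8
  //   xori   shift,  shift1, 24        =>  shift  = 8 ^ 24 = 16
  // so the byte occupies bits [23:16] of its aligned word and mask becomes
  // 0xFF << 16. On little-endian targets the xori is skipped because byte 0
  // already sits in the least-significant lane (shift 0).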
9595  if (ptrA != ZeroReg) {
9596  Ptr1Reg = RegInfo.createVirtualRegister(RC);
9597  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9598  .addReg(ptrA).addReg(ptrB);
9599  } else {
9600  Ptr1Reg = ptrB;
9601  }
9602  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9603  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9604  if (!isLittleEndian)
9605  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9606  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9607  if (is64bit)
9608  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9609  .addReg(Ptr1Reg).addImm(0).addImm(61);
9610  else
9611  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9612  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9613  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
9614  .addReg(newval).addReg(ShiftReg);
9615  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
9616  .addReg(oldval).addReg(ShiftReg);
9617  if (is8bit)
9618  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9619  else {
9620  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9621  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
9622  .addReg(Mask3Reg).addImm(65535);
9623  }
9624  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9625  .addReg(Mask2Reg).addReg(ShiftReg);
9626  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
9627  .addReg(NewVal2Reg).addReg(MaskReg);
9628  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
9629  .addReg(OldVal2Reg).addReg(MaskReg);
9630 
9631  BB = loop1MBB;
9632  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9633  .addReg(ZeroReg).addReg(PtrReg);
9634  BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
9635  .addReg(TmpDestReg).addReg(MaskReg);
9636  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
9637  .addReg(TmpReg).addReg(OldVal3Reg);
9638  BuildMI(BB, dl, TII->get(PPC::BCC))
9639  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9640  BB->addSuccessor(loop2MBB);
9641  BB->addSuccessor(midMBB);
9642 
9643  BB = loop2MBB;
9644  BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
9645  .addReg(TmpDestReg).addReg(MaskReg);
9646  BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
9647  .addReg(Tmp2Reg).addReg(NewVal3Reg);
9648  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
9649  .addReg(ZeroReg).addReg(PtrReg);
9650  BuildMI(BB, dl, TII->get(PPC::BCC))
9651  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9652  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9653  BB->addSuccessor(loop1MBB);
9654  BB->addSuccessor(exitMBB);
9655 
9656  BB = midMBB;
9657  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
9658  .addReg(ZeroReg).addReg(PtrReg);
9659  BB->addSuccessor(exitMBB);
9660 
9661  // exitMBB:
9662  // ...
9663  BB = exitMBB;
9664  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
9665  .addReg(ShiftReg);
9666  } else if (MI.getOpcode() == PPC::FADDrtz) {
9667  // This pseudo performs an FADD with rounding mode temporarily forced
9668  // to round-to-zero. We emit this via custom inserter since the FPSCR
9669  // is not modeled at the SelectionDAG level.
9670  unsigned Dest = MI.getOperand(0).getReg();
9671  unsigned Src1 = MI.getOperand(1).getReg();
9672  unsigned Src2 = MI.getOperand(2).getReg();
9673  DebugLoc dl = MI.getDebugLoc();
9674 
9675  MachineRegisterInfo &RegInfo = F->getRegInfo();
9676  unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
9677 
9678  // Save FPSCR value.
9679  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
9680 
9681  // Set rounding mode to round-to-zero.
9682  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
9683  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
9684 
9685  // Perform addition.
9686  BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
9687 
9688  // Restore FPSCR value.
9689  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
9690  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9691  MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
9692  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9693  MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
9694  unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9695  MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
9696  ? PPC::ANDIo8
9697  : PPC::ANDIo;
9698  bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9699  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
9700 
9701  MachineRegisterInfo &RegInfo = F->getRegInfo();
9702  unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
9703  &PPC::GPRCRegClass :
9704  &PPC::G8RCRegClass);
9705 
9706  DebugLoc dl = MI.getDebugLoc();
9707  BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
9708  .addReg(MI.getOperand(1).getReg())
9709  .addImm(1);
9710  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
9711  MI.getOperand(0).getReg())
9712  .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
9713  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
9714  DebugLoc Dl = MI.getDebugLoc();
9715  MachineRegisterInfo &RegInfo = F->getRegInfo();
9716  unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9717  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
9718  return BB;
9719  } else {
9720  llvm_unreachable("Unexpected instr type to insert");
9721  }
9722 
9723  MI.eraseFromParent(); // The pseudo instruction is gone now.
9724  return BB;
9725 }
9726 
9727 //===----------------------------------------------------------------------===//
9728 // Target Optimization Hooks
9729 //===----------------------------------------------------------------------===//
9730 
9731 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
9732  // For the estimates, convergence is quadratic, so we essentially double the
9733  // number of digits correct after every iteration. For both FRE and FRSQRTE,
9734  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
9735  // this is 2^-14. IEEE float has 23 mantissa bits and double has 52.
9736  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
9737  if (VT.getScalarType() == MVT::f64)
9738  RefinementSteps++;
9739  return RefinementSteps;
9740 }
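// Working the arithmetic above through (illustrative only): without
// hasRecipPrec(), correct bits go 5 -> 10 -> 20 -> 40, so 3 steps cover the
// 23 fraction bits of f32 and a 4th covers the 52 of f64 (-> 80). With
// hasRecipPrec(), 14 -> 28 suffices for f32 in one step and 14 -> 28 -> 56
// for f64 in two, matching the values returned here.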
9741 
9742 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
9743  int Enabled, int &RefinementSteps,
9744  bool &UseOneConstNR,
9745  bool Reciprocal) const {
9746  EVT VT = Operand.getValueType();
9747  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
9748  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
9749  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9750  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9751  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9752  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9753  if (RefinementSteps == ReciprocalEstimate::Unspecified)
9754  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9755 
9756  UseOneConstNR = true;
9757  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
9758  }
9759  return SDValue();
9760 }
9761 
9762 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
9763  int Enabled,
9764  int &RefinementSteps) const {
9765  EVT VT = Operand.getValueType();
9766  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
9767  (VT == MVT::f64 && Subtarget.hasFRE()) ||
9768  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9769  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9770  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9771  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9772  if (RefinementSteps == ReciprocalEstimate::Unspecified)
9773  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9774  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
9775  }
9776  return SDValue();
9777 }
9778 
9779 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
9780  // Note: This functionality is used only when unsafe-fp-math is enabled, and
9781  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
9782  // enabled for division), this functionality is redundant with the default
9783  // combiner logic (once the division -> reciprocal/multiply transformation
9784  // has taken place). As a result, this matters more for older cores than for
9785  // newer ones.
9786 
9787  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9788  // reciprocal if there are two or more FDIVs (for embedded cores with only
9789  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
9790  switch (Subtarget.getDarwinDirective()) {
9791  default:
9792  return 3;
9793  case PPC::DIR_440:
9794  case PPC::DIR_A2:
9795  case PPC::DIR_E500mc:
9796  case PPC::DIR_E5500:
9797  return 2;
9798  }
9799 }
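// A C-like sketch of the combine this threshold gates (illustrative only;
// the transformation itself lives in the generic DAG combiner):
//
//   double a = x / d, b = y / d;      // before: two divisions by d
//   double r = 1.0 / d;               // after (unsafe-fp-math only):
//   double a2 = x * r, b2 = y * r;    // one reciprocal, two multiplies
//
// On the embedded cores listed above the rewrite pays off at two divisions;
// on generic out-of-order cores it needs three.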
9800 
9801 // isConsecutiveLSLoc needs to work even if all adds have not yet been
9802 // collapsed, and so we need to look through chains of them.
9803 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
9804  int64_t& Offset, SelectionDAG &DAG) {
9805  if (DAG.isBaseWithConstantOffset(Loc)) {
9806  Base = Loc.getOperand(0);
9807  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
9808 
9809  // The base might itself be a base plus an offset, and if so, accumulate
9810  // that as well.
9811  getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
9812  }
9813 }
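// For example (illustrative only): given an uncollapsed chain such as
// Loc = (add (add %X, 16), 8), the recursion above leaves Base = %X and
// accumulates Offset += 24.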
9814 
9815 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
9816  unsigned Bytes, int Dist,
9817  SelectionDAG &DAG) {
9818  if (VT.getSizeInBits() / 8 != Bytes)
9819  return false;
9820 
9821  SDValue BaseLoc = Base->getBasePtr();
9822  if (Loc.getOpcode() == ISD::FrameIndex) {
9823  if (BaseLoc.getOpcode() != ISD::FrameIndex)
9824  return false;
9825  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9826  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
9827  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
9828  int FS = MFI.getObjectSize(FI);
9829  int BFS = MFI.getObjectSize(BFI);
9830  if (FS != BFS || FS != (int)Bytes) return false;
9831  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
9832  }
9833 
9834  SDValue Base1 = Loc, Base2 = BaseLoc;
9835  int64_t Offset1 = 0, Offset2 = 0;
9836  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
9837  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
9838  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
9839  return true;
9840 
9841  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9842  const GlobalValue *GV1 = nullptr;
9843  const GlobalValue *GV2 = nullptr;
9844  Offset1 = 0;
9845  Offset2 = 0;
9846  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
9847  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
9848  if (isGA1 && isGA2 && GV1 == GV2)
9849  return Offset1 == (Offset2 + Dist*Bytes);
9850  return false;
9851 }
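// For example (illustrative only): with Bytes = 4 and Dist = 1, a 4-byte
// frame-index access at frame offset 12 is consecutive to a 4-byte base
// access at offset 8 (12 == 8 + 1*4); Dist = -1 would instead match an
// access 4 bytes below the base.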
9852 
9853 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
9854 // not enforce equality of the chain operands.
9855 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
9856  unsigned Bytes, int Dist,
9857  SelectionDAG &DAG) {
9858  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
9859  EVT VT = LS->getMemoryVT();
9860  SDValue Loc = LS->getBasePtr();
9861  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
9862  }
9863 
9864  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
9865  EVT VT;
9866  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9867  default: return false;
9868  case Intrinsic::ppc_qpx_qvlfd:
9869  case Intrinsic::ppc_qpx_qvlfda:
9870  VT = MVT::v4f64;
9871  break;
9872  case Intrinsic::ppc_qpx_qvlfs:
9873  case Intrinsic::ppc_qpx_qvlfsa:
9874  VT = MVT::v4f32;
9875  break;
9876  case Intrinsic::ppc_qpx_qvlfcd:
9877  case Intrinsic::ppc_qpx_qvlfcda:
9878  VT = MVT::v2f64;
9879  break;
9880  case Intrinsic::ppc_qpx_qvlfcs:
9881  case Intrinsic::ppc_qpx_qvlfcsa:
9882  VT = MVT::v2f32;
9883  break;
9884  case Intrinsic::ppc_qpx_qvlfiwa:
9885  case Intrinsic::ppc_qpx_qvlfiwz:
9886  case Intrinsic::ppc_altivec_lvx:
9887  case Intrinsic::ppc_altivec_lvxl:
9888  case Intrinsic::ppc_vsx_lxvw4x:
9889  case Intrinsic::ppc_vsx_lxvw4x_be:
9890  VT = MVT::v4i32;
9891  break;
9892  case Intrinsic::ppc_vsx_lxvd2x:
9893  case Intrinsic::ppc_vsx_lxvd2x_be:
9894  VT = MVT::v2f64;
9895  break;
9896  case Intrinsic::ppc_altivec_lvebx:
9897  VT = MVT::i8;
9898  break;
9899  case Intrinsic::ppc_altivec_lvehx:
9900  VT = MVT::i16;
9901  break;
9902  case Intrinsic::ppc_altivec_lvewx:
9903  VT = MVT::i32;
9904  break;
9905  }
9906 
9907  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
9908  }
9909 
9910  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
9911  EVT VT;
9912  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9913  default: return false;
9914  case Intrinsic::ppc_qpx_qvstfd:
9915  case Intrinsic::ppc_qpx_qvstfda:
9916  VT = MVT::v4f64;
9917  break;
9918  case Intrinsic::ppc_qpx_qvstfs:
9919  case Intrinsic::ppc_qpx_qvstfsa:
9920  VT = MVT::v4f32;
9921  break;
9922  case Intrinsic::ppc_qpx_qvstfcd:
9923  case Intrinsic::ppc_qpx_qvstfcda:
9924  VT = MVT::v2f64;
9925  break;
9926  case Intrinsic::ppc_qpx_qvstfcs:
9927  case Intrinsic::ppc_qpx_qvstfcsa:
9928  VT = MVT::v2f32;
9929  break;
9930  case Intrinsic::ppc_qpx_qvstfiw:
9931  case Intrinsic::ppc_qpx_qvstfiwa:
9932  case Intrinsic::ppc_altivec_stvx:
9933  case Intrinsic::ppc_altivec_stvxl:
9934  case Intrinsic::ppc_vsx_stxvw4x:
9935  VT = MVT::v4i32;
9936  break;
9937  case Intrinsic::ppc_vsx_stxvd2x:
9938  VT = MVT::v2f64;
9939  break;
9940  case Intrinsic::ppc_vsx_stxvw4x_be:
9941  VT = MVT::v4i32;
9942  break;
9943  case Intrinsic::ppc_vsx_stxvd2x_be:
9944  VT = MVT::v2f64;
9945  break;
9946  case Intrinsic::ppc_altivec_stvebx:
9947  VT = MVT::i8;
9948  break;
9949  case Intrinsic::ppc_altivec_stvehx:
9950  VT = MVT::i16;
9951  break;
9952  case Intrinsic::ppc_altivec_stvewx:
9953  VT = MVT::i32;
9954  break;
9955  }
9956 
9957  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
9958  }
9959 
9960  return false;
9961 }
9962 
9963 // Return true if there is a nearby consecutive load to the one provided
9964 // (regardless of alignment). We search up and down the chain, looking through
9965 // token factors and other loads (but nothing else). As a result, a true result
9966 // indicates that it is safe to create a new consecutive load adjacent to the
9967 // load provided.
9968 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
9969  SDValue Chain = LD->getChain();
9970  EVT VT = LD->getMemoryVT();
9971 
9972  SmallSet<SDNode *, 16> LoadRoots;
9973  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
9974  SmallSet<SDNode *, 16> Visited;
9975 
9976  // First, search up the chain, branching to follow all token-factor operands.
9977  // If we find a consecutive load, then we're done; otherwise, record all
9978  // nodes just above the top-level loads and token factors.
9979  while (!Queue.empty()) {
9980  SDNode *ChainNext = Queue.pop_back_val();
9981  if (!Visited.insert(ChainNext).second)
9982  continue;
9983 
9984  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
9985  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9986  return true;
9987 
9988  if (!Visited.count(ChainLD->getChain().getNode()))
9989  Queue.push_back(ChainLD->getChain().getNode());
9990  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
9991  for (const SDUse &O : ChainNext->ops())
9992  if (!Visited.count(O.getNode()))
9993  Queue.push_back(O.getNode());
9994  } else
9995  LoadRoots.insert(ChainNext);
9996  }
9997 
9998  // Second, search down the chain, starting from the top-level nodes recorded
9999  // in the first phase. These top-level nodes are the nodes just above all
10000  // loads and token factors. Starting with their uses, recursively look through
10001  // all loads (just the chain uses) and token factors to find a consecutive
10002  // load.
10003  Visited.clear();
10004  Queue.clear();
10005 
10006  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10007  IE = LoadRoots.end(); I != IE; ++I) {
10008  Queue.push_back(*I);
10009 
10010  while (!Queue.empty()) {
10011  SDNode *LoadRoot = Queue.pop_back_val();
10012  if (!Visited.insert(LoadRoot).second)
10013  continue;
10014 
10015  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10016  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10017  return true;
10018 
10019  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10020  UE = LoadRoot->use_end(); UI != UE; ++UI)
10021  if (((isa<MemSDNode>(*UI) &&
10022  cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10023  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10024  Queue.push_back(*UI);
10025  }
10026  }
10027 
10028  return false;
10029 }
10030 
10031 
10032 /// This function is called when we have proved that a SETCC node can be replaced
10033 /// by subtraction (and other supporting instructions) so that the result of
10034 /// the comparison is kept in a GPR instead of a CR. This function is purely for
10035 /// codegen purposes and has some flags to guide the codegen process.
10036 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10037  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10038 
10039  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10040 
10041  // Zero extend the operands to the largest legal integer. The original
10042  // operands must be of a strictly smaller size.
10043  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10044  DAG.getConstant(Size, DL, MVT::i32));
10045  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10046  DAG.getConstant(Size, DL, MVT::i32));
10047 
10048  // Swap if needed. Depends on the condition code.
10049  if (Swap)
10050  std::swap(Op0, Op1);
10051 
10052  // Subtract extended integers.
10053  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10054 
10055  // Move the sign bit to the least significant position and zero out the rest.
10056  // Now the least significant bit carries the result of the original comparison.
10057  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10058  DAG.getConstant(Size - 1, DL, MVT::i32));
10059  auto Final = Shifted;
10060 
10061  // Complement the result if needed. Based on the condition code.
10062  if (Complement)
10063  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10064  DAG.getConstant(1, DL, MVT::i64));
10065 
10066  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10067 }
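// A worked instance (illustrative only), for i32 operands and Size = 64:
// for (setult a, b) both operands are zero-extended to i64, so a - b is
// exact and negative exactly when a <u b. The SRL by Size - 1 = 63 moves
// that sign bit into bit 0; no swap or complement is needed. E.g. a = 1,
// b = 2 gives 1 - 2 = 0xFFFFFFFFFFFFFFFF, and that value >> 63 == 1.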
10068 
10069 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10070  DAGCombinerInfo &DCI) const {
10071 
10072  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10073 
10074  SelectionDAG &DAG = DCI.DAG;
10075  SDLoc DL(N);
10076 
10077  // Size of integers being compared has a critical role in the following
10078  // analysis, so we prefer to do this when all types are legal.
10079  if (!DCI.isAfterLegalizeVectorOps())
10080  return SDValue();
10081 
10082  // If all users of SETCC extend its value to a legal integer type
10083  // then we replace SETCC with a subtraction.
10084  for (SDNode::use_iterator UI = N->use_begin(),
10085  UE = N->use_end(); UI != UE; ++UI) {
10086  if (UI->getOpcode() != ISD::ZERO_EXTEND)
10087  return SDValue();
10088  }
10089 
10090  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10091  auto OpSize = N->getOperand(0).getValueSizeInBits();
10092 
10093  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10094 
10095  if (OpSize < Size) {
10096  switch (CC) {
10097  default: break;
10098  case ISD::SETULT:
10099  return generateEquivalentSub(N, Size, false, false, DL, DAG);
10100  case ISD::SETULE:
10101  return generateEquivalentSub(N, Size, true, true, DL, DAG);
10102  case ISD::SETUGT:
10103  return generateEquivalentSub(N, Size, false, true, DL, DAG);
10104  case ISD::SETUGE:
10105  return generateEquivalentSub(N, Size, true, false, DL, DAG);
10106  }
10107  }
10108 
10109  return SDValue();
10110 }
10111 
10112 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10113  DAGCombinerInfo &DCI) const {
10114  SelectionDAG &DAG = DCI.DAG;
10115  SDLoc dl(N);
10116 
10117  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10118  // If we're tracking CR bits, we need to be careful that we don't have:
10119  // trunc(binary-ops(zext(x), zext(y)))
10120  // or
10121  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
10122  // such that we're unnecessarily moving things into GPRs when it would be
10123  // better to keep them in CR bits.
10124 
10125  // Note that trunc here can be an actual i1 trunc, or can be the effective
10126  // truncation that comes from a setcc or select_cc.
10127  if (N->getOpcode() == ISD::TRUNCATE &&
10128  N->getValueType(0) != MVT::i1)
10129  return SDValue();
10130 
10131  if (N->getOperand(0).getValueType() != MVT::i32 &&
10132  N->getOperand(0).getValueType() != MVT::i64)
10133  return SDValue();
10134 
10135  if (N->getOpcode() == ISD::SETCC ||
10136  N->getOpcode() == ISD::SELECT_CC) {
10137  // If we're looking at a comparison, then we need to make sure that the
10138  // high bits (all except for the first) don't affect the result.
10139  ISD::CondCode CC =
10140  cast<CondCodeSDNode>(N->getOperand(
10141  N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10142  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10143 
10144  if (ISD::isSignedIntSetCC(CC)) {
10145  if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10146  DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10147  return SDValue();
10148  } else if (ISD::isUnsignedIntSetCC(CC)) {
10149  if (!DAG.MaskedValueIsZero(N->getOperand(0),
10150  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10151  !DAG.MaskedValueIsZero(N->getOperand(1),
10152  APInt::getHighBitsSet(OpBits, OpBits-1)))
10153  return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10154  : SDValue());
10155  } else {
10156  // This is neither a signed nor an unsigned comparison, just make sure
10157  // that the high bits are equal.
10158  APInt Op1Zero, Op1One;
10159  APInt Op2Zero, Op2One;
10160  DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
10161  DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
10162 
10163  // We don't really care about what is known about the first bit (if
10164  // anything), so clear it in all masks prior to comparing them.
10165  Op1Zero.clearBit(0); Op1One.clearBit(0);
10166  Op2Zero.clearBit(0); Op2One.clearBit(0);
10167 
10168  if (Op1Zero != Op2Zero || Op1One != Op2One)
10169  return SDValue();
10170  }
10171  }
10172 
10173  // We now know that the higher-order bits are irrelevant, we just need to
10174  // make sure that all of the intermediate operations are bit operations, and
10175  // all inputs are extensions.
10176  if (N->getOperand(0).getOpcode() != ISD::AND &&
10177  N->getOperand(0).getOpcode() != ISD::OR &&
10178  N->getOperand(0).getOpcode() != ISD::XOR &&
10179  N->getOperand(0).getOpcode() != ISD::SELECT &&
10180  N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10181  N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10182  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10183  N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10184  N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10185  return SDValue();
10186 
10187  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10188  N->getOperand(1).getOpcode() != ISD::AND &&
10189  N->getOperand(1).getOpcode() != ISD::OR &&
10190  N->getOperand(1).getOpcode() != ISD::XOR &&
10191  N->getOperand(1).getOpcode() != ISD::SELECT &&
10192  N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10193  N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10194  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10195  N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10196  N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10197  return SDValue();
10198 
10199  SmallVector<SDValue, 4> Inputs;
10200  SmallVector<SDValue, 8> BinOps, PromOps;
10201  SmallPtrSet<SDNode *, 16> Visited;
10202 
10203  for (unsigned i = 0; i < 2; ++i) {
10204  if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10205  N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10206  N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10207  N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10208  isa<ConstantSDNode>(N->getOperand(i)))
10209  Inputs.push_back(N->getOperand(i));
10210  else
10211  BinOps.push_back(N->getOperand(i));
10212 
10213  if (N->getOpcode() == ISD::TRUNCATE)
10214  break;
10215  }
10216 
10217  // Visit all inputs, collect all binary operations (and, or, xor and
10218  // select) that are all fed by extensions.
10219  while (!BinOps.empty()) {
10220  SDValue BinOp = BinOps.back();
10221  BinOps.pop_back();
10222 
10223  if (!Visited.insert(BinOp.getNode()).second)
10224  continue;
10225 
10226  PromOps.push_back(BinOp);
10227 
10228  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10229  // The condition of the select is not promoted.
10230  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10231  continue;
10232  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10233  continue;
10234 
10235  if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10236  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10237  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10238  BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10239  isa<ConstantSDNode>(BinOp.getOperand(i))) {
10240  Inputs.push_back(BinOp.getOperand(i));
10241  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10242  BinOp.getOperand(i).getOpcode() == ISD::OR ||
10243  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10244  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10245  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10246  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10247  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10248  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10249  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10250  BinOps.push_back(BinOp.getOperand(i));
10251  } else {
10252  // We have an input that is not an extension or another binary
10253  // operation; we'll abort this transformation.
10254  return SDValue();
10255  }
10256  }
10257  }
10258 
10259  // Make sure that this is a self-contained cluster of operations (which
10260  // is not quite the same thing as saying that everything has only one
10261  // use).
10262  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10263  if (isa<ConstantSDNode>(Inputs[i]))
10264  continue;
10265 
10266  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10267  UE = Inputs[i].getNode()->use_end();
10268  UI != UE; ++UI) {
10269  SDNode *User = *UI;
10270  if (User != N && !Visited.count(User))
10271  return SDValue();
10272 
10273  // Make sure that we're not going to promote the non-output-value
10274  // operand(s) or SELECT or SELECT_CC.
10275  // FIXME: Although we could sometimes handle this, and it does occur in
10276  // practice that one of the condition inputs to the select is also one of
10277  // the outputs, we currently can't deal with this.
10278  if (User->getOpcode() == ISD::SELECT) {
10279  if (User->getOperand(0) == Inputs[i])
10280  return SDValue();
10281  } else if (User->getOpcode() == ISD::SELECT_CC) {
10282  if (User->getOperand(0) == Inputs[i] ||
10283  User->getOperand(1) == Inputs[i])
10284  return SDValue();
10285  }
10286  }
10287  }
10288 
10289  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10290  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10291  UE = PromOps[i].getNode()->use_end();
10292  UI != UE; ++UI) {
10293  SDNode *User = *UI;
10294  if (User != N && !Visited.count(User))
10295  return SDValue();
10296 
10297  // Make sure that we're not going to promote the non-output-value
10298  // operand(s) or SELECT or SELECT_CC.
10299  // FIXME: Although we could sometimes handle this, and it does occur in
10300  // practice that one of the condition inputs to the select is also one of
10301  // the outputs, we currently can't deal with this.
10302  if (User->getOpcode() == ISD::SELECT) {
10303  if (User->getOperand(0) == PromOps[i])
10304  return SDValue();
10305  } else if (User->getOpcode() == ISD::SELECT_CC) {
10306  if (User->getOperand(0) == PromOps[i] ||
10307  User->getOperand(1) == PromOps[i])
10308  return SDValue();
10309  }
10310  }
10311  }
10312 
10313  // Replace all inputs with the extension operand.
10314  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10315  // Constants may have users outside the cluster of to-be-promoted nodes,
10316  // and so we need to replace those as we do the promotions.
10317  if (isa<ConstantSDNode>(Inputs[i]))
10318  continue;
10319  else
10320  DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10321  }
10322 
10323  std::list<HandleSDNode> PromOpHandles;
10324  for (auto &PromOp : PromOps)
10325  PromOpHandles.emplace_back(PromOp);
10326 
10327  // Replace all operations (these are all the same, but have a different
10328  // (i1) return type). DAG.getNode will validate that the types of
10329  // a binary operator match, so go through the list in reverse so that
10330  // we've likely promoted both operands first. Any intermediate truncations or
10331  // extensions disappear.
10332  while (!PromOpHandles.empty()) {
10333  SDValue PromOp = PromOpHandles.back().getValue();
10334  PromOpHandles.pop_back();
10335 
10336  if (PromOp.getOpcode() == ISD::TRUNCATE ||
10337  PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10338  PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10339  PromOp.getOpcode() == ISD::ANY_EXTEND) {
10340  if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10341  PromOp.getOperand(0).getValueType() != MVT::i1) {
10342  // The operand is not yet ready (see comment below).
10343  PromOpHandles.emplace_front(PromOp);
10344  continue;
10345  }
10346 
10347  SDValue RepValue = PromOp.getOperand(0);
10348  if (isa<ConstantSDNode>(RepValue))
10349  RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10350 
10351  DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10352  continue;
10353  }
10354 
10355  unsigned C;
10356  switch (PromOp.getOpcode()) {
10357  default: C = 0; break;
10358  case ISD::SELECT: C = 1; break;
10359  case ISD::SELECT_CC: C = 2; break;
10360  }
10361 
10362  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10363  PromOp.getOperand(C).getValueType() != MVT::i1) ||
10364  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10365  PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10366  // The to-be-promoted operands of this node have not yet been
10367  // promoted (this should be rare because we're going through the
10368  // list backward, but if one of the operands has several users in
10369  // this cluster of to-be-promoted nodes, it is possible).
10370  PromOpHandles.emplace_front(PromOp);
10371  continue;
10372  }
10373 
10374  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10375  PromOp.getNode()->op_end());
10376 
10377  // If there are any constant inputs, make sure they're replaced now.
10378  for (unsigned i = 0; i < 2; ++i)
10379  if (isa<ConstantSDNode>(Ops[C+i]))
10380  Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10381 
10382  DAG.ReplaceAllUsesOfValueWith(PromOp,
10383  DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10384  }
10385 
10386  // Now we're left with the initial truncation itself.
10387  if (N->getOpcode() == ISD::TRUNCATE)
10388  return N->getOperand(0);
10389 
10390  // Otherwise, this is a comparison. The operands to be compared have just
10391  // changed type (to i1), but everything else is the same.
10392  return SDValue(N, 0);
10393 }
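// The net effect, sketched on a small DAG (illustrative only):
//   (trunc i1 (and i32 (zext i32 x:i1), (zext i32 y:i1)))
// collapses to
//   (and i1 x, y)
// so the logic stays in CR bits instead of round-tripping through GPRs.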
10394 
10395 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10396  DAGCombinerInfo &DCI) const {
10397  SelectionDAG &DAG = DCI.DAG;
10398  SDLoc dl(N);
10399 
10400  // If we're tracking CR bits, we need to be careful that we don't have:
10401  // zext(binary-ops(trunc(x), trunc(y)))
10402  // or
10403  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
10404  // such that we're unnecessarily moving things into CR bits that can more
10405  // efficiently stay in GPRs. Note that if we're not certain that the high
10406  // bits are set as required by the final extension, we still may need to do
10407  // some masking to get the proper behavior.
10408 
10409  // This same functionality is important on PPC64 when dealing with
10410  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10411  // the return values of functions. Because it is so similar, it is handled
10412  // here as well.
10413 
10414  if (N->getValueType(0) != MVT::i32 &&
10415  N->getValueType(0) != MVT::i64)
10416  return SDValue();
10417 
10418  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10419  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10420  return SDValue();
10421 
10422  if (N->getOperand(0).getOpcode() != ISD::AND &&
10423  N->getOperand(0).getOpcode() != ISD::OR &&
10424  N->getOperand(0).getOpcode() != ISD::XOR &&
10425  N->getOperand(0).getOpcode() != ISD::SELECT &&
10426  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10427  return SDValue();
10428 
10429  SmallVector<SDValue, 4> Inputs;
10430  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10431  SmallPtrSet<SDNode *, 16> Visited;
10432 
10433  // Visit all inputs, collect all binary operations (and, or, xor and
10434  // select) that are all fed by truncations.
10435  while (!BinOps.empty()) {
10436  SDValue BinOp = BinOps.back();
10437  BinOps.pop_back();
10438 
10439  if (!Visited.insert(BinOp.getNode()).second)
10440  continue;
10441 
10442  PromOps.push_back(BinOp);
10443 
10444  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10445  // The condition of the select is not promoted.
10446  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10447  continue;
10448  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10449  continue;
10450 
10451  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10452  isa<ConstantSDNode>(BinOp.getOperand(i))) {
10453  Inputs.push_back(BinOp.getOperand(i));
10454  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10455  BinOp.getOperand(i).getOpcode() == ISD::OR ||
10456  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10457  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10458  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10459  BinOps.push_back(BinOp.getOperand(i));
10460  } else {
10461  // We have an input that is not a truncation or another binary
10462  // operation; we'll abort this transformation.
10463  return SDValue();
10464  }
10465  }
10466  }
10467 
10468  // The operands of a select that must be truncated when the select is
10469  // promoted because the operand is actually part of the to-be-promoted set.
10470  DenseMap<SDNode *, EVT> SelectTruncOp[2];
10471 
10472  // Make sure that this is a self-contained cluster of operations (which
10473  // is not quite the same thing as saying that everything has only one
10474  // use).
10475  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10476  if (isa<ConstantSDNode>(Inputs[i]))
10477  continue;
10478 
10479  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10480  UE = Inputs[i].getNode()->use_end();
10481  UI != UE; ++UI) {
10482  SDNode *User = *UI;
10483  if (User != N && !Visited.count(User))
10484  return SDValue();
10485 
10486  // If we're going to promote the non-output-value operand(s) or SELECT or
10487  // SELECT_CC, record them for truncation.
10488  if (User->getOpcode() == ISD::SELECT) {
10489  if (User->getOperand(0) == Inputs[i])
10490  SelectTruncOp[0].insert(std::make_pair(User,
10491  User->getOperand(0).getValueType()));
10492  } else if (User->getOpcode() == ISD::SELECT_CC) {
10493  if (User->getOperand(0) == Inputs[i])
10494  SelectTruncOp[0].insert(std::make_pair(User,
10495  User->getOperand(0).getValueType()));
10496  if (User->getOperand(1) == Inputs[i])
10497  SelectTruncOp[1].insert(std::make_pair(User,
10498  User->getOperand(1).getValueType()));
10499  }
10500  }
10501  }
10502 
10503  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10504  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10505  UE = PromOps[i].getNode()->use_end();
10506  UI != UE; ++UI) {
10507  SDNode *User = *UI;
10508  if (User != N && !Visited.count(User))
10509  return SDValue();
10510 
10511  // If we're going to promote the non-output-value operand(s) or SELECT or
10512  // SELECT_CC, record them for truncation.
10513  if (User->getOpcode() == ISD::SELECT) {
10514  if (User->getOperand(0) == PromOps[i])
10515  SelectTruncOp[0].insert(std::make_pair(User,
10516  User->getOperand(0).getValueType()));
10517  } else if (User->getOpcode() == ISD::SELECT_CC) {
10518  if (User->getOperand(0) == PromOps[i])
10519  SelectTruncOp[0].insert(std::make_pair(User,
10520  User->getOperand(0).getValueType()));
10521  if (User->getOperand(1) == PromOps[i])
10522  SelectTruncOp[1].insert(std::make_pair(User,
10523  User->getOperand(1).getValueType()));
10524  }
10525  }
10526  }
10527 
10528  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
10529  bool ReallyNeedsExt = false;
10530  if (N->getOpcode() != ISD::ANY_EXTEND) {
10531  // If not all of the inputs are already sign/zero extended, then
10532  // we'll still need to do that at the end.
10533  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10534  if (isa<ConstantSDNode>(Inputs[i]))
10535  continue;
10536 
10537  unsigned OpBits =
10538  Inputs[i].getOperand(0).getValueSizeInBits();
10539  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
10540 
10541  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
10542  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
10543  APInt::getHighBitsSet(OpBits,
10544  OpBits-PromBits))) ||
10545  (N->getOpcode() == ISD::SIGN_EXTEND &&
10546  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
10547  (OpBits-(PromBits-1)))) {
10548  ReallyNeedsExt = true;
10549  break;
10550  }
10551  }
10552  }
10553 
10554  // Replace all inputs, either with the truncation operand, or a
10555  // truncation or extension to the final output type.
10556  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10557  // Constant inputs need to be replaced with the to-be-promoted nodes that
10558  // use them because they might have users outside of the cluster of
10559  // promoted nodes.
10560  if (isa<ConstantSDNode>(Inputs[i]))
10561  continue;
10562 
10563  SDValue InSrc = Inputs[i].getOperand(0);
10564  if (Inputs[i].getValueType() == N->getValueType(0))
10565  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
10566  else if (N->getOpcode() == ISD::SIGN_EXTEND)
10567  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10568  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
10569  else if (N->getOpcode() == ISD::ZERO_EXTEND)
10570  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10571  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
10572  else
10573  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10574  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
10575  }
10576 
10577  std::list<HandleSDNode> PromOpHandles;
10578  for (auto &PromOp : PromOps)
10579  PromOpHandles.emplace_back(PromOp);
10580 
10581  // Replace all operations (these are all the same, but have a different
10582  // (promoted) return type). DAG.getNode will validate that the types of
10583  // a binary operator match, so go through the list in reverse so that
10584  // we've likely promoted both operands first.
10585  while (!PromOpHandles.empty()) {
10586  SDValue PromOp = PromOpHandles.back().getValue();
10587  PromOpHandles.pop_back();
10588 
10589  unsigned C;
10590  switch (PromOp.getOpcode()) {
10591  default: C = 0; break;
10592  case ISD::SELECT: C = 1; break;
10593  case ISD::SELECT_CC: C = 2; break;
10594  }
10595 
10596  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10597  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
10598  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10599  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
10600  // The to-be-promoted operands of this node have not yet been
10601  // promoted (this should be rare because we're going through the
10602  // list backward, but if one of the operands has several users in
10603  // this cluster of to-be-promoted nodes, it is possible).
10604  PromOpHandles.emplace_front(PromOp);
10605  continue;
10606  }
10607 
10608  // For SELECT and SELECT_CC nodes, we do a similar check for any
10609  // to-be-promoted comparison inputs.
10610  if (PromOp.getOpcode() == ISD::SELECT ||
10611  PromOp.getOpcode() == ISD::SELECT_CC) {
10612  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
10613  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
10614  (SelectTruncOp[1].count(PromOp.getNode()) &&
10615  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
10616  PromOpHandles.emplace_front(PromOp);
10617  continue;
10618  }
10619  }
10620 
10621  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10622  PromOp.getNode()->op_end());
10623 
10624  // If this node has constant inputs, then they'll need to be promoted here.
10625  for (unsigned i = 0; i < 2; ++i) {
10626  if (!isa<ConstantSDNode>(Ops[C+i]))
10627  continue;
10628  if (Ops[C+i].getValueType() == N->getValueType(0))
10629  continue;
10630 
10631  if (N->getOpcode() == ISD::SIGN_EXTEND)
10632  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10633  else if (N->getOpcode() == ISD::ZERO_EXTEND)
10634  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10635  else
10636  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10637  }
10638 
10639  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
10640  // truncate them again to the original value type.
10641  if (PromOp.getOpcode() == ISD::SELECT ||
10642  PromOp.getOpcode() == ISD::SELECT_CC) {
10643  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
10644  if (SI0 != SelectTruncOp[0].end())
10645  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
10646  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
10647  if (SI1 != SelectTruncOp[1].end())
10648  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
10649  }
10650 
10651  DAG.ReplaceAllUsesOfValueWith(PromOp,
10652  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
10653  }
10654 
10655  // Now we're left with the initial extension itself.
10656  if (!ReallyNeedsExt)
10657  return N->getOperand(0);
10658 
10659  // To zero extend, just mask off everything except for the first bit (in the
10660  // i1 case).
10661  if (N->getOpcode() == ISD::ZERO_EXTEND)
10662  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
10663  DAG.getConstant(APInt::getLowBitsSet(
10664  N->getValueSizeInBits(0), PromBits),
10665  dl, N->getValueType(0)));
10666 
10667  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
10668  "Invalid extension type");
10669  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
10670  SDValue ShiftCst =
10671  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
10672  return DAG.getNode(
10673  ISD::SRA, dl, N->getValueType(0),
10674  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
10675  ShiftCst);
10676 }
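// A worked instance (illustrative only): sign-extending an i1 input to an
// i32 result gives PromBits = 1 and ShiftCst = 31, so the emitted pair
// (sra (shl x, 31), 31) maps x = 1 to 0xFFFFFFFF (-1) and x = 0 to 0,
// which is exactly i1 sign extension.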
10677 
10678 /// \brief Reduces the number of fp-to-int conversions when building a vector.
10679 ///
10680 /// If this vector is built out of floating to integer conversions,
10681 /// transform it to a vector built out of floating point values followed by a
10682 /// single floating to integer conversion of the vector.
10683 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
10684 /// becomes (fptosi (build_vector ($A, $B, ...)))
10685 SDValue PPCTargetLowering::
10686 combineElementTruncationToVectorTruncation(SDNode *N,
10687  DAGCombinerInfo &DCI) const {
10688  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10689  "Should be called with a BUILD_VECTOR node");
10690 
10691  SelectionDAG &DAG = DCI.DAG;
10692  SDLoc dl(N);
10693 
10694  SDValue FirstInput = N->getOperand(0);
10695  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
10696  "The input operand must be an fp-to-int conversion.");
10697 
10698  // This combine happens after legalization so the fp_to_[su]i nodes are
10699  // already converted to PPCSISD nodes.
10700  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
10701  if (FirstConversion == PPCISD::FCTIDZ ||
10702  FirstConversion == PPCISD::FCTIDUZ ||
10703  FirstConversion == PPCISD::FCTIWZ ||
10704  FirstConversion == PPCISD::FCTIWUZ) {
10705  bool IsSplat = true;
10706  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
10707  FirstConversion == PPCISD::FCTIWUZ;
10708  EVT SrcVT = FirstInput.getOperand(0).getValueType();
10709  SmallVector<SDValue, 4> Ops;
10710  EVT TargetVT = N->getValueType(0);
10711  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
10712  if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
10713  return SDValue();
10714  unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
10715  if (NextConversion != FirstConversion)
10716  return SDValue();
10717  if (N->getOperand(i) != FirstInput)
10718  IsSplat = false;
10719  }
10720 
10721  // If this is a splat, we leave it as-is since there will be only a single
10722  // fp-to-int conversion followed by a splat of the integer. This is better
10723  // for 32-bit and smaller ints and neutral for 64-bit ints.
10724  if (IsSplat)
10725  return SDValue();
10726 
10727  // Now that we know we have the right type of node, get its operands
10728  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
10729  SDValue In = N->getOperand(i).getOperand(0);
10730  // For 32-bit values, we need to add an FP_ROUND node.
10731  if (Is32Bit) {
10732  if (In.isUndef())
10733  Ops.push_back(DAG.getUNDEF(SrcVT));
10734  else {
10735  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
10736  MVT::f32, In.getOperand(0),
10737  DAG.getIntPtrConstant(1, dl));
10738  Ops.push_back(Trunc);
10739  }
10740  } else
10741  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
10742  }
10743 
10744  unsigned Opcode;
10745  if (FirstConversion == PPCISD::FCTIDZ ||
10746  FirstConversion == PPCISD::FCTIWZ)
10747  Opcode = ISD::FP_TO_SINT;
10748  else
10749  Opcode = ISD::FP_TO_UINT;
10750 
10751  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
10752  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
10753  return DAG.getNode(Opcode, dl, TargetVT, BV);
10754  }
10755  return SDValue();
10756 }
10757 
10758 /// \brief Reduce the number of loads when building a vector.
10759 ///
10760 /// Building a vector out of multiple loads can be converted to a load
10761 /// of the vector type if the loads are consecutive. If the loads are
10762 /// consecutive but in descending order, a shuffle is added at the end
10763 /// to reorder the vector.
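///
/// Illustrative sketch: with i32 elements,
///   (v4i32 build_vector (load a), (load a+4), (load a+8), (load a+12))
/// becomes a single wide (v4i32 load a); if the loads instead arrive in
/// descending address order, the wide load is followed by a
/// vector_shuffle <3, 2, 1, 0> to restore the requested element order.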
10764 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
10765  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10766  "Should be called with a BUILD_VECTOR node");
10767 
10768  SDLoc dl(N);
10769  bool InputsAreConsecutiveLoads = true;
10770  bool InputsAreReverseConsecutive = true;
10771  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
10772  SDValue FirstInput = N->getOperand(0);
10773  bool IsRoundOfExtLoad = false;
10774 
10775  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
10776  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
10777  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
10778  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
10779  }
10780  // Not a build vector of (possibly fp_rounded) loads.
10781  if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
10782  return SDValue();
10783 
10784  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
10785  // If any inputs are fp_round(extload), they all must be.
10786  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
10787  return SDValue();
10788 
10789  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
10790  N->getOperand(i);
10791  if (NextInput.getOpcode() != ISD::LOAD)
10792  return SDValue();
10793 
10794  SDValue PreviousInput =
10795  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
10796  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
10797  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
10798 
10799  // If any inputs are fp_round(extload), they all must be.
10800  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
10801  return SDValue();
10802 
10803  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
10804  InputsAreConsecutiveLoads = false;
10805  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
10806  InputsAreReverseConsecutive = false;
10807 
10808  // Exit early if the loads are neither consecutive nor reverse consecutive.
10809  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
10810  return SDValue();
10811  }
10812 
10813  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
10814  "The loads cannot be both consecutive and reverse consecutive.");
10815 
10816  SDValue FirstLoadOp =
10817  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
10818  SDValue LastLoadOp =
10819  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
10820  N->getOperand(N->getNumOperands()-1);
10821 
10822  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
10823  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
10824  if (InputsAreConsecutiveLoads) {
10825  assert(LD1 && "Input needs to be a LoadSDNode.");
10826  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
10827  LD1->getBasePtr(), LD1->getPointerInfo(),
10828  LD1->getAlignment());
10829  }
10830  if (InputsAreReverseConsecutive) {
10831  assert(LDL && "Input needs to be a LoadSDNode.");
10832  SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
10833  LDL->getBasePtr(), LDL->getPointerInfo(),
10834  LDL->getAlignment());
10835  SmallVector<int, 16> Ops;
10836  for (int i = N->getNumOperands() - 1; i >= 0; i--)
10837  Ops.push_back(i);
10838 
10839  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
10840  DAG.getUNDEF(N->getValueType(0)), Ops);
10841  }
10842  return SDValue();
10843 }
10844 
10845 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
10846  DAGCombinerInfo &DCI) const {
10847  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10848  "Should be called with a BUILD_VECTOR node");
10849 
10850  SelectionDAG &DAG = DCI.DAG;
10851  SDLoc dl(N);
10852 
10853  if (!Subtarget.hasVSX())
10854  return SDValue();
10855 
10856  // The target independent DAG combiner will leave a build_vector of
10857  // float-to-int conversions intact. We can generate MUCH better code for
10858  // a float-to-int conversion of a vector of floats.
10859  SDValue FirstInput = N->getOperand(0);
10860  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
10861  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
10862  if (Reduced)
10863  return Reduced;
10864  }
10865 
10866  // If we're building a vector out of consecutive loads, just load that
10867  // vector type.
10868  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
10869  if (Reduced)
10870  return Reduced;
10871 
10872  if (N->getValueType(0) != MVT::v2f64)
10873  return SDValue();
10874 
10875  // Looking for:
10876  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
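// so that the pair can be rewritten below as a single
// PPCISD::SINT_VEC_TO_FP/UINT_VEC_TO_FP of the half of the source vector
// (chosen via SubvecIdx) that holds both extracted elements, avoiding the
// scalar extract/convert round trip.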
10877  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
10878  FirstInput.getOpcode() != ISD::UINT_TO_FP)
10879  return SDValue();
10880  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
10881  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
10882  return SDValue();
10883  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
10884  return SDValue();
10885 
10886  SDValue Ext1 = FirstInput.getOperand(0);
10887  SDValue Ext2 = N->getOperand(1).getOperand(0);
10888  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10889  Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10890  return SDValue();
10891 
10892  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
10893  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
10894  if (!Ext1Op || !Ext2Op)
10895  return SDValue();
10896  if (Ext1.getValueType() != MVT::i32 ||
10897  Ext2.getValueType() != MVT::i32)
10898  if (Ext1.getOperand(0) != Ext2.getOperand(0))
10899  return SDValue();
10900 
10901  int FirstElem = Ext1Op->getZExtValue();
10902  int SecondElem = Ext2Op->getZExtValue();
10903  int SubvecIdx;
10904  if (FirstElem == 0 && SecondElem == 1)
10905  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
10906  else if (FirstElem == 2 && SecondElem == 3)
10907  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
10908  else
10909  return SDValue();
10910 
10911  SDValue SrcVec = Ext1.getOperand(0);
10912  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
10913  PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
10914  return DAG.getNode(NodeType, dl, MVT::v2f64,
10915  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
10916 }
10917 
10918 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
10919  DAGCombinerInfo &DCI) const {
10920  assert((N->getOpcode() == ISD::SINT_TO_FP ||
10921  N->getOpcode() == ISD::UINT_TO_FP) &&
10922  "Need an int -> FP conversion node here");
10923 
10924  if (useSoftFloat() || !Subtarget.has64BitSupport())
10925  return SDValue();
10926 
10927  SelectionDAG &DAG = DCI.DAG;
10928  SDLoc dl(N);
10929  SDValue Op(N, 0);
10930 
10931  SDValue FirstOperand(Op.getOperand(0));
10932  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
10933  (FirstOperand.getValueType() == MVT::i8 ||
10934  FirstOperand.getValueType() == MVT::i16);
10935  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
10936  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
10937  bool DstDouble = Op.getValueType() == MVT::f64;
10938  unsigned ConvOp = Signed ?
10939  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
10940  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
10941  SDValue WidthConst =
10942  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
10943  dl, false);
10944  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
10945  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
10946  SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
10947  DAG.getVTList(MVT::f64, MVT::Other),
10948  Ops, MVT::i8, LDN->getMemOperand());
10949 
10950  // For signed conversion, we need to sign-extend the value in the VSR
10951  if (Signed) {
10952  SDValue ExtOps[] = { Ld, WidthConst };
10953  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
10954  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
10955  } else
10956  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
10957  }
10958 
10959  // Don't handle ppc_fp128 here or i1 conversions.
10960  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
10961  return SDValue();
10962  if (Op.getOperand(0).getValueType() == MVT::i1)
10963  return SDValue();
10964 
10965  // For i32 intermediate values, unfortunately, the conversion functions
10966  // leave the upper 32 bits of the value undefined. Within the set of
10967  // scalar instructions, we have no method for zero- or sign-extending the
10968  // value. Thus, we cannot handle i32 intermediate values here.
10969  if (Op.getOperand(0).getValueType() == MVT::i32)
10970  return SDValue();
10971 
10972  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
10973  "UINT_TO_FP is supported only with FPCVT");
10974 
10975  // If we have FCFIDS, then use it when converting to single-precision.
10976  // Otherwise, convert to double-precision and then round.
10977  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10978  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
10979  : PPCISD::FCFIDS)
10980  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
10981  : PPCISD::FCFID);
10982  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10983  ? MVT::f32
10984  : MVT::f64;
10985 
10986  // If we're converting from a float, to an int, and back to a float again,
10987  // then we don't need the store/load pair at all.
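// For example (sketch), (f64 sint_to_fp (i64 fp_to_sint f64:$X)) can be
// emitted as fctidz followed by fcfid, keeping $X in a floating-point
// register the whole time.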
10988  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
10989  Subtarget.hasFPCVT()) ||
10990  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
10991  SDValue Src = Op.getOperand(0).getOperand(0);
10992  if (Src.getValueType() == MVT::f32) {
10993  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
10994  DCI.AddToWorklist(Src.getNode());
10995  } else if (Src.getValueType() != MVT::f64) {
10996  // Make sure that we don't pick up a ppc_fp128 source value.
10997  return SDValue();
10998  }
10999 
11000  unsigned FCTOp =
11001  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
11002  PPCISD::FCTIDUZ;
11003 
11004  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
11005  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
11006 
11007  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
11008  FP = DAG.getNode(ISD::FP_ROUND, dl,
11009  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
11010  DCI.AddToWorklist(FP.getNode());
11011  }
11012 
11013  return FP;
11014  }
11015 
11016  return SDValue();
11017 }
11018 
11019 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
11020 // builtins) into loads with swaps.
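//
// A sketch of the effect on a little-endian subtarget without ISA 3.0:
//   %v = load <4 x i32>, <4 x i32>* %p
// becomes a PPCISD::LXVD2X load followed by a PPCISD::XXSWAPD that restores
// the expected element order, plus a bitcast when the requested type is not
// v2f64.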
11021 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
11022  DAGCombinerInfo &DCI) const {
11023  SelectionDAG &DAG = DCI.DAG;
11024  SDLoc dl(N);
11025  SDValue Chain;
11026  SDValue Base;
11027  MachineMemOperand *MMO;
11028 
11029  switch (N->getOpcode()) {
11030  default:
11031  llvm_unreachable("Unexpected opcode for little endian VSX load");
11032  case ISD::LOAD: {
11033  LoadSDNode *LD = cast<LoadSDNode>(N);
11034  Chain = LD->getChain();
11035  Base = LD->getBasePtr();
11036  MMO = LD->getMemOperand();
11037  // If the MMO suggests this isn't a load of a full vector, leave
11038  // things alone. For a built-in, we have to make the change for
11039  // correctness, so if there is a size problem that will be a bug.
11040  if (MMO->getSize() < 16)
11041  return SDValue();
11042  break;
11043  }
11044  case ISD::INTRINSIC_W_CHAIN: {
11045  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11046  Chain = Intrin->getChain();
11047  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
11048  // us what we want. Get operand 2 instead.
11049  Base = Intrin->getOperand(2);
11050  MMO = Intrin->getMemOperand();
11051  break;
11052  }
11053  }
11054 
11055  MVT VecTy = N->getValueType(0).getSimpleVT();
11056  SDValue LoadOps[] = { Chain, Base };
11057  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
11058  DAG.getVTList(MVT::v2f64, MVT::Other),
11059  LoadOps, MVT::v2f64, MMO);
11060 
11061  DCI.AddToWorklist(Load.getNode());
11062  Chain = Load.getValue(1);
11063  SDValue Swap = DAG.getNode(
11064  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
11065  DCI.AddToWorklist(Swap.getNode());
11066 
11067  // Add a bitcast if the resulting load type doesn't match v2f64.
11068  if (VecTy != MVT::v2f64) {
11069  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
11070  DCI.AddToWorklist(N.getNode());
11071  // Package {bitcast value, swap's chain} to match Load's shape.
11072  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
11073  N, Swap.getValue(1));
11074  }
11075 
11076  return Swap;
11077 }
11078 
11079 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
11080 // builtins) into stores with swaps.
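//
// This mirrors the load case above: the source value is bitcast to v2f64
// if necessary, permuted with PPCISD::XXSWAPD, and then stored via
// PPCISD::STXVD2X.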
11081 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
11082  DAGCombinerInfo &DCI) const {
11083  SelectionDAG &DAG = DCI.DAG;
11084  SDLoc dl(N);
11085  SDValue Chain;
11086  SDValue Base;
11087  unsigned SrcOpnd;
11088  MachineMemOperand *MMO;
11089 
11090  switch (N->getOpcode()) {
11091  default:
11092  llvm_unreachable("Unexpected opcode for little endian VSX store");
11093  case ISD::STORE: {
11094  StoreSDNode *ST = cast<StoreSDNode>(N);
11095  Chain = ST->getChain();
11096  Base = ST->getBasePtr();
11097  MMO = ST->getMemOperand();
11098  SrcOpnd = 1;
11099  // If the MMO suggests this isn't a store of a full vector, leave
11100  // things alone. For a built-in, we have to make the change for
11101  // correctness, so if there is a size problem that will be a bug.
11102  if (MMO->getSize() < 16)
11103  return SDValue();
11104  break;
11105  }
11106  case ISD::INTRINSIC_VOID: {
11107  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11108  Chain = Intrin->getChain();
11109  // Intrin->getBasePtr() oddly does not get what we want.
11110  Base = Intrin->getOperand(3);
11111  MMO = Intrin->getMemOperand();
11112  SrcOpnd = 2;
11113  break;
11114  }
11115  }
11116 
11117  SDValue Src = N->getOperand(SrcOpnd);
11118  MVT VecTy = Src.getValueType().getSimpleVT();
11119 
11120  // All stores are done as v2f64 and possible bit cast.
11121  if (VecTy != MVT::v2f64) {
11122  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
11123  DCI.AddToWorklist(Src.getNode());
11124  }
11125 
11126  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
11127  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
11128  DCI.AddToWorklist(Swap.getNode());
11129  Chain = Swap.getValue(1);
11130  SDValue StoreOps[] = { Chain, Swap, Base };
11131  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
11132  DAG.getVTList(MVT::Other),
11133  StoreOps, VecTy, MMO);
11134  DCI.AddToWorklist(Store.getNode());
11135  return Store;
11136 }
11137 
11138 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
11139  DAGCombinerInfo &DCI) const {
11140  SelectionDAG &DAG = DCI.DAG;
11141  SDLoc dl(N);
11142  switch (N->getOpcode()) {
11143  default: break;
11144  case PPCISD::SHL:
11145  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
11146  return N->getOperand(0);
11147  break;
11148  case PPCISD::SRL:
11149  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
11150  return N->getOperand(0);
11151  break;
11152  case PPCISD::SRA:
11153  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11154  if (C->isNullValue() || // 0 >>s V -> 0.
11155  C->isAllOnesValue()) // -1 >>s V -> -1.
11156  return N->getOperand(0);
11157  }
11158  break;
11159  case ISD::SIGN_EXTEND:
11160  case ISD::ZERO_EXTEND:
11161  case ISD::ANY_EXTEND:
11162  return DAGCombineExtBoolTrunc(N, DCI);
11163  case ISD::TRUNCATE:
11164  case ISD::SETCC:
11165  case ISD::SELECT_CC:
11166  return DAGCombineTruncBoolExt(N, DCI);
11167  case ISD::SINT_TO_FP:
11168  case ISD::UINT_TO_FP:
11169  return combineFPToIntToFP(N, DCI);
11170  case ISD::STORE: {
11171  EVT Op1VT = N->getOperand(1).getValueType();
11172  bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
11173  (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11174 
11175  // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
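// For example (sketch), "*ip = (int)d;" becomes fctiwz followed by stfiwx,
// so the converted value never has to pass through a GPR.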
11176  if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11177  N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11178  ValidTypeForStoreFltAsInt &&
11179  N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11180  SDValue Val = N->getOperand(1).getOperand(0);
11181  if (Val.getValueType() == MVT::f32) {
11182  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
11183  DCI.AddToWorklist(Val.getNode());
11184  }
11185  Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
11186  DCI.AddToWorklist(Val.getNode());
11187 
11188  if (Op1VT == MVT::i32) {
11189  SDValue Ops[] = {
11190  N->getOperand(0), Val, N->getOperand(2),
11191  DAG.getValueType(N->getOperand(1).getValueType())
11192  };
11193 
11194  Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
11195  DAG.getVTList(MVT::Other), Ops,
11196  cast<StoreSDNode>(N)->getMemoryVT(),
11197  cast<StoreSDNode>(N)->getMemOperand());
11198  } else {
11199  unsigned WidthInBytes =
11200  N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11201  SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
11202 
11203  SDValue Ops[] = {
11204  N->getOperand(0), Val, N->getOperand(2), WidthConst,
11205  DAG.getValueType(N->getOperand(1).getValueType())
11206  };
11207  Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
11208  DAG.getVTList(MVT::Other), Ops,
11209  cast<StoreSDNode>(N)->getMemoryVT(),
11210  cast<StoreSDNode>(N)->getMemOperand());
11211  }
11212 
11213  DCI.AddToWorklist(Val.getNode());
11214  return Val;
11215  }
11216 
11217  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
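// For example (sketch), storing __builtin_bswap32(x) becomes a single
// stwbrx rather than a byte-reversal followed by an ordinary store.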
11218  if (cast<StoreSDNode>(N)->isUnindexed() &&
11219  N->getOperand(1).getOpcode() == ISD::BSWAP &&
11220  N->getOperand(1).getNode()->hasOneUse() &&
11221  (N->getOperand(1).getValueType() == MVT::i32 ||
11222  N->getOperand(1).getValueType() == MVT::i16 ||
11223  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11224  N->getOperand(1).getValueType() == MVT::i64))) {
11225  SDValue BSwapOp = N->getOperand(1).getOperand(0);
11226  // Do an any-extend to 32-bits if this is a half-word input.
11227  if (BSwapOp.getValueType() == MVT::i16)
11228  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
11229 
11230  SDValue Ops[] = {
11231  N->getOperand(0), BSwapOp, N->getOperand(2),
11232  DAG.getValueType(N->getOperand(1).getValueType())
11233  };
11234  return
11235  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
11236  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
11237  cast<StoreSDNode>(N)->getMemOperand());
11238  }
11239 
11240  // For little endian, VSX stores require generating xxswapd/lxvd2x.
11241  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11242  EVT VT = N->getOperand(1).getValueType();
11243  if (VT.isSimple()) {
11244  MVT StoreVT = VT.getSimpleVT();
11245  if (Subtarget.needsSwapsForVSXMemOps() &&
11246  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11247  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11248  return expandVSXStoreForLE(N, DCI);
11249  }
11250  break;
11251  }
11252  case ISD::LOAD: {
11253  LoadSDNode *LD = cast<LoadSDNode>(N);
11254  EVT VT = LD->getValueType(0);
11255 
11256  // For little endian, VSX loads require generating lxvd2x/xxswapd.
11257  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11258  if (VT.isSimple()) {
11259  MVT LoadVT = VT.getSimpleVT();
11260  if (Subtarget.needsSwapsForVSXMemOps() &&
11261  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11262  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11263  return expandVSXLoadForLE(N, DCI);
11264  }
11265 
11266  // We sometimes end up with a 64-bit integer load, from which we extract
11267  // two single-precision floating-point numbers. This happens with
11268  // std::complex<float>, and other similar structures, because of the way we
11269  // canonicalize structure copies. However, if we lack direct moves,
11270  // then the final bitcasts from the extracted integer values to the
11271  // floating-point numbers turn into store/load pairs. Even with direct moves,
11272  // just loading the two floating-point numbers is likely better.
11273  auto ReplaceTwoFloatLoad = [&]() {
11274  if (VT != MVT::i64)
11275  return false;
11276 
11277  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11278  LD->isVolatile())
11279  return false;
11280 
11281  // We're looking for a sequence like this:
11282  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
11283  // t16: i64 = srl t13, Constant:i32<32>
11284  // t17: i32 = truncate t16
11285  // t18: f32 = bitcast t17
11286  // t19: i32 = truncate t13
11287  // t20: f32 = bitcast t19
11288 
11289  if (!LD->hasNUsesOfValue(2, 0))
11290  return false;
11291 
11292  auto UI = LD->use_begin();
11293  while (UI.getUse().getResNo() != 0) ++UI;
11294  SDNode *Trunc = *UI++;
11295  while (UI.getUse().getResNo() != 0) ++UI;
11296  SDNode *RightShift = *UI;
11297  if (Trunc->getOpcode() != ISD::TRUNCATE)
11298  std::swap(Trunc, RightShift);
11299 
11300  if (Trunc->getOpcode() != ISD::TRUNCATE ||
11301  Trunc->getValueType(0) != MVT::i32 ||
11302  !Trunc->hasOneUse())
11303  return false;
11304  if (RightShift->getOpcode() != ISD::SRL ||
11305  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
11306  RightShift->getConstantOperandVal(1) != 32 ||
11307  !RightShift->hasOneUse())
11308  return false;
11309 
11310  SDNode *Trunc2 = *RightShift->use_begin();
11311  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
11312  Trunc2->getValueType(0) != MVT::i32 ||
11313  !Trunc2->hasOneUse())
11314  return false;
11315 
11316  SDNode *Bitcast = *Trunc->use_begin();
11317  SDNode *Bitcast2 = *Trunc2->use_begin();
11318 
11319  if (Bitcast->getOpcode() != ISD::BITCAST ||
11320  Bitcast->getValueType(0) != MVT::f32)
11321  return false;
11322  if (Bitcast2->getOpcode() != ISD::BITCAST ||
11323  Bitcast2->getValueType(0) != MVT::f32)
11324  return false;
11325 
11326  if (Subtarget.isLittleEndian())
11327  std::swap(Bitcast, Bitcast2);
11328 
11329  // Bitcast has the second float (in memory-layout order) and Bitcast2
11330  // has the first one.
11331 
11332  SDValue BasePtr = LD->getBasePtr();
11333  if (LD->isIndexed()) {
11334  assert(LD->getAddressingMode() == ISD::PRE_INC &&
11335  "Non-pre-inc AM on PPC?");
11336  BasePtr =
11337  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11338  LD->getOffset());
11339  }
11340 
11341  auto MMOFlags =
11342  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
11343  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
11344  LD->getPointerInfo(), LD->getAlignment(),
11345  MMOFlags, LD->getAAInfo());
11346  SDValue AddPtr =
11347  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
11348  BasePtr, DAG.getIntPtrConstant(4, dl));
11349  SDValue FloatLoad2 = DAG.getLoad(
11350  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
11351  LD->getPointerInfo().getWithOffset(4),
11352  MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
11353 
11354  if (LD->isIndexed()) {
11355  // Note that DAGCombine should re-form any pre-increment load(s) from
11356  // what is produced here if that makes sense.
11357  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
11358  }
11359 
11360  DCI.CombineTo(Bitcast2, FloatLoad);
11361  DCI.CombineTo(Bitcast, FloatLoad2);
11362 
11363  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
11364  SDValue(FloatLoad2.getNode(), 1));
11365  return true;
11366  };
11367 
11368  if (ReplaceTwoFloatLoad())
11369  return SDValue(N, 0);
11370 
11371  EVT MemVT = LD->getMemoryVT();
11372  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
11373  unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
11374  Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
11375  unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
11376  if (LD->isUnindexed() && VT.isVector() &&
11377  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
11378  // P8 and later hardware should just use LOAD.
11379  !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
11380  VT == MVT::v4i32 || VT == MVT::v4f32)) ||
11381  (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
11382  LD->getAlignment() >= ScalarABIAlignment)) &&
11383  LD->getAlignment() < ABIAlignment) {
11384  // This is a type-legal unaligned Altivec or QPX load.
11385  SDValue Chain = LD->getChain();
11386  SDValue Ptr = LD->getBasePtr();
11387  bool isLittleEndian = Subtarget.isLittleEndian();
11388 
11389  // This implements the loading of unaligned vectors as described in
11390  // the venerable Apple Velocity Engine overview. Specifically:
11391  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
11392  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
11393  //
11394  // The general idea is to expand a sequence of one or more unaligned
11395  // loads into an alignment-based permutation-control instruction (lvsl
11396  // or lvsr), a series of regular vector loads (which always truncate
11397  // their input address to an aligned address), and a series of
11398  // permutations. The results of these permutations are the requested
11399  // loaded values. The trick is that the last "extra" load is not taken
11400  // from the address you might suspect (sizeof(vector) bytes after the
11401  // last requested load), but rather sizeof(vector) - 1 bytes after the
11402  // last requested vector. The point of this is to avoid a page fault if
11403  // the base address happened to be aligned. This works because if the
11404  // base address is aligned, then adding less than a full vector length
11405  // will cause the last vector in the sequence to be (re)loaded.
11406  // Otherwise, the next vector will be fetched as you might suspect was
11407  // necessary.
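//
// Illustrative instruction sequence (a sketch; register names arbitrary):
//   lvsl  v2, 0, r3       ; permute control from the low address bits
//   lvx   v0, 0, r3       ; aligned load covering the first bytes
//   addi  r4, r3, 15      ; sizeof(vector)-1, as described above
//   lvx   v1, 0, r4       ; aligned load covering the last bytes
//   vperm v2, v0, v1, v2  ; select the 16 requested bytes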
11408 
11409  // We might be able to reuse the permutation generation from
11410  // a different base address offset from this one by an aligned amount.
11411  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
11412  // optimization later.
11413  Intrinsic::ID Intr, IntrLD, IntrPerm;
11414  MVT PermCntlTy, PermTy, LDTy;
11415  if (Subtarget.hasAltivec()) {
11416  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
11417  Intrinsic::ppc_altivec_lvsl;
11418  IntrLD = Intrinsic::ppc_altivec_lvx;
11419  IntrPerm = Intrinsic::ppc_altivec_vperm;
11420  PermCntlTy = MVT::v16i8;
11421  PermTy = MVT::v4i32;
11422  LDTy = MVT::v4i32;
11423  } else {
11424  Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
11425  Intrinsic::ppc_qpx_qvlpcls;
11426  IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
11427  Intrinsic::ppc_qpx_qvlfs;
11428  IntrPerm = Intrinsic::ppc_qpx_qvfperm;
11429  PermCntlTy = MVT::v4f64;
11430  PermTy = MVT::v4f64;
11431  LDTy = MemVT.getSimpleVT();
11432  }
11433 
11434  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
11435 
11436  // Create the new MMO for the new base load. It is like the original MMO,
11437  // but represents an area in memory almost twice the vector size centered
11438  // on the original address. If the address is unaligned, we might start
11439  // reading up to (sizeof(vector)-1) bytes below the address of the
11440  // original unaligned load.
11441  MachineFunction &MF = DAG.getMachineFunction();
11442  MachineMemOperand *BaseMMO =
11443  MF.getMachineMemOperand(LD->getMemOperand(),
11444  -(long)MemVT.getStoreSize()+1,
11445  2*MemVT.getStoreSize()-1);
11446 
11447  // Create the new base load.
11448  SDValue LDXIntID =
11449  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
11450  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
11451  SDValue BaseLoad =
11452  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11453  DAG.getVTList(PermTy, MVT::Other),
11454  BaseLoadOps, LDTy, BaseMMO);
11455 
11456  // Note that the value of IncOffset (which is provided to the next
11457  // load's pointer info offset value, and thus used to calculate the
11458  // alignment), and the value of IncValue (which is actually used to
11459  // increment the pointer value) are different! This is because we
11460  // require the next load to appear to be aligned, even though it
11461  // is actually offset from the base pointer by a lesser amount.
11462  int IncOffset = VT.getSizeInBits() / 8;
11463  int IncValue = IncOffset;
11464 
11465  // Walk (both up and down) the chain looking for another load at the real
11466  // (aligned) offset (the alignment of the other load does not matter in
11467  // this case). If found, then do not use the offset reduction trick, as
11468  // that will prevent the loads from being later combined (as they would
11469  // otherwise be duplicates).
11470  if (!findConsecutiveLoad(LD, DAG))
11471  --IncValue;
11472 
11473  SDValue Increment =
11474  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
11475  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
11476 
11477  MachineMemOperand *ExtraMMO =
11478  MF.getMachineMemOperand(LD->getMemOperand(),
11479  1, 2*MemVT.getStoreSize()-1);
11480  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
11481  SDValue ExtraLoad =
11482  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11483  DAG.getVTList(PermTy, MVT::Other),
11484  ExtraLoadOps, LDTy, ExtraMMO);
11485 
11486  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
11487  BaseLoad.getValue(1), ExtraLoad.getValue(1));
11488 
11489  // Because vperm has a big-endian bias, we must reverse the order
11490  // of the input vectors and complement the permute control vector
11491  // when generating little endian code. We have already handled the
11492  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
11493  // and ExtraLoad here.
11494  SDValue Perm;
11495  if (isLittleEndian)
11496  Perm = BuildIntrinsicOp(IntrPerm,
11497  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
11498  else
11499  Perm = BuildIntrinsicOp(IntrPerm,
11500  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
11501 
11502  if (VT != PermTy)
11503  Perm = Subtarget.hasAltivec() ?
11504  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
11505  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
11506  DAG.getTargetConstant(1, dl, MVT::i64));
11507  // second argument is 1 because this rounding
11508  // is always exact.
11509 
11510  // The output of the permutation is our loaded result, the TokenFactor is
11511  // our new chain.
11512  DCI.CombineTo(N, Perm, TF);
11513  return SDValue(N, 0);
11514  }
11515  }
11516  break;
11517  case ISD::INTRINSIC_WO_CHAIN: {
11518  bool isLittleEndian = Subtarget.isLittleEndian();
11519  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
11520  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
11521  : Intrinsic::ppc_altivec_lvsl);
11522  if ((IID == Intr ||
11523  IID == Intrinsic::ppc_qpx_qvlpcld ||
11524  IID == Intrinsic::ppc_qpx_qvlpcls) &&
11525  N->getOperand(1)->getOpcode() == ISD::ADD) {
11526  SDValue Add = N->getOperand(1);
11527 
11528  int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
11529  5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
11530 
11531  if (DAG.MaskedValueIsZero(Add->getOperand(1),
11532  APInt::getAllOnesValue(Bits /* alignment */)
11533  .zext(Add.getScalarValueSizeInBits()))) {
11534  SDNode *BasePtr = Add->getOperand(0).getNode();
11535  for (SDNode::use_iterator UI = BasePtr->use_begin(),
11536  UE = BasePtr->use_end();
11537  UI != UE; ++UI) {
11538  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11539  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
11540  // We've found another LVSL/LVSR, and this address is an aligned
11541  // multiple of that one. The results will be the same, so use the
11542  // one we've just found instead.
11543 
11544  return SDValue(*UI, 0);
11545  }
11546  }
11547  }
11548 
11549  if (isa<ConstantSDNode>(Add->getOperand(1))) {
11550  SDNode *BasePtr = Add->getOperand(0).getNode();
11551  for (SDNode::use_iterator UI = BasePtr->use_begin(),
11552  UE = BasePtr->use_end(); UI != UE; ++UI) {
11553  if (UI->getOpcode() == ISD::ADD &&
11554  isa<ConstantSDNode>(UI->getOperand(1)) &&
11555  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
11556  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
11557  (1ULL << Bits) == 0) {
11558  SDNode *OtherAdd = *UI;
11559  for (SDNode::use_iterator VI = OtherAdd->use_begin(),
11560  VE = OtherAdd->use_end(); VI != VE; ++VI) {
11561  if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11562  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
11563  return SDValue(*VI, 0);
11564  }
11565  }
11566  }
11567  }
11568  }
11569  }
11570  }
11571 
11572  break;
11573  case ISD::INTRINSIC_W_CHAIN: {
11574  // For little endian, VSX loads require generating lxvd2x/xxswapd.
11575  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11576  if (Subtarget.needsSwapsForVSXMemOps()) {
11577  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11578  default:
11579  break;
11580  case Intrinsic::ppc_vsx_lxvw4x:
11581  case Intrinsic::ppc_vsx_lxvd2x:
11582  return expandVSXLoadForLE(N, DCI);
11583  }
11584  }
11585  break;
11586  }
11587  case ISD::INTRINSIC_VOID: {
11588  // For little endian, VSX stores require generating xxswapd/stxvd2x.
11589  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11590  if (Subtarget.needsSwapsForVSXMemOps()) {
11591  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11592  default:
11593  break;
11594  case Intrinsic::ppc_vsx_stxvw4x:
11595  case Intrinsic::ppc_vsx_stxvd2x:
11596  return expandVSXStoreForLE(N, DCI);
11597  }
11598  }
11599  break;
11600  }
11601  case ISD::BSWAP:
11602  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
11603  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
11604  N->getOperand(0).hasOneUse() &&
11605  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
11606  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11607  N->getValueType(0) == MVT::i64))) {
11608  SDValue Load = N->getOperand(0);
11609  LoadSDNode *LD = cast<LoadSDNode>(Load);
11610  // Create the byte-swapping load.
11611  SDValue Ops[] = {
11612  LD->getChain(), // Chain
11613  LD->getBasePtr(), // Ptr
11614  DAG.getValueType(N->getValueType(0)) // VT
11615  };
11616  SDValue BSLoad =
11617  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
11618  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
11619  MVT::i64 : MVT::i32, MVT::Other),
11620  Ops, LD->getMemoryVT(), LD->getMemOperand());
11621 
11622  // If this is an i16 load, insert the truncate.
11623  SDValue ResVal = BSLoad;
11624  if (N->getValueType(0) == MVT::i16)
11625  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
11626 
11627  // First, combine the bswap away. This makes the value produced by the
11628  // load dead.
11629  DCI.CombineTo(N, ResVal);
11630 
11631  // Next, combine the load away, we give it a bogus result value but a real
11632  // chain result. The result value is dead because the bswap is dead.
11633  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
11634 
11635  // Return N so it doesn't get rechecked!
11636  return SDValue(N, 0);
11637  }
11638 
11639  break;
11640  case PPCISD::VCMP: {
11641  // If a VCMPo node already exists with exactly the same operands as this
11642  // node, use its result instead of this node (VCMPo computes both a CR6 and
11643  // a normal output).
11644  //
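// For example, a vcmpequw whose operands match an existing vcmpequw.
// (the record form) can simply reuse that node's vector result.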
11645  if (!N->getOperand(0).hasOneUse() &&
11646  !N->getOperand(1).hasOneUse() &&
11647  !N->getOperand(2).hasOneUse()) {
11648 
11649  // Scan all of the users of the LHS, looking for VCMPo's that match.
11650  SDNode *VCMPoNode = nullptr;
11651 
11652  SDNode *LHSN = N->getOperand(0).getNode();
11653  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
11654  UI != E; ++UI)
11655  if (UI->getOpcode() == PPCISD::VCMPo &&
11656  UI->getOperand(1) == N->getOperand(1) &&
11657  UI->getOperand(2) == N->getOperand(2) &&
11658  UI->getOperand(0) == N->getOperand(0)) {
11659  VCMPoNode = *UI;
11660  break;
11661  }
11662 
11663  // If there is no VCMPo node, or if the flag value has a single use, don't
11664  // transform this.
11665  if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
11666  break;
11667 
11668  // Look at the (necessarily single) use of the flag value. If it has a
11669  // chain, this transformation is more complex. Note that multiple things
11670  // could use the value result, which we should ignore.
11671  SDNode *FlagUser = nullptr;
11672  for (SDNode::use_iterator UI = VCMPoNode->use_begin();
11673  FlagUser == nullptr; ++UI) {
11674  assert(UI != VCMPoNode->use_end() && "Didn't find user!");
11675  SDNode *User = *UI;
11676  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
11677  if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
11678  FlagUser = User;
11679  break;
11680  }
11681  }
11682  }
11683 
11684  // If the user is a MFOCRF instruction, we know this is safe.
11685  // Otherwise we give up for right now.
11686  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
11687  return SDValue(VCMPoNode, 0);
11688  }
11689  break;
11690  }
11691  case ISD::BRCOND: {
11692  SDValue Cond = N->getOperand(1);
11693  SDValue Target = N->getOperand(2);
11694 
11695  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11696  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
11697  Intrinsic::ppc_is_decremented_ctr_nonzero) {
11698 
11699  // We now need to make the intrinsic dead (it cannot be instruction
11700  // selected).
11701  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
11702  assert(Cond.getNode()->hasOneUse() &&
11703  "Counter decrement has more than one use");
11704 
11705  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
11706  N->getOperand(0), Target);
11707  }
11708  }
11709  break;
11710  case ISD::BR_CC: {
11711  // If this is a branch on an altivec predicate comparison, lower this so
11712  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
11713  // lowering is done pre-legalize, because the legalizer lowers the predicate
11714  // compare down to code that is difficult to reassemble.
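// For example (sketch), "if (vec_all_eq(a, b))" reaches here as a BR_CC on
// an altivec predicate intrinsic; we emit a vcmpequw. and branch on the EQ
// bit of CR6 directly.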
11715  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
11716  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
11717 
11718  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
11719  // value. If so, pass-through the AND to get to the intrinsic.
11720  if (LHS.getOpcode() == ISD::AND &&
11721  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11722  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
11723  Intrinsic::ppc_is_decremented_ctr_nonzero &&
11724  isa<ConstantSDNode>(LHS.getOperand(1)) &&
11725  !isNullConstant(LHS.getOperand(1)))
11726  LHS = LHS.getOperand(0);
11727 
11728  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11729  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
11730  Intrinsic::ppc_is_decremented_ctr_nonzero &&
11731  isa<ConstantSDNode>(RHS)) {
11732  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
11733  "Counter decrement comparison is not EQ or NE");
11734 
11735  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11736  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
11737  (CC == ISD::SETNE && !Val);
11738 
11739  // We now need to make the intrinsic dead (it cannot be instruction
11740  // selected).
11741  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
11742  assert(LHS.getNode()->hasOneUse() &&
11743  "Counter decrement has more than one use");
11744 
11745  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
11746  N->getOperand(0), N->getOperand(4));
11747  }
11748 
11749  int CompareOpc;
11750  bool isDot;
11751 
11752  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11753  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
11754  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
11755  assert(isDot && "Can't compare against a vector result!");
11756 
11757  // If this is a comparison against something other than 0/1, then we know
11758  // that the condition is never/always true.
11759  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11760  if (Val != 0 && Val != 1) {
11761  if (CC == ISD::SETEQ) // Cond never true, remove branch.
11762  return N->getOperand(0);
11763  // Always !=, turn it into an unconditional branch.
11764  return DAG.getNode(ISD::BR, dl, MVT::Other,
11765  N->getOperand(0), N->getOperand(4));
11766  }
11767 
11768  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
11769 
11770  // Create the PPCISD altivec 'dot' comparison node.
11771  SDValue Ops[] = {
11772  LHS.getOperand(2), // LHS of compare
11773  LHS.getOperand(3), // RHS of compare
11774  DAG.getConstant(CompareOpc, dl, MVT::i32)
11775  };
11776  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
11777  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
11778 
11779  // Unpack the result based on how the target uses it.
11780  PPC::Predicate CompOpc;
11781  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
11782  default: // Can't happen, don't crash on invalid number though.
11783  case 0: // Branch on the value of the EQ bit of CR6.
11784  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
11785  break;
11786  case 1: // Branch on the inverted value of the EQ bit of CR6.
11787  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
11788  break;
11789  case 2: // Branch on the value of the LT bit of CR6.
11790  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
11791  break;
11792  case 3: // Branch on the inverted value of the LT bit of CR6.
11793  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
11794  break;
11795  }
11796 
11797  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
11798  DAG.getConstant(CompOpc, dl, MVT::i32),
11799  DAG.getRegister(PPC::CR6, MVT::i32),
11800  N->getOperand(4), CompNode.getValue(1));
11801  }
11802  break;
11803  }
11804  case ISD::BUILD_VECTOR:
11805  return DAGCombineBuildVector(N, DCI);
11806  }
11807 
11808  return SDValue();
11809 }
11810 
11811 SDValue
11812 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11813  SelectionDAG &DAG,
11814  std::vector<SDNode *> *Created) const {
11815  // fold (sdiv X, pow2)
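// PPCISD::SRA_ADDZE pairs srawi with addze: srawi sets CA when a negative
// value loses one-bits in the shift, and addze adds that carry back,
// rounding the quotient toward zero as C requires. Sketch for
// "sdiv i32 %x, 4" (register names arbitrary):
//   srawi r4, r3, 2
//   addze r3, r4
// A negative power-of-two divisor additionally negates the result (the
// ISD::SUB from zero below).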
11816  EVT VT = N->getValueType(0);
11817  if (VT == MVT::i64 && !Subtarget.isPPC64())
11818  return SDValue();
11819  if ((VT != MVT::i32 && VT != MVT::i64) ||
11820  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
11821  return SDValue();
11822 
11823  SDLoc DL(N);
11824  SDValue N0 = N->getOperand(0);
11825 
11826  bool IsNegPow2 = (-Divisor).isPowerOf2();
11827  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
11828  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
11829 
11830  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
11831  if (Created)
11832  Created->push_back(Op.getNode());
11833 
11834  if (IsNegPow2) {
11835  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
11836  if (Created)
11837  Created->push_back(Op.getNode());
11838  }
11839 
11840  return Op;
11841 }
11842 
11843 //===----------------------------------------------------------------------===//
11844 // Inline Assembly Support
11845 //===----------------------------------------------------------------------===//
11846 
11847 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
11848  APInt &KnownZero,
11849  APInt &KnownOne,
11850  const SelectionDAG &DAG,
11851  unsigned Depth) const {
11852  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
11853  switch (Op.getOpcode()) {
11854  default: break;
11855  case PPCISD::LBRX: {
11856  // lhbrx is known to have the top bits cleared out.
11857  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
11858  KnownZero = 0xFFFF0000;
11859  break;
11860  }
11861  case ISD::INTRINSIC_WO_CHAIN: {
11862  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
11863  default: break;
11864  case Intrinsic::ppc_altivec_vcmpbfp_p:
11865  case Intrinsic::ppc_altivec_vcmpeqfp_p:
11866  case Intrinsic::ppc_altivec_vcmpequb_p:
11867  case Intrinsic::ppc_altivec_vcmpequh_p:
11868  case Intrinsic::ppc_altivec_vcmpequw_p:
11869  case Intrinsic::ppc_altivec_vcmpequd_p:
11870  case Intrinsic::ppc_altivec_vcmpgefp_p:
11871  case Intrinsic::ppc_altivec_vcmpgtfp_p:
11872  case Intrinsic::ppc_altivec_vcmpgtsb_p:
11873  case Intrinsic::ppc_altivec_vcmpgtsh_p:
11874  case Intrinsic::ppc_altivec_vcmpgtsw_p:
11875  case Intrinsic::ppc_altivec_vcmpgtsd_p:
11876  case Intrinsic::ppc_altivec_vcmpgtub_p:
11877  case Intrinsic::ppc_altivec_vcmpgtuh_p:
11878  case Intrinsic::ppc_altivec_vcmpgtuw_p:
11879  case Intrinsic::ppc_altivec_vcmpgtud_p:
11880  KnownZero = ~1U; // All bits but the low one are known to be zero.
11881  break;
11882  }
11883  }
11884  }
11885 }
11886 
11887 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
11888  switch (Subtarget.getDarwinDirective()) {
11889  default: break;
11890  case PPC::DIR_970:
11891  case PPC::DIR_PWR4:
11892  case PPC::DIR_PWR5:
11893  case PPC::DIR_PWR5X:
11894  case PPC::DIR_PWR6:
11895  case PPC::DIR_PWR6X:
11896  case PPC::DIR_PWR7:
11897  case PPC::DIR_PWR8:
11898  case PPC::DIR_PWR9: {
11899  if (!ML)
11900  break;
11901 
11902  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11903 
11904  // For small loops (between 5 and 8 instructions), align to a 32-byte
11905  // boundary so that the entire loop fits in one instruction-cache line.
11906  uint64_t LoopSize = 0;
11907  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
11908  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
11909  LoopSize += TII->getInstSizeInBytes(*J);
11910  if (LoopSize > 32)
11911  break;
11912  }
11913 
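// The value returned below is log2 of the alignment in bytes, so 5
// requests the 32-byte boundary mentioned above.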
11914  if (LoopSize > 16 && LoopSize <= 32)
11915  return 5;
11916 
11917  break;
11918  }
11919  }
11920 
11921  return TargetLowering::getPrefLoopAlignment(ML);
11922 }
11923 
11924 /// getConstraintType - Given a constraint, return the type of
11925 /// constraint it is for this target.
11926 PPCTargetLowering::ConstraintType
11927 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
11928  if (Constraint.size() == 1) {
11929  switch (Constraint[0]) {
11930  default: break;
11931  case 'b':
11932  case 'r':
11933  case 'f':
11934  case 'd':
11935  case 'v':
11936  case 'y':
11937  return C_RegisterClass;
11938  case 'Z':
11939  // FIXME: While Z does indicate a memory constraint, it specifically
11940  // indicates an r+r address (used in conjunction with the 'y' modifier
11941  // in the replacement string). Currently, we're forcing the base
11942  // register to be r0 in the asm printer (which is interpreted as zero)
11943  // and forming the complete address in the second register. This is
11944  // suboptimal.
11945  return C_Memory;
11946  }
11947  } else if (Constraint == "wc") { // individual CR bits.
11948  return C_RegisterClass;
11949  } else if (Constraint == "wa" || Constraint == "wd" ||
11950  Constraint == "wf" || Constraint == "ws") {
11951  return C_RegisterClass; // VSX registers.
11952  }
11953  return TargetLowering::getConstraintType(Constraint);
11954 }
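// A hypothetical use of the constraints classified above (illustrative
// only, not from this file):
//   asm ("add %0, %1, %2" : "=r" (d) : "r" (a), "b" (b)); // GPRs; 'b' excludes r0
//   asm ("lxvd2x %x0, %y1" : "=wa" (v) : "Z" (*p));       // VSX reg, r+r memory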
11955 
11956 /// Examine constraint type and operand type and determine a weight value.
11957 /// This object must already have been set up with the operand type
11958 /// and the current alternative constraint selected.
11959 TargetLowering::ConstraintWeight
11960 PPCTargetLowering::getSingleConstraintMatchWeight(
11961  AsmOperandInfo &info, const char *constraint) const {
11962  ConstraintWeight weight = CW_Invalid;
11963  Value *CallOperandVal = info.CallOperandVal;
11964  // If we don't have a value, we can't do a match,
11965  // but allow it at the lowest weight.
11966  if (!CallOperandVal)
11967  return CW_Default;
11968  Type *type = CallOperandVal->getType();
11969 
11970  // Look at the constraint type.
11971  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
11972  return CW_Register; // an individual CR bit.
11973  else if ((StringRef(constraint) == "wa" ||
11974  StringRef(constraint) == "wd" ||
11975  StringRef(constraint) == "wf") &&
11976  type->isVectorTy())
11977  return CW_Register;
11978  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
11979  return CW_Register;
11980 
11981  switch (*constraint) {
11982  default:
11983  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
11984  break;
11985  case 'b':
11986  if (type->isIntegerTy())
11987  weight = CW_Register;
11988  break;
11989  case 'f':
11990  if (type->isFloatTy())
11991  weight = CW_Register;
11992  break;
11993  case 'd':
11994  if (type->isDoubleTy())
11995  weight = CW_Register;
11996  break;
11997  case 'v':
11998  if (type->isVectorTy())
11999  weight = CW_Register;
12000  break;
12001  case 'y':
12002  weight = CW_Register;
12003  break;
12004  case 'Z':
12005  weight = CW_Memory;
12006  break;
12007  }
12008  return weight;
12009 }
12010 
12011 std::pair<unsigned, const TargetRegisterClass *>
12012 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12013  StringRef Constraint,
12014  MVT VT) const {
12015  if (Constraint.size() == 1) {
12016  // GCC RS6000 Constraint Letters
12017  switch (Constraint[0]) {
12018  case 'b': // R1-R31
12019  if (VT == MVT::i64 && Subtarget.isPPC64())
12020  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12021  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12022  case 'r': // R0-R31
12023  if (VT == MVT::i64 && Subtarget.isPPC64())
12024  return std::make_pair(0U, &PPC::G8RCRegClass);
12025  return std::make_pair(0U, &PPC::GPRCRegClass);
12026  // 'd' and 'f' constraints are both defined to be "the floating point
12027  // registers", where one is for 32-bit and the other for 64-bit. We don't
12028  // really care overly much here so just give them all the same reg classes.
12029  case 'd':
12030  case 'f':
12031  if (VT == MVT::f32 || VT == MVT::i32)
12032  return std::make_pair(0U, &PPC::F4RCRegClass);
12033  if (VT == MVT::f64 || VT == MVT::i64)
12034  return std::make_pair(0U, &PPC::F8RCRegClass);
12035  if (VT == MVT::v4f64 && Subtarget.hasQPX())
12036  return std::make_pair(0U, &PPC::QFRCRegClass);
12037  if (VT == MVT::v4f32 && Subtarget.hasQPX())
12038  return std::make_pair(0U, &PPC::QSRCRegClass);
12039  break;
12040  case 'v':
12041  if (VT == MVT::v4f64 && Subtarget.hasQPX())
12042  return std::make_pair(0U, &PPC::QFRCRegClass);
12043  if (VT == MVT::v4f32 && Subtarget.hasQPX())
12044  return std::make_pair(0U, &PPC::QSRCRegClass);
12045  if (Subtarget.hasAltivec())
12046  return std::make_pair(0U, &PPC::VRRCRegClass);
12047  case 'y': // crrc
12048  return std::make_pair(0U, &PPC::CRRCRegClass);
12049  }
12050  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12051  // An individual CR bit.
12052  return std::make_pair(0U, &PPC::CRBITRCRegClass);
12053  } else if ((Constraint == "wa" || Constraint == "wd" ||
12054  Constraint == "wf") && Subtarget.hasVSX()) {
12055  return std::make_pair(0U, &PPC::VSRCRegClass);
12056  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12057  if (VT == MVT::f32 && Subtarget.hasP8Vector())
12058  return std::make_pair(0U, &PPC::VSSRCRegClass);
12059  else
12060  return std::make_pair(0U, &PPC::VSFRCRegClass);
12061  }
12062 
12063  std::pair<unsigned, const TargetRegisterClass *> R =
12064  TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12065 
12066  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12067  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12068  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12069  // register.
12070  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12071  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12072  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12073  PPC::GPRCRegClass.contains(R.first))
12074  return std::make_pair(TRI->getMatchingSuperReg(R.first,
12075  PPC::sub_32, &PPC::G8RCRegClass),
12076  &PPC::G8RCRegClass);
12077 
12078  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12079  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12080  R.first = PPC::CR0;
12081  R.second = &PPC::CRRCRegClass;
12082  }
12083 
12084  return R;
12085 }
12086 
12087 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12088 /// vector. If it is invalid, don't add anything to Ops.
12089 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12090  std::string &Constraint,
12091  std::vector<SDValue>&Ops,
12092  SelectionDAG &DAG) const {
12093  SDValue Result;
12094 
12095  // Only support length 1 constraints.
12096  if (Constraint.length() > 1) return;
12097 
12098  char Letter = Constraint[0];
12099  switch (Letter) {
12100  default: break;
12101  case 'I':
12102  case 'J':
12103  case 'K':
12104  case 'L':
12105  case 'M':
12106  case 'N':
12107  case 'O':
12108  case 'P': {
12109  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12110  if (!CST) return; // Must be an immediate to match.
12111  SDLoc dl(Op);
12112  int64_t Value = CST->getSExtValue();
12113  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12114  // numbers are printed as such.
12115  switch (Letter) {
12116  default: llvm_unreachable("Unknown constraint letter!");
12117  case 'I': // "I" is a signed 16-bit constant.
12118  if (isInt<16>(Value))
12119  Result = DAG.getTargetConstant(Value, dl, TCVT);
12120  break;
12121  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
12122  if (isShiftedUInt<16, 16>(Value))
12123  Result = DAG.getTargetConstant(Value, dl, TCVT);
12124  break;
12125  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
12126  if (isShiftedInt<16, 16>(Value))
12127  Result = DAG.getTargetConstant(Value, dl, TCVT);
12128  break;
12129  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
12130  if (isUInt<16>(Value))
12131  Result = DAG.getTargetConstant(Value, dl, TCVT);
12132  break;
12133  case 'M': // "M" is a constant that is greater than 31.
12134  if (Value > 31)
12135  Result = DAG.getTargetConstant(Value, dl, TCVT);
12136  break;
12137  case 'N': // "N" is a positive constant that is an exact power of two.
12138  if (Value > 0 && isPowerOf2_64(Value))
12139  Result = DAG.getTargetConstant(Value, dl, TCVT);
12140  break;
12141  case 'O': // "O" is the constant zero.
12142  if (Value == 0)
12143  Result = DAG.getTargetConstant(Value, dl, TCVT);
12144  break;
12145  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
12146  if (isInt<16>(-Value))
12147  Result = DAG.getTargetConstant(Value, dl, TCVT);
12148  break;
12149  }
12150  break;
12151  }
12152  }
12153 
12154  if (Result.getNode()) {
12155  Ops.push_back(Result);
12156  return;
12157  }
12158 
12159  // Handle standard constraint letters.
12160  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12161 }
12162 
12163 // isLegalAddressingMode - Return true if the addressing mode represented
12164 // by AM is legal for this target, for a load/store of the specified type.
12165 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12166  const AddrMode &AM, Type *Ty,
12167  unsigned AS) const {
12168  // PPC does not allow r+i addressing modes for vectors!
12169  if (Ty->isVectorTy() && AM.BaseOffs != 0)
12170  return false;
12171 
12172  // PPC allows a sign-extended 16-bit immediate field.
12173  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12174  return false;
12175 
12176  // No global is ever allowed as a base.
12177  if (AM.BaseGV)
12178  return false;
12179 
12180  // PPC only supports r+r,
12181  switch (AM.Scale) {
12182  case 0: // "r+i" or just "i", depending on HasBaseReg.
12183  break;
12184  case 1:
12185  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
12186  return false;
12187  // Otherwise we have r+r or r+i.
12188  break;
12189  case 2:
12190  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
12191  return false;
12192  // Allow 2*r as r+r.
12193  break;
12194  default:
12195  // No other scales are supported.
12196  return false;
12197  }
12198 
12199  return true;
12200 }
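// Under these rules, a "lwz r3, 8(r4)" style r+i access (Scale == 0 with a
// 16-bit offset) and a "lwzx r3, r4, r5" style r+r access (Scale == 1, no
// offset) are both legal, while r+r+i and scaled-index forms are rejected.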
12201 
12202 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12203  SelectionDAG &DAG) const {
12204  MachineFunction &MF = DAG.getMachineFunction();
12205  MachineFrameInfo &MFI = MF.getFrameInfo();
12206  MFI.setReturnAddressIsTaken(true);
12207 
12208  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12209  return SDValue();
12210 
12211  SDLoc dl(Op);
12212  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12213 
12214  // Make sure the function does not optimize away the store of the RA to
12215  // the stack.
12216  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12217  FuncInfo->setLRStoreRequired();
12218  bool isPPC64 = Subtarget.isPPC64();
12219  auto PtrVT = getPointerTy(MF.getDataLayout());
12220 
12221  if (Depth > 0) {
12222  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12223  SDValue Offset =
12224  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12225  isPPC64 ? MVT::i64 : MVT::i32);
12226  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12227  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12228  MachinePointerInfo());
12229  }
12230 
12231  // Just load the return address off the stack.
12232  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12233  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12234  MachinePointerInfo());
12235 }
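// Illustrative note (editor's addition, not part of the original source):
// this lowering backs the llvm.returnaddress intrinsic, e.g. from C:
//
//   void *callers_ra(void) {
//     return __builtin_return_address(1);
//   }
//
// Depth 1 first walks one frame via LowerFRAMEADDR, then loads LR from
// that frame's return-save slot; Depth 0 reads the current frame's slot
// directly through getReturnAddrFrameIndex.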
12236 
12237 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12238  SelectionDAG &DAG) const {
12239  SDLoc dl(Op);
12240  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12241 
12242  MachineFunction &MF = DAG.getMachineFunction();
12243  MachineFrameInfo &MFI = MF.getFrameInfo();
12244  MFI.setFrameAddressIsTaken(true);
12245 
12246  EVT PtrVT = getPointerTy(MF.getDataLayout());
12247  bool isPPC64 = PtrVT == MVT::i64;
12248 
12249  // Naked functions never have a frame pointer, and so we use r1. For all
12250  // other functions, this decision must be deferred until PEI.
12251  unsigned FrameReg;
12252  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12253  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12254  else
12255  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12256 
12257  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12258  PtrVT);
12259  while (Depth--)
12260  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12261  FrameAddr, MachinePointerInfo());
12262  return FrameAddr;
12263 }
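// Illustrative note (editor's addition, not part of the original source):
// the matching frame walk for llvm.frameaddress:
//
//   void *parent_frame(void) {
//     return __builtin_frame_address(1);
//   }
//
// Each Depth level is one load, following the back-chain pointer the PPC
// ABIs store at offset 0 of every stack frame.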
12264 
12265 // FIXME? Maybe this could be a TableGen attribute on some registers and
12266 // this table could be generated automatically from RegInfo.
12267 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12268  SelectionDAG &DAG) const {
12269  bool isPPC64 = Subtarget.isPPC64();
12270  bool isDarwinABI = Subtarget.isDarwinABI();
12271 
12272  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12273  (!isPPC64 && VT != MVT::i32))
12274  report_fatal_error("Invalid register global variable type");
12275 
12276  bool is64Bit = isPPC64 && VT == MVT::i64;
12277  unsigned Reg = StringSwitch<unsigned>(RegName)
12278  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12279  .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12280  .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12281  (is64Bit ? PPC::X13 : PPC::R13))
12282  .Default(0);
12283 
12284  if (Reg)
12285  return Reg;
12286  report_fatal_error("Invalid register name global variable");
12287 }
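// Illustrative note (editor's addition, not part of the original source):
// this hook resolves named-register globals such as:
//
//   register unsigned long sp asm("r1");       // stack pointer
//   unsigned long read_sp(void) { return sp; }
//
// r1 is always valid; r2 and r13 resolve only on ABIs where they are
// reserved (TOC pointer, thread pointer), and any other name reaches the
// report_fatal_error above.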
12288 
12289 bool
12290 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
12291  // The PowerPC target isn't yet aware of offsets.
12292  return false;
12293 }
12294 
12295 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12296  const CallInst &I,
12297  unsigned Intrinsic) const {
12298 
12299  switch (Intrinsic) {
12300  case Intrinsic::ppc_qpx_qvlfd:
12301  case Intrinsic::ppc_qpx_qvlfs:
12302  case Intrinsic::ppc_qpx_qvlfcd:
12303  case Intrinsic::ppc_qpx_qvlfcs:
12304  case Intrinsic::ppc_qpx_qvlfiwa:
12305  case Intrinsic::ppc_qpx_qvlfiwz:
12306  case Intrinsic::ppc_altivec_lvx:
12307  case Intrinsic::ppc_altivec_lvxl:
12308  case Intrinsic::ppc_altivec_lvebx:
12309  case Intrinsic::ppc_altivec_lvehx:
12310  case Intrinsic::ppc_altivec_lvewx:
12311  case Intrinsic::ppc_vsx_lxvd2x:
12312  case Intrinsic::ppc_vsx_lxvw4x: {
12313  EVT VT;
12314  switch (Intrinsic) {
12315  case Intrinsic::ppc_altivec_lvebx:
12316  VT = MVT::i8;
12317  break;
12318  case Intrinsic::ppc_altivec_lvehx:
12319  VT = MVT::i16;
12320  break;
12321  case Intrinsic::ppc_altivec_lvewx:
12322  VT = MVT::i32;
12323  break;
12324  case Intrinsic::ppc_vsx_lxvd2x:
12325  VT = MVT::v2f64;
12326  break;
12327  case Intrinsic::ppc_qpx_qvlfd:
12328  VT = MVT::v4f64;
12329  break;
12330  case Intrinsic::ppc_qpx_qvlfs:
12331  VT = MVT::v4f32;
12332  break;
12333  case Intrinsic::ppc_qpx_qvlfcd:
12334  VT = MVT::v2f64;
12335  break;
12336  case Intrinsic::ppc_qpx_qvlfcs:
12337  VT = MVT::v2f32;
12338  break;
12339  default:
12340  VT = MVT::v4i32;
12341  break;
12342  }
12343 
12344  Info.opc = ISD::INTRINSIC_W_CHAIN;
12345  Info.memVT = VT;
12346  Info.ptrVal = I.getArgOperand(0);
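// Editor's note (not in the original source): lvx-class loads align the
// pointer down in hardware (lvx ignores the low four address bits), so the
// window reported below conservatively begins storeSize-1 bytes before
// ptrVal and spans 2*storeSize-1 bytes.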
12347  Info.offset = -VT.getStoreSize()+1;
12348  Info.size = 2*VT.getStoreSize()-1;
12349  Info.align = 1;
12350  Info.vol = false;
12351  Info.readMem = true;
12352  Info.writeMem = false;
12353  return true;
12354  }
12355  case Intrinsic::ppc_qpx_qvlfda:
12356  case Intrinsic::ppc_qpx_qvlfsa:
12357  case Intrinsic::ppc_qpx_qvlfcda:
12358  case Intrinsic::ppc_qpx_qvlfcsa:
12359  case Intrinsic::ppc_qpx_qvlfiwaa:
12360  case Intrinsic::ppc_qpx_qvlfiwza: {
12361  EVT VT;
12362  switch (Intrinsic) {
12363  case Intrinsic::ppc_qpx_qvlfda:
12364  VT = MVT::v4f64;
12365  break;
12366  case Intrinsic::ppc_qpx_qvlfsa:
12367  VT = MVT::v4f32;
12368  break;
12369  case Intrinsic::ppc_qpx_qvlfcda:
12370  VT = MVT::v2f64;
12371  break;
12372  case Intrinsic::ppc_qpx_qvlfcsa:
12373  VT = MVT::v2f32;
12374  break;
12375  default:
12376  VT = MVT::v4i32;
12377  break;
12378  }
12379 
12380  Info.opc = ISD::INTRINSIC_W_CHAIN;
12381  Info.memVT = VT;
12382  Info.ptrVal = I.getArgOperand(0);
12383  Info.offset = 0;
12384  Info.size = VT.getStoreSize();
12385  Info.align = 1;
12386  Info.vol = false;
12387  Info.readMem = true;
12388  Info.writeMem = false;
12389  return true;
12390  }
12391  case Intrinsic::ppc_qpx_qvstfd:
12392  case Intrinsic::ppc_qpx_qvstfs:
12393  case Intrinsic::ppc_qpx_qvstfcd:
12394  case Intrinsic::ppc_qpx_qvstfcs:
12395  case Intrinsic::ppc_qpx_qvstfiw:
12396  case Intrinsic::ppc_altivec_stvx:
12397  case Intrinsic::ppc_altivec_stvxl:
12398  case Intrinsic::ppc_altivec_stvebx:
12399  case Intrinsic::ppc_altivec_stvehx:
12400  case Intrinsic::ppc_altivec_stvewx:
12401  case Intrinsic::ppc_vsx_stxvd2x:
12402  case Intrinsic::ppc_vsx_stxvw4x: {
12403  EVT VT;
12404  switch (Intrinsic) {
12405  case Intrinsic::ppc_altivec_stvebx:
12406  VT = MVT::i8;
12407  break;
12408  case Intrinsic::ppc_altivec_stvehx:
12409  VT = MVT::i16;
12410  break;
12411  case Intrinsic::ppc_altivec_stvewx:
12412  VT = MVT::i32;
12413  break;
12414  case Intrinsic::ppc_vsx_stxvd2x:
12415  VT = MVT::v2f64;
12416  break;
12417  case Intrinsic::ppc_qpx_qvstfd:
12418  VT = MVT::v4f64;
12419  break;
12420  case Intrinsic::ppc_qpx_qvstfs:
12421  VT = MVT::v4f32;
12422  break;
12423  case Intrinsic::ppc_qpx_qvstfcd:
12424  VT = MVT::v2f64;
12425  break;
12426  case Intrinsic::ppc_qpx_qvstfcs:
12427  VT = MVT::v2f32;
12428  break;
12429  default:
12430  VT = MVT::v4i32;
12431  break;
12432  }
12433 
12434  Info.opc = ISD::INTRINSIC_VOID;
12435  Info.memVT = VT;
12436  Info.ptrVal = I.getArgOperand(1);
12437  Info.offset = -VT.getStoreSize()+1;
12438  Info.size = 2*VT.getStoreSize()-1;
12439  Info.align = 1;
12440  Info.vol = false;
12441  Info.readMem = false;
12442  Info.writeMem = true;
12443  return true;
12444  }
12445  case Intrinsic::ppc_qpx_qvstfda:
12446  case Intrinsic::ppc_qpx_qvstfsa:
12447  case Intrinsic::ppc_qpx_qvstfcda:
12448  case Intrinsic::ppc_qpx_qvstfcsa:
12449  case Intrinsic::ppc_qpx_qvstfiwa: {
12450  EVT VT;
12451  switch (Intrinsic) {
12452  case Intrinsic::ppc_qpx_qvstfda:
12453  VT = MVT::v4f64;
12454  break;
12455  case Intrinsic::ppc_qpx_qvstfsa:
12456  VT = MVT::v4f32;
12457  break;
12458  case Intrinsic::ppc_qpx_qvstfcda:
12459  VT = MVT::v2f64;
12460  break;
12461  case Intrinsic::ppc_qpx_qvstfcsa:
12462  VT = MVT::v2f32;
12463  break;
12464  default:
12465  VT = MVT::v4i32;
12466  break;
12467  }
12468 
12469  Info.opc = ISD::INTRINSIC_VOID;
12470  Info.memVT = VT;
12471  Info.ptrVal = I.getArgOperand(1);
12472  Info.offset = 0;
12473  Info.size = VT.getStoreSize();
12474  Info.align = 1;
12475  Info.vol = false;
12476  Info.readMem = false;
12477  Info.writeMem = true;
12478  return true;
12479  }
12480  default:
12481  break;
12482  }
12483 
12484  return false;
12485 }
12486 
12487 /// getOptimalMemOpType - Returns the target-specific optimal type for load
12488 /// and store operations as a result of memset, memcpy, and memmove
12489 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
12490 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
12491 /// against an alignment requirement, probably because the source does not
12492 /// need to be loaded. If 'IsMemset' is true, the call is expanding a
12493 /// memset. If 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc'
12494 /// indicates whether the memcpy source is constant so it does not need to
12495 /// be loaded.
12496 /// It returns EVT::Other if the type should be determined using generic
12497 /// target-independent logic.
12498 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
12499  unsigned DstAlign, unsigned SrcAlign,
12500  bool IsMemset, bool ZeroMemset,
12501  bool MemcpyStrSrc,
12502  MachineFunction &MF) const {
12503  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
12504  const Function *F = MF.getFunction();
12505  // When expanding a memset, require at least two QPX instructions to cover
12506  // the cost of loading the value to be stored from the constant pool.
12507  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
12508  (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
12509  !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
12510  return MVT::v4f64;
12511  }
12512 
12513  // We should use Altivec/VSX loads and stores when available. For unaligned
12514  // addresses, unaligned VSX loads are only fast starting with the P8.
12515  if (Subtarget.hasAltivec() && Size >= 16 &&
12516  (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
12517  ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
12518  return MVT::v4i32;
12519  }
12520 
12521  if (Subtarget.isPPC64()) {
12522  return MVT::i64;
12523  }
12524 
12525  return MVT::i32;
12526 }
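// Illustrative note (editor's addition, not part of the original source):
// under the checks above, a 64-byte memcpy between 16-byte-aligned buffers
// on an Altivec subtarget is expanded with v4i32, i.e. four vector
// load/store pairs instead of eight 64-bit ones; at -O0, or for small or
// unaligned copies, it falls back to i64 (64-bit targets) or i32 chunks.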
12527 
12528 /// \brief Returns true if it is beneficial to convert a load of a constant
12529 /// to just the constant itself.
12530 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12531  Type *Ty) const {
12532  assert(Ty->isIntegerTy());
12533 
12534  unsigned BitSize = Ty->getPrimitiveSizeInBits();
12535  return !(BitSize == 0 || BitSize > 64);
12536 }
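// Illustrative note (editor's addition, not part of the original source):
// any integer constant of at most 64 bits can be synthesized in registers
// with li/lis/ori/sldi-style sequences, which is why the hook above only
// rejects zero-width and wider-than-64-bit types.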
12537 
12538 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
12539  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
12540  return false;
12541  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
12542  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
12543  return NumBits1 == 64 && NumBits2 == 32;
12544 }
12545 
12546 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
12547  if (!VT1.isInteger() || !VT2.isInteger())
12548  return false;
12549  unsigned NumBits1 = VT1.getSizeInBits();
12550  unsigned NumBits2 = VT2.getSizeInBits();
12551  return NumBits1 == 64 && NumBits2 == 32;
12552 }
12553 
12554 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
12555  // Generally speaking, zexts are not free, but they are free when they can be
12556  // folded with other operations.
12557  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
12558  EVT MemVT = LD->getMemoryVT();
12559  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
12560  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
12561  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
12562  LD->getExtensionType() == ISD::ZEXTLOAD))
12563  return true;
12564  }
12565 
12566  // FIXME: Add other cases...
12567  // - 32-bit shifts with a zext to i64
12568  // - zext after ctlz, bswap, etc.
12569  // - zext after and by a constant mask
12570 
12571  return TargetLowering::isZExtFree(Val, VT2);
12572 }
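// Illustrative note (editor's addition, not part of the original source):
// the load case above reflects that PPC's narrow loads (lbz, lhz, and lwz
// in 64-bit mode) already zero-extend into the full register, so IR like
//
//   %b = load i16, i16* %p
//   %z = zext i16 %b to i64
//
// costs no extra instruction: the zext folds into the load.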
12573 
12574 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
12575  assert(VT.isFloatingPoint());
12576  return true;
12577 }
12578 
12579 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
12580  return isInt<16>(Imm) || isUInt<16>(Imm);
12581 }
12582 
12583 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
12584  return isInt<16>(Imm) || isUInt<16>(Imm);
12585 }
12586 
12587 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
12588  unsigned,
12589  unsigned,
12590  bool *Fast) const {
12591  if (DisablePPCUnaligned)
12592  return false;
12593 
12594  // PowerPC supports unaligned memory access for simple non-vector types.
12595  // Although accessing unaligned addresses is not as efficient as accessing
12596  // aligned addresses, it is generally more efficient than manual expansion,
12597  // and it traps to software emulation only when crossing page
12598  // boundaries.
12599 
12600  if (!VT.isSimple())
12601  return false;
12602 
12603  if (VT.getSimpleVT().isVector()) {
12604  if (Subtarget.hasVSX()) {
12605  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
12606  VT != MVT::v4f32 && VT != MVT::v4i32)
12607  return false;
12608  } else {
12609  return false;
12610  }
12611  }
12612 
12613  if (VT == MVT::ppcf128)
12614  return false;
12615 
12616  if (Fast)
12617  *Fast = true;
12618 
12619  return true;
12620 }
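// Illustrative note (editor's addition, not part of the original source):
// returning true here means, e.g., an unaligned i32 store stays a single
// stw rather than being expanded into byte stores and shifts; misaligned
// vector types outside the VSX set above, and ppcf128, still take the
// expanded path.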
12621 
12622 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
12623  VT = VT.getScalarType();
12624 
12625  if (!VT.isSimple())
12626  return false;
12627 
12628  switch (VT.getSimpleVT().SimpleTy) {
12629  case MVT::f32:
12630  case MVT::f64:
12631  return true;
12632  default:
12633  break;
12634  }
12635 
12636  return false;
12637 }
12638 
12639 const MCPhysReg *
12640 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
12641  // LR is a callee-save register, but we must treat it as clobbered by any call
12642  // site. Hence we include LR in the scratch registers, which are in turn added
12643  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
12644  // to CTR, which is used by any indirect call.
12645  static const MCPhysReg ScratchRegs[] = {
12646  PPC::X12, PPC::LR8, PPC::CTR8, 0
12647  };
12648 
12649  return ScratchRegs;
12650 }
12651 
12652 unsigned PPCTargetLowering::getExceptionPointerRegister(
12653  const Constant *PersonalityFn) const {
12654  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
12655 }
12656 
12657 unsigned PPCTargetLowering::getExceptionSelectorRegister(
12658  const Constant *PersonalityFn) const {
12659  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
12660 }
12661 
12662 bool
12663 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
12664  EVT VT, unsigned DefinedValues) const {
12665  if (VT == MVT::v2i64)
12666  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
12667 
12668  if (Subtarget.hasVSX() || Subtarget.hasQPX())
12669  return true;
12670 
12671  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
12672 }
12673 
12675  if (DisableILPPref || Subtarget.enableMachineScheduler())
12677 
12678  return Sched::ILP;
12679 }
12680 
12681 // Create a fast isel object.
12682 FastISel *
12683 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
12684  const TargetLibraryInfo *LibInfo) const {
12685  return PPC::createFastISel(FuncInfo, LibInfo);
12686 }
12687 
12688 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
12689  if (Subtarget.isDarwinABI()) return;
12690  if (!Subtarget.isPPC64()) return;
12691 
12692  // Update IsSplitCSR in PPCFunctionInfo
12693  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
12694  PFI->setIsSplitCSR(true);
12695 }
12696 
12697 void PPCTargetLowering::insertCopiesSplitCSR(
12698  MachineBasicBlock *Entry,
12699  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
12700  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12701  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
12702  if (!IStart)
12703  return;
12704 
12705  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12706  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
12707  MachineBasicBlock::iterator MBBI = Entry->begin();
12708  for (const MCPhysReg *I = IStart; *I; ++I) {
12709  const TargetRegisterClass *RC = nullptr;
12710  if (PPC::G8RCRegClass.contains(*I))
12711  RC = &PPC::G8RCRegClass;
12712  else if (PPC::F8RCRegClass.contains(*I))
12713  RC = &PPC::F8RCRegClass;
12714  else if (PPC::CRRCRegClass.contains(*I))
12715  RC = &PPC::CRRCRegClass;
12716  else if (PPC::VRRCRegClass.contains(*I))
12717  RC = &PPC::VRRCRegClass;
12718  else
12719  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
12720 
12721  unsigned NewVR = MRI->createVirtualRegister(RC);
12722  // Create copy from CSR to a virtual register.
12723  // FIXME: this currently does not emit CFI pseudo-instructions, it works
12724  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
12725  // nounwind. If we want to generalize this later, we may need to emit
12726  // CFI pseudo-instructions.
12727  assert(Entry->getParent()->getFunction()->hasFnAttribute(
12728  Attribute::NoUnwind) &&
12729  "Function should be nounwind in insertCopiesSplitCSR!");
12730  Entry->addLiveIn(*I);
12731  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
12732  .addReg(*I);
12733 
12734  // Insert the copy-back instructions right before the terminator
12735  for (auto *Exit : Exits)
12736  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
12737  TII->get(TargetOpcode::COPY), *I)
12738  .addReg(NewVR);
12739  }
12740 }
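// Illustrative note (editor's addition, not part of the original source):
// this split-CSR scheme serves the CXX_FAST_TLS calling convention: each
// callee-saved register is copied into a fresh virtual register at entry
// and copied back before every return, letting the register allocator
// place any spills instead of forcing fixed prologue/epilogue save slots.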
12741 
12742 // Override to enable LOAD_STACK_GUARD lowering on Linux.
12743 bool PPCTargetLowering::useLoadStackGuardNode() const {
12744  if (!Subtarget.isTargetLinux())
12745  return TargetLowering::useLoadStackGuardNode();
12746  return true;
12747 }
12748 
12749 // Override to disable global variable loading on Linux.
12750 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
12751  if (!Subtarget.isTargetLinux())
12752  return TargetLowering::insertSSPDeclarations(M);
12753 }
12754 
12755 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
12756 
12757  if (!VT.isSimple() || !Subtarget.hasVSX())
12758  return false;
12759 
12760  switch(VT.getSimpleVT().SimpleTy) {
12761  default:
12762  // For FP types that are currently not supported by the PPC backend, return
12763  // false. Examples: f16, f80.
12764  return false;
12765  case MVT::f32:
12766  case MVT::f64:
12767  case MVT::ppcf128:
12768  return Imm.isPosZero();
12769  }
12770 }
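// Illustrative note (editor's addition, not part of the original source):
// with VSX, +0.0 can be produced register-to-register (e.g. xxlxor of a
// register with itself), so
//
//   double zero(void) { return 0.0; }
//
// needs no constant-pool load, while any nonzero FP immediate is still
// materialized from memory.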
bool hasType(MVT vt) const
Return true if this TargetRegisterClass has the ValueType vt.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
IterTy arg_end() const
Definition: CallSite.h:532
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
static bool resideInSameSection(const Function *Caller, SDValue Callee, const TargetMachine &TM)
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:673
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
cl::opt< bool > ANDIGlueBug
X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
void setFrameAddressIsTaken(bool T)
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool hasLDBRX() const
Definition: PPCSubtarget.h:253
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
LinkageTypes getLinkage() const
Definition: GlobalValue.h:429
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:143
Return with a flag operand, matched by 'blr'.
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
bool hasComdat() const
Definition: GlobalObject.h:91
void setVarArgsNumGPR(unsigned Num)
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:213
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS)
bool isTargetELF() const
Definition: PPCSubtarget.h:293
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
unsigned getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const override
Return the register ID of the name passed in.
#define R4(n)
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:219
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Flags getFlags() const
Return the raw flags of the source value,.
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:458
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
LLVMContext & Context
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
QVFPERM = This corresponds to the QPX qvfperm instruction.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:804
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:724
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:298
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
STATISTIC(NumFunctions,"Total number of functions")
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
size_t i
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
LocInfo getLocInfo() const
GPRC = address of GLOBAL_OFFSET_TABLE.
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
bool isLittleEndian() const
Definition: PPCSubtarget.h:225
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
const TargetMachine & getTargetMachine() const
bool isDarwinABI() const
Definition: PPCSubtarget.h:297
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
class llvm::RegisterBankInfo GPR
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
bool hasISEL() const
Definition: PPCSubtarget.h:249
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:329
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:85
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:330
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:615
This class represents a function call, abstracting a target machine's calling convention.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:380
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
const GlobalValue * getGlobal() const
QBRC, CHAIN = QVLFSb CHAIN, Ptr The 4xf32 load used for v4i1 constants.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:173
bool hasLazyResolverStub(const GlobalValue *GV) const
hasLazyResolverStub - Return true if accesses to the specified global have to go through a dyld lazy ...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Function Alias Analysis Results
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
unsigned getSizeInBits() const
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:148
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getByValSize() const
bool hasP9Altivec() const
Definition: PPCSubtarget.h:247
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
unsigned getNumOperands() const
Return the number of values used by this operation.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
bool isDarwin() const
isDarwin - True if this is any darwin platform.
Definition: PPCSubtarget.h:289
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
unsigned getVarArgsNumGPR() const
CALL - A direct function call.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
#define R2(n)
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:266
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
static BranchProbability getOne()
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getValNo() const
const SDValue & getBasePtr() const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
void setVarArgsNumFPR(unsigned Num)
bool hasAltivec() const
Definition: PPCSubtarget.h:239
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:881
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:263
bool isRegLoc() const
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
return AArch64::GPR64RegClass contains(Reg)
SDValue getExternalSymbol(const char *Sym, EVT VT)
bool hasQPX() const
Definition: PPCSubtarget.h:241
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:165
unsigned arg_size() const
Definition: CallSite.h:211
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation...
bool hasP9Vector() const
Definition: PPCSubtarget.h:246
bool isFPExtFree(EVT VT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:114
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:388
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
lazy value info
The address of a basic block.
Definition: Constants.h:822
void setVarArgsStackOffset(int Offset)
A description of a memory reference used in the backend.
std::string getEVTString() const
getEVTString - This function returns value type as a string, e.g.
Definition: ValueTypes.cpp:120
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
struct fuzzer::@269 Flags
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:157
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
Shift and rotation operations.
Definition: ISDOpcodes.h:344
bool hasFPRND() const
Definition: PPCSubtarget.h:237
Class to represent struct types.
Definition: DerivedTypes.h:199
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
Base class for LoadSDNode and StoreSDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
ValTy * getCalledValue() const
getCalledValue - Return the pointer to function that is being called.
Definition: CallSite.h:102
unsigned getMinReservedArea() const
static void advance(T &it, size_t Val)
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, unsigned MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:994
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
Optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:1307
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T &Value) const
Definition: StringSwitch.h:244
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:475
Reg
All possible values of the reg field in the ModR/M byte.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
MachinePointerInfo getWithOffset(int64_t O) const
SimpleValueType SimpleTy
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA)...
Definition: ISDOpcodes.h:96
The memory access is dereferenceable (i.e., doesn't trap).
virtual bool isJumpTableRelative() const
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Direct move from a GPR to a VSX register (algebraic)
X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:509
int getMaskElt(unsigned Idx) const
bool isPositionIndependent() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
QVALIGNI = This corresponds to the QPX qvaligni instruction.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:405
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM's memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:611
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
unsigned getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
Context object for machine code objects.
Definition: MCContext.h:51
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static const unsigned PerfectShuffleTable[6561+1]
bool isInConsecutiveRegs() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(const char(&S)[N], const T &Value)
Definition: StringSwitch.h:74
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:31
#define F(x, y, z)
Definition: MD5.cpp:51
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
SDValue getRegisterMask(const uint32_t *RegMask)
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table...
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:535
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
MachineBasicBlock * MBB
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
G8RC = ADDIS_TLSGD_HA X2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
Class to represent array types.
Definition: DerivedTypes.h:345
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a symbol with hidden visibility...
Definition: PPC.h:79
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
SmallVector< ISD::OutputArg, 32 > Outs
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:81
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:262
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:220
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:949
const SDValue & getBasePtr() const
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments, on Darwin.
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
getOptimalMemOpType - Returns the target specific optimal type for load and store operations as a res...
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:791
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool isInConsecutiveRegsLast() const
int64_t getImm() const
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:656
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:875
bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
XXINSERT - The PPC VSX insert instruction.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset)
Stack pointer relative access.
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
uint32_t FloatToBits(float Float)
FloatToBits - This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:580
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
bool isELFv2ABI() const
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:359
SDNode * getNode() const
get the SDNode which holds the desired result
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Instruction * emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineInstrBuilder & setMemRefs(MachineInstr::mmo_iterator b, MachineInstr::mmo_iterator e) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:551
const SDValue & getBasePtr() const
unsigned const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
FSEL - Traditional three-operand fsel node.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isNonTemporal() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MVT getLocVT() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
Get the size in bits of Reg.
This is an important base class in LLVM.
Definition: Constant.h:42
virtual unsigned getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:74
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:637
G8RC = ADDIS_DTPREL_HA X3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
bool isVector() const
isVector - Return true if this is a vector value type.
Direct move from a VSX register to a GPR.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
static bool is64Bit(const char *name)
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:279
const Constant * getConstVal() const
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
unsigned getVarArgsNumFPR() const
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:145
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the specified isSplatShuffleMask...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
STFIWX - The STFIWX instruction.
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:284
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target...
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned getScalarValueSizeInBits() const
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:176
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
constexpr bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:405
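A hedged sketch of isPowerOf2_64 on a few assumed values:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  int main() {
    assert(llvm::isPowerOf2_64(64));   // exactly one bit set
    assert(!llvm::isPowerOf2_64(0));   // zero is not a power of two
    assert(!llvm::isPowerOf2_64(96));  // 0b1100000 has two bits set
    return 0;
  }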
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:154
This class provides iterator support for SDUse operands that use a specific SDNode.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:667
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
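A minimal sketch of the range-based any_of wrapper (the container and predicates are assumptions):
  #include "llvm/ADT/STLExtras.h"
  #include <cassert>
  #include <vector>
  int main() {
    std::vector<int> V = {1, 2, 3};
    assert(llvm::any_of(V, [](int X) { return X > 2; }));
    assert(!llvm::any_of(V, [](int X) { return X < 0; }));
    return 0;
  }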
static bool isIntS16Immediate(SDNode *N, short &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
uint32_t Offset
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
void setNoInfs(bool b)
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setVarArgsFrameIndex(int Index)
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:676
static const MCPhysReg QFPR[]
QFPR - The set of QPX registers that should be allocated for arguments.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
static mvt_range vector_valuetypes()
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
bool hasFSQRT() const
Definition: PPCSubtarget.h:229
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:206
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
arg_iterator arg_begin()
Definition: Function.h:550
G8RC = ADDIS_TLSLD_HA X2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
self_iterator getIterator()
Definition: ilist_node.h:81
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
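A short sketch of SmallSet::insert, whose returned pair's second member reports whether the value was newly inserted (values assumed):
  #include "llvm/ADT/SmallSet.h"
  #include <cassert>
  int main() {
    llvm::SmallSet<int, 4> S;
    assert(S.insert(42).second);   // newly inserted
    assert(!S.insert(42).second);  // already present, not inserted again
    return 0;
  }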
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
QVESPLATI = This corresponds to the QPX qvesplati instruction.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:628
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
Common code between 32-bit and 64-bit PowerPC targets.
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1...
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
std::vector< ArgListEntry > ArgListTy
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void setMinReservedArea(unsigned size)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasFPCVT() const
Definition: PPCSubtarget.h:238
This structure contains all information that is necessary for lowering calls.
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:298
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
This class contains a discriminated union of information about pointers in memory operands...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:391
void setUseUnderscoreLongJmp(bool Val)
Indicate whether this target prefers to use _longjmp to implement llvm.longjmp or the version without...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
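A hedged sketch of the idiom: llvm_unreachable documents a branch that, by construction, cannot be taken (the switch below is hypothetical):
  #include "llvm/Support/ErrorHandling.h"
  // Hypothetical: map a two-valued kind to a mnemonic.
  static const char *name(int Kind) {
    switch (Kind) {
    case 0: return "lo";
    case 1: return "hi";
    }
    llvm_unreachable("invalid kind"); // aborts (or is optimized out) if hit
  }
  int main() { return name(0)[0] == 'l' ? 0 : 1; }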
User::const_op_iterator arg_iterator
arg_iterator - The type of iterator to use when looping over actual arguments at this call site...
Definition: CallSite.h:205
Instruction * emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
const BlockAddress * getBlockAddress() const
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:265
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
virtual void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
Insert a select instruction into MBB before I that will copy TrueReg to DstReg when Cond is true...
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
const uint32_t * getNoPreservedMask() const override
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
bool isTargetLinux() const
Definition: PPCSubtarget.h:295
bool hasRecipPrec() const
Definition: PPCSubtarget.h:234
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
static bool isReleaseOrStronger(AtomicOrdering ao)
const MachinePointerInfo & getPointerInfo() const
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
bool hasLFIWAX() const
Definition: PPCSubtarget.h:236
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
unsigned getByValAlign() const
The memory access writes data.
const SDValue & getOffset() const
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register -- 64-bit SVR4 ABI only...
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:280
Extract a subvector from unsigned integer vector and convert to FP.
ArrayRef< int > getMask() const
static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, bool isPatchPoint, bool hasNest, SmallVectorImpl< std::pair< unsigned, SDValue >> &RegsToPass, SmallVectorImpl< SDValue > &Ops, std::vector< EVT > &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget)
IterTy arg_begin() const
Definition: CallSite.h:528
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:546
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:540
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
QBFLT = Access the underlying QPX floating-point boolean representation.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
bool isPosZero() const
Definition: APFloat.h:1046
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:186
bool hasP8Altivec() const
Definition: PPCSubtarget.h:244
uint64_t * Vals
Iterator for intrusive lists based on ilist_node.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
CCState - This class holds information needed while lowering arguments and return values...
X3 = GET_TLSLD_ADDR X3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
bool hasDirectMove() const
Definition: PPCSubtarget.h:266
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
#define R6(n)
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:488
void setNode(SDNode *N)
set the SDNode
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector< SDNode * > *Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators...
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
bool isJumpTableRelative() const override
bool isInvariant() const
shadow stack gc Shadow Stack GC Lowering
XXSPLT - The PPC VSX splat instructions.
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
VECSHL - The PPC VSX shift left instruction.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:482
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:179
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool useSoftFloat() const
Definition: PPCSubtarget.h:208
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
Provides information about what library functions are available for the current target.
bool isVector(MCInstrInfo const &MCII, MCInst const &MCI)
CCValAssign - Represent assignment of one arg/retval to a location.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:649
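Since array_lengthof is constexpr, it can back a compile-time check; a minimal sketch with an assumed register array:
  #include "llvm/ADT/STLExtras.h"
  static const unsigned GPRs[] = {3, 4, 5, 6}; // hypothetical register numbers
  static_assert(llvm::array_lengthof(GPRs) == 4,
                "length is deduced from the array type at compile time");
  int main() { return 0; }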
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
static const char * Target
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildSplatI - Build a canonical splati of Val with an element size of SplatSize.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID...
CHAIN = SC CHAIN, Imm128 - System call.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node. ...
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
X3 = GET_TLS_ADDR X3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase, but as an MCExpr.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset...
bool isDereferenceable() const
void setNoNaNs(bool b)
Represents one node in the SelectionDAG.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:623
VPERM - The PPC VPERM Instruction.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:584
STXSIX - The STXSI[bh]X instruction.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static mvt_range integer_valuetypes()
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:715
G8RC = ADDIS_GOT_TPREL_HA X2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class to represent vector types.
Definition: DerivedTypes.h:369
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SynchronizationScope SynchScope=CrossThread, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:77
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
QVGPCI = This corresponds to the QPX qvgpci instruction.
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove)
Do we have an efficient pattern in a .td file for this node?
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
op_iterator op_begin() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
unsigned EmulatedTLS
EmulatedTLS - This flag enables emulated TLS model, using emutls function in the runtime library...
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static use_iterator use_end()
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
bool isMemLoc() const
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegReg - Given the specified address, check to see if it can be represented as an inde...
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
bool hasSTFIWX() const
Definition: PPCSubtarget.h:235
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:464
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
bool enableMachineScheduler() const override
static bool isAcquireOrStronger(AtomicOrdering ao)
GPRC = address of GLOBAL_OFFSET_TABLE.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase, but as an MCExpr.
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca...
Definition: ISDOpcodes.h:758
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
block_iterator block_end() const
Definition: LoopInfo.h:142
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:560
Representation of each machine instruction.
Definition: MachineInstr.h:52
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:633
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack...
These are IR-level optimization flags that may be propagated to SDNodes.
Represents a use of a SDNode.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:578
SmallVector< SDValue, 32 > OutVals
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry...
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool isUndef() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:610
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Reciprocal estimate instructions (unary FP ops).
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
bool useLongCalls() const
Definition: PPCSubtarget.h:279
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:665
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
void clearBit(unsigned bitPosition)
Set a given bit to 0.
Definition: APInt.cpp:562
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
bool hasComdat() const
Definition: GlobalValue.h:213
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Direct move from a GPR to a VSX register (zero)
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
The CMPB instruction (takes two operands of i32 or i64).
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
bool hasVSX() const
Definition: PPCSubtarget.h:242
The memory access always returns the same value (or traps).
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1579
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
bool isISA3_0() const
Definition: PPCSubtarget.h:278
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction. ...
op_iterator op_end() const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
const SDValue & getOffset() const
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:707
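A minimal sketch of SignExtend32, widening an assumed 16-bit pattern to a signed 32-bit value:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  int main() {
    // Treat the low 16 bits as a signed value and widen to 32 bits.
    assert(llvm::SignExtend32<16>(0xFFFF) == -1);
    assert(llvm::SignExtend32<16>(0x7FFF) == 32767);
    return 0;
  }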
static void setUsesTOCBasePtr(MachineFunction &MF)
static volatile int Zero
TC_RETURN - A tail call return.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:530
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:312
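A hedged sketch of the isUInt range check on assumed values:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  int main() {
    assert(llvm::isUInt<16>(65535));   // fits in an unsigned halfword
    assert(!llvm::isUInt<16>(65536));  // one past the 16-bit range
    return 0;
  }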
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:108
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
static bool isSplat(ArrayRef< Value * > VL)
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca=nullptr)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
static bool isFunctionGlobalAddress(SDValue Callee)
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:217
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool hasP8Vector() const
Definition: PPCSubtarget.h:243
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
unsigned getReg() const
getReg - Returns the register number.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
unsigned getAlignment() const
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
SDValue getRegister(unsigned Reg, EVT VT)
void setUseUnderscoreSetJmp(bool Val)
Indicate whether this target prefers to use _setjmp to implement llvm.setjmp or the version without _...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2...
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
const ArgumentListType & getArgumentList() const
Get the underlying elements of the Function...
Definition: Function.h:499
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:830
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:685
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
These nodes represent the 32-bit PPC shifts that operate on 6-bit shift amounts.
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
unsigned getDarwinDirective() const
getDarwinDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:168
uint64_t getSize() const
Return the size in bytes of the memory reference.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:100
Primary interface to the complete machine description for the target machine.
block_iterator block_begin() const
Definition: LoopInfo.h:141
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Determine whether we should reference labels using a PICBase, and set HiOpFlags and LoOpFlags to the tar...
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:232
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:980
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
bool hasFRES() const
Definition: PPCSubtarget.h:231
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
bool hasFCPSGN() const
Definition: PPCSubtarget.h:228
static BranchProbability getZero()
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool hasFRE() const
Definition: PPCSubtarget.h:230
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
unsigned getLocMemOffset() const
Conversion operators.
Definition: ISDOpcodes.h:397
Extract a subvector from signed integer vector and convert to FP.
int * Ptr
isLittleEndian(LE)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:698
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getAlignment() const
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:66
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain...
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:233
RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the altivec VCMP*o instructions.
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:694
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:321
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase...
Definition: PPC.h:70
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
StringRef getSection() const
Definition: Globals.cpp:145
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
uint64_t getZExtValue() const
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:799
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:358
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:326
int isQVALIGNIShuffleMask(SDNode *N)
If this is a qvaligni shuffle mask, return the shift amount, otherwise return -1. ...
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:545
bool useSoftFloat() const override