//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
// ...
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
// ...
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
// ...
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    // ...
  }

  // ...

  // PowerPC has pre-inc loads and stores.
  // ...

  if (Subtarget.useCRBits()) {
    // ...

    if (isPPC64 || Subtarget.hasFPCVT()) {
      // ...
                         isPPC64 ? MVT::i64 : MVT::i32);
      // ...
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      // ...
    }

    // PowerPC does not support direct load / store of condition registers
    // ...

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      // ...

    for (MVT VT : MVT::integer_valuetypes()) {
      // ...
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  // ...

  // We do not currently implement these libm ops for PowerPC.
  // ...

  // PowerPC has no SREM/UREM instructions
  // ...

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  // ...

  // We don't support sin/cos/sqrt/fmod/pow
  // ...

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    // ...

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    // ...

  if (Subtarget.hasFCPSGN()) {
    // ...
  } else {
    // ...
  }

  if (Subtarget.hasFPRND()) {
    // ...
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  // ...

  if (Subtarget.hasPOPCNTD()) {
    // ...
  } else {
    // ...
  }

  // PowerPC does not have ROTR
  // ...

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    // ...
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  // ...

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    // ...

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    // ...

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  // ...

  // PowerPC does not have [U|S]INT_TO_FP
  // ...

  // We cannot sextinreg(i1). Expand to shifts.
  // ...

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling, but is a lightweight setjmp/longjmp replacement
  // used to support continuations, user-level threading, and so on. As a
  // result, no other SjLj exception interfaces are implemented; please don't
  // build your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  // ...

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  // ...

  // TRAP is legal.
  // ...

  // TRAMPOLINE is custom lowered.
  // ...

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  // ...

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      // ...
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      // ...
    }
  } else
    // ...

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    // ...
  else
    // ...

  // Use the default implementation.
  // ...

  // We want to custom lower some of our intrinsics.
  // ...

  // To handle counter-based loop conditions.
  // ...

  // Comparisons that require checking two conditions.
  // ...

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // ...
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    // ...

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      // ...
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    // ...
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      // ...
    }

    // ...
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    // ...
    // 64-bit PowerPC wants to expand i128 shifts itself.
    // ...
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    // ...
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      // ...

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        // ...
      } else {
        // ...
      }

      // We promote all shuffles to v16i8.
      // ...

      // We promote all non-typed operations to v4i32.
      // ...

      // No other operations are legal.
      // ...

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    // ...

    // ...
                       Subtarget.useCRBits() ? Legal : Expand);
    // ...

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    // ...

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      // ...
    }

    if (Subtarget.hasP8Altivec())
      // ...
    else
      // ...

    // ...

    // Altivec does not contain unordered floating-point compare instructions
    // ...

    if (Subtarget.hasVSX()) {
      // ...

      // Share the Altivec comparison restrictions.
      // ...

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        // ...
      } else {
        // ...

        // VSX v2i64 only supports non-arithmetic operations.
        // ...
      }

      // ...

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      // ...

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
  }

  if (Subtarget.hasQPX()) {
    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    // ...

    if (!Subtarget.useCRBits())
      // ...

    // ...

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    // ...

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    // ...

    if (TM.Options.UnsafeFPMath) {
      // ...
    } else {
      // ...
    }
  }

  if (Subtarget.has64BitSupport())
    // ...

  // ...

  if (!isPPC64) {
    // ...
  }

  // ...

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    // ...
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    // ...
  }

  if (isPPC64) {
    // ...
  } else {
    // ...
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ...
  if (Subtarget.hasFPCVT())
    // ...
  if (Subtarget.useCRBits())
    // ...

  if (Subtarget.useCRBits()) {
    // ...
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    // ...
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    // ...
  }

  // ...
  if (Subtarget.isDarwin())
    // ...

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    // ...
    break;
  }

  // ...

  if (Subtarget.enableMachineScheduler())
    // ...
  else
    // ...

  // ...

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    // ...
    MaxStoresPerMemcpy = 32;
    // ...
    MaxStoresPerMemmove = 32;
    // ...
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
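/// Illustrative example (not part of the original comment, derived from the
/// logic below): with MaxMaxAlign = 16 (Altivec), a type such as
/// struct { int i; <4 x float> v; } yields MaxAlign = 16 because the vector
/// member is 128 bits wide; with MaxMaxAlign = 32 (QPX), a 256-bit
/// <4 x double> member would yield MaxAlign = 32 instead.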
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    // ...

  // ...
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
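/// Illustrative example (not in the original comment, derived from the checks
/// below): for ShuffleKind 0 on a big-endian target, the expected mask keeps
/// the odd (low-order) byte of each halfword from both inputs:
///   {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}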
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
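/// Illustrative example (not in the original comment, derived from the checks
/// below): for ShuffleKind 0 on a big-endian target, the expected mask keeps
/// the low-order halfword of each word from both inputs:
///   {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}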
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
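/// Illustrative example (not in the original comment, derived from the checks
/// below): for ShuffleKind 0 on a big-endian target, the expected mask keeps
/// the low-order word of each doubleword from both inputs:
///   {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}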
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
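/// Illustrative example (not in the original comment, derived from isVMerge
/// above): a big-endian vmrglb merge of two different inputs (ShuffleKind 0,
/// UnitSize 1) interleaves the low halves of both vectors:
///   {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}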
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
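/// Illustrative example (not in the original comment, derived from isVMerge
/// above): a big-endian vmrghb merge of two different inputs (ShuffleKind 0,
/// UnitSize 1) interleaves the high halves of both vectors:
///   {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}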
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 elements
 * of size 8. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
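 *
 * Illustrative example (not part of the original comment, derived from the
 * checks below): on a big-endian target, an even-word merge of two different
 * inputs (IndexOffset 0, RHSStartValue 16) expects the mask
 *   {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
 * i.e. words A0 B0 A2 B2 of the concatenated inputs.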
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *         for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
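/// Illustrative example (not in the original comment, derived from the checks
/// below): for ShuffleKind 0 on a big-endian target, the mask
///   {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}
/// is a vsldoi with a shift amount of 3 bytes.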
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
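/// Illustrative example (not in the original comment, derived from the checks
/// below): with EltSize 4, the mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
/// splats word element 1 and is suitable for VSPLTW.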
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
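/// Illustrative example (not in the original comment, derived from the logic
/// below): a v16i8 build_vector whose elements are all the constant 3, with
/// ByteSize 1, returns the i32 target constant 3, which corresponds to
/// "vspltisb 3".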
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;  // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)  // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
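/// Illustrative example (not in the original comment): a constant of -32768
/// fits and sets Imm to -32768; a constant of 40000 does not, because
/// (short)40000 is -25536, which no longer compares equal to the original
/// value.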
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
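/// Illustrative example (not in the original comment): an address of the form
/// (add %r3, %r4) yields Base = %r3, Index = %r4, which can feed an indexed
/// load such as lwzx; (add %r3, 16) is rejected here so that it can be
/// matched as the [r+imm] form instead.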
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.computeKnownBits(N.getOperand(0),
                         LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1),
                           RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64 %c, i64* %b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
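/// Illustrative example (not in the original comment): (add %r1, 12) yields
/// Disp = 12 and Base = %r1, suitable for "ld r3, 12(r1)" when Aligned is
/// true; with an offset of 13 the Aligned case fails (13 & 3 != 0) and the
/// whole add is instead matched by the final fallback as [r+0].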
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
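/// Illustrative example (not in the original comment): a load from
/// (add %r3, 16) can become the update-form "lwzu r5, 16(r3)", which loads
/// and advances %r3 by 16 in one instruction; AM is then set to ISD::PRE_INC.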
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM,
                               const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;

  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr,
  // make sure that instruction lowering adds it.
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}

static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}

static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
                           SDValue GA) {
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
                          DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
                                 DAG.getVTList(VT, MVT::Other), Ops, VT,
                                 MachinePointerInfo::getGOT(), 0, false, true,
                                 false, 0);
}

2005 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2006  SelectionDAG &DAG) const {
2007  EVT PtrVT = Op.getValueType();
2008  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2009  const Constant *C = CP->getConstVal();
2010 
2011  // 64-bit SVR4 ABI code is always position-independent.
2012  // The actual address of the GlobalValue is stored in the TOC.
2013  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2014  setUsesTOCBasePtr(DAG);
2015  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2016  return getTOCEntry(DAG, SDLoc(CP), true, GA);
2017  }
2018 
2019  unsigned MOHiFlag, MOLoFlag;
2020  bool isPIC =
2021  GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2022 
2023  if (isPIC && Subtarget.isSVR4ABI()) {
2024  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2025                                           PPCII::MO_PIC_FLAG);
2026  return getTOCEntry(DAG, SDLoc(CP), false, GA);
2027  }
2028 
2029  SDValue CPIHi =
2030  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2031  SDValue CPILo =
2032  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2033  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
2034 }
2035 
2036 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2037  EVT PtrVT = Op.getValueType();
2038  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2039 
2040  // 64-bit SVR4 ABI code is always position-independent.
2041  // The actual address of the GlobalValue is stored in the TOC.
2042  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2043  setUsesTOCBasePtr(DAG);
2044  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2045  return getTOCEntry(DAG, SDLoc(JT), true, GA);
2046  }
2047 
2048  unsigned MOHiFlag, MOLoFlag;
2049  bool isPIC =
2050  GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2051 
2052  if (isPIC && Subtarget.isSVR4ABI()) {
2053  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2054                                       PPCII::MO_PIC_FLAG);
2055  return getTOCEntry(DAG, SDLoc(GA), false, GA);
2056  }
2057 
2058  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2059  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2060  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
2061 }
2062 
2063 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2064  SelectionDAG &DAG) const {
2065  EVT PtrVT = Op.getValueType();
2066  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2067  const BlockAddress *BA = BASDN->getBlockAddress();
2068 
2069  // 64-bit SVR4 ABI code is always position-independent.
2070  // The actual BlockAddress is stored in the TOC.
2071  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2072  setUsesTOCBasePtr(DAG);
2073  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2074  return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2075  }
2076 
2077  unsigned MOHiFlag, MOLoFlag;
2078  bool isPIC =
2079  GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2080  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2081  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2082  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
2083 }
2084 
2085 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2086  SelectionDAG &DAG) const {
2087 
2088  // FIXME: TLS addresses currently use medium model code sequences,
2089  // which is the most useful form. Eventually support for small and
2090  // large models could be added if users need it, at the cost of
2091  // additional complexity.
2092  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2093  SDLoc dl(GA);
2094  const GlobalValue *GV = GA->getGlobal();
2095  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2096  bool is64bit = Subtarget.isPPC64();
2097  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2098  PICLevel::Level picLevel = M->getPICLevel();
2099 
2100  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2101 
2102  if (Model == TLSModel::LocalExec) {
2103  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2104                                             PPCII::MO_TPREL_HA);
2105  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2106                                             PPCII::MO_TPREL_LO);
2107  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2108  is64bit ? MVT::i64 : MVT::i32);
2109  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2110  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2111  }
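// Roughly, the 64-bit local-exec sequence built above is (assuming the
// usual ELF TLS relocation spellings):
//   addis r, r13, x@tprel@ha   ; r13 is the thread pointer (r2 on 32-bit)
//   addi  r, r, x@tprel@l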
2112 
2113  if (Model == TLSModel::InitialExec) {
2114  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2115  SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2116  PPCII::MO_TLS);
2117  SDValue GOTPtr;
2118  if (is64bit) {
2119  setUsesTOCBasePtr(DAG);
2120  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2121  GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2122  PtrVT, GOTReg, TGA);
2123  } else
2124  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2125  SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2126  PtrVT, TGA, GOTPtr);
2127  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2128  }
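// Roughly, the 64-bit initial-exec sequence built above is:
//   addis r, r2, x@got@tprel@ha
//   ld    r, x@got@tprel@l(r)
//   add   r, r, x@tls          ; ADD_TLS: add in the thread pointer (r13)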
2129 
2130  if (Model == TLSModel::GeneralDynamic) {
2131  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2132  SDValue GOTPtr;
2133  if (is64bit) {
2134  setUsesTOCBasePtr(DAG);
2135  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2136  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2137  GOTReg, TGA);
2138  } else {
2139  if (picLevel == PICLevel::Small)
2140  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2141  else
2142  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2143  }
2144  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2145  GOTPtr, TGA, TGA);
2146  }
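// Roughly, the 64-bit general-dynamic sequence is (ADDI_TLSGD_L_ADDR is
// later split into the addi plus the call to __tls_get_addr):
//   addis r3, r2, x@got@tlsgd@ha
//   addi  r3, r3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
//   nop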
2147 
2148  if (Model == TLSModel::LocalDynamic) {
2149  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2150  SDValue GOTPtr;
2151  if (is64bit) {
2152  setUsesTOCBasePtr(DAG);
2153  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2154  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2155  GOTReg, TGA);
2156  } else {
2157  if (picLevel == PICLevel::Small)
2158  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2159  else
2160  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2161  }
2162  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2163  PtrVT, GOTPtr, TGA, TGA);
2164  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2165  PtrVT, TLSAddr, TGA);
2166  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2167  }
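// Roughly, the 64-bit local-dynamic sequence is the @tlsld analogue of the
// general-dynamic one, followed by a DTPREL offset off the returned base:
//   bl    __tls_get_addr(x@tlsld) ... ; module base in r3
//   addis r, r3, x@dtprel@ha
//   addi  r, r, x@dtprel@l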
2168 
2169  llvm_unreachable("Unknown TLS model!");
2170 }
2171 
2172 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2173  SelectionDAG &DAG) const {
2174  EVT PtrVT = Op.getValueType();
2175  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2176  SDLoc DL(GSDN);
2177  const GlobalValue *GV = GSDN->getGlobal();
2178 
2179  // 64-bit SVR4 ABI code is always position-independent.
2180  // The actual address of the GlobalValue is stored in the TOC.
2181  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2182  setUsesTOCBasePtr(DAG);
2183  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2184  return getTOCEntry(DAG, DL, true, GA);
2185  }
2186 
2187  unsigned MOHiFlag, MOLoFlag;
2188  bool isPIC =
2189  GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
2190 
2191  if (isPIC && Subtarget.isSVR4ABI()) {
2192  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2193  GSDN->getOffset(),
2194                                             PPCII::MO_PIC_FLAG);
2195  return getTOCEntry(DAG, DL, false, GA);
2196  }
2197 
2198  SDValue GAHi =
2199  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2200  SDValue GALo =
2201  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2202 
2203  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
2204 
2205  // If the global reference is actually to a non-lazy-pointer, we have to do an
2206  // extra load to get the address of the global.
2207  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2208  Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
2209  false, false, false, 0);
2210  return Ptr;
2211 }
2212 
2213 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2214  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2215  SDLoc dl(Op);
2216 
2217  if (Op.getValueType() == MVT::v2i64) {
2218  // When the operands themselves are v2i64 values, we need to do something
2219  // special because VSX has no underlying comparison operations for these.
2220  if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2221  // Equality can be handled by casting to the legal type for Altivec
2222  // comparisons, everything else needs to be expanded.
2223  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2224  return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2225  DAG.getSetCC(dl, MVT::v4i32,
2226  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2227  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2228  CC));
2229  }
2230 
2231  return SDValue();
2232  }
2233 
2234  // We handle most of these in the usual way.
2235  return Op;
2236  }
2237 
2238  // If we're comparing for equality to zero, expose the fact that this is
2239  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2240  // fold the new nodes.
2241  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2242  if (C->isNullValue() && CC == ISD::SETEQ) {
2243  EVT VT = Op.getOperand(0).getValueType();
2244  SDValue Zext = Op.getOperand(0);
2245  if (VT.bitsLT(MVT::i32)) {
2246  VT = MVT::i32;
2247  Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
2248  }
2249  unsigned Log2b = Log2_32(VT.getSizeInBits());
2250  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
2251  SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
2252  DAG.getConstant(Log2b, dl, MVT::i32));
2253  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
2254  }
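// For example, (i32 x) == 0 becomes roughly:
//   cntlzw r, x      ; yields 32 iff x == 0, otherwise <= 31
//   srwi   r, r, 5   ; shift by log2(32); result is 1 iff x == 0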
2255  // Leave comparisons against 0 and -1 alone for now, since they're usually
2256  // optimized. FIXME: revisit this when we can custom lower all setcc
2257  // optimizations.
2258  if (C->isAllOnesValue() || C->isNullValue())
2259  return SDValue();
2260  }
2261 
2262  // If we have an integer seteq/setne, turn it into a compare against zero
2263  // by xor'ing the rhs with the lhs, which is faster than setting a
2264  // condition register, reading it back out, and masking the correct bit. The
2265  // normal approach here uses sub to do this instead of xor. Using xor exposes
2266  // the result to other bit-twiddling opportunities.
2267  EVT LHSVT = Op.getOperand(0).getValueType();
2268  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2269  EVT VT = Op.getValueType();
2270  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2271  Op.getOperand(1));
2272  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2273  }
2274  return SDValue();
2275 }
2276 
2277 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
2278  const PPCSubtarget &Subtarget) const {
2279  SDNode *Node = Op.getNode();
2280  EVT VT = Node->getValueType(0);
2281  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2282  SDValue InChain = Node->getOperand(0);
2283  SDValue VAListPtr = Node->getOperand(1);
2284  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2285  SDLoc dl(Node);
2286 
2287  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2288 
2289  // gpr_index
2290  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2291  VAListPtr, MachinePointerInfo(SV), MVT::i8,
2292  false, false, false, 0);
2293  InChain = GprIndex.getValue(1);
2294 
2295  if (VT == MVT::i64) {
2296  // Check if GprIndex is even
2297  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2298  DAG.getConstant(1, dl, MVT::i32));
2299  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2300  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2301  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2302  DAG.getConstant(1, dl, MVT::i32));
2303  // Align GprIndex to be even if it isn't
2304  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2305  GprIndex);
2306  }
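// For illustration: with gpr_index == 1 (the next slot would be r4), an
// i64 va_arg is bumped to gpr_index == 2 so it reads the r5:r6 doubleword
// of the register save area, matching how the caller aligned the pair.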
2307 
2308  // fpr index is 1 byte after gpr
2309  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2310  DAG.getConstant(1, dl, MVT::i32));
2311 
2312  // fpr
2313  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2314  FprPtr, MachinePointerInfo(SV), MVT::i8,
2315  false, false, false, 0);
2316  InChain = FprIndex.getValue(1);
2317 
2318  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2319  DAG.getConstant(8, dl, MVT::i32));
2320 
2321  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2322  DAG.getConstant(4, dl, MVT::i32));
2323 
2324  // areas
2325  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
2326  MachinePointerInfo(), false, false,
2327  false, 0);
2328  InChain = OverflowArea.getValue(1);
2329 
2330  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
2331  MachinePointerInfo(), false, false,
2332  false, 0);
2333  InChain = RegSaveArea.getValue(1);
2334 
2335  // select overflow_area if index >= 8
2336  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2337  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2338 
2339  // adjustment constant gpr_index * 4/8
2340  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2341  VT.isInteger() ? GprIndex : FprIndex,
2342  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2343  MVT::i32));
2344 
2345  // OurReg = RegSaveArea + RegConstant
2346  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2347  RegConstant);
2348 
2349  // Floating types are 32 bytes into RegSaveArea
2350  if (VT.isFloatingPoint())
2351  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2352  DAG.getConstant(32, dl, MVT::i32));
2353 
2354  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2355  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2356  VT.isInteger() ? GprIndex : FprIndex,
2357  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2358  MVT::i32));
2359 
2360  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2361  VT.isInteger() ? VAListPtr : FprPtr,
2362  MachinePointerInfo(SV),
2363  MVT::i8, false, false, 0);
2364 
2365  // determine if we should load from reg_save_area or overflow_area
2366  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2367 
2368  // increase overflow_area by 4/8 if gpr/fpr >= 8
2369  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2370  DAG.getConstant(VT.isInteger() ? 4 : 8,
2371  dl, MVT::i32));
2372 
2373  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2374  OverflowAreaPlusN);
2375 
2376  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
2377  OverflowAreaPtr,
2378                              MachinePointerInfo(),
2379                              MVT::i32, false, false, 0);
2380 
2381  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
2382  false, false, false, 0);
2383 }
2384 
2385 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
2386  const PPCSubtarget &Subtarget) const {
2387  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2388 
2389  // We have to copy the entire va_list struct:
2390  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2391  return DAG.getMemcpy(Op.getOperand(0), Op,
2392  Op.getOperand(1), Op.getOperand(2),
2393  DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2394                        false, MachinePointerInfo(), MachinePointerInfo());
2395 }
2396 
2397 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2398  SelectionDAG &DAG) const {
2399  return Op.getOperand(0);
2400 }
2401 
2402 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2403  SelectionDAG &DAG) const {
2404  SDValue Chain = Op.getOperand(0);
2405  SDValue Trmp = Op.getOperand(1); // trampoline
2406  SDValue FPtr = Op.getOperand(2); // nested function
2407  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2408  SDLoc dl(Op);
2409 
2410  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2411  bool isPPC64 = (PtrVT == MVT::i64);
2412  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2413 
2414  TargetLowering::ArgListTy Args;
2415  TargetLowering::ArgListEntry Entry;
2416 
2417  Entry.Ty = IntPtrTy;
2418  Entry.Node = Trmp; Args.push_back(Entry);
2419 
2420  // TrampSize == (isPPC64 ? 48 : 40);
2421  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2422  isPPC64 ? MVT::i64 : MVT::i32);
2423  Args.push_back(Entry);
2424 
2425  Entry.Node = FPtr; Args.push_back(Entry);
2426  Entry.Node = Nest; Args.push_back(Entry);
2427 
2428  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2429  TargetLowering::CallLoweringInfo CLI(DAG);
2430  CLI.setDebugLoc(dl).setChain(Chain)
2431  .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2432  DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2433  std::move(Args), 0);
2434 
2435  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2436  return CallResult.second;
2437 }
2438 
2439 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
2440  const PPCSubtarget &Subtarget) const {
2441  MachineFunction &MF = DAG.getMachineFunction();
2442  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2443 
2444  SDLoc dl(Op);
2445 
2446  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2447  // vastart just stores the address of the VarArgsFrameIndex slot into the
2448  // memory location argument.
2449  EVT PtrVT = getPointerTy(MF.getDataLayout());
2450  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2451  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2452  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2453  MachinePointerInfo(SV),
2454  false, false, 0);
2455  }
2456 
2457  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2458  // We suppose the given va_list is already allocated.
2459  //
2460  // typedef struct {
2461  // char gpr; /* index into the array of 8 GPRs
2462  // * stored in the register save area
2463  // * gpr=0 corresponds to r3,
2464  // * gpr=1 to r4, etc.
2465  // */
2466  // char fpr; /* index into the array of 8 FPRs
2467  // * stored in the register save area
2468  // * fpr=0 corresponds to f1,
2469  // * fpr=1 to f2, etc.
2470  // */
2471  // char *overflow_arg_area;
2472  // /* location on stack that holds
2473  // * the next overflow argument
2474  // */
2475  // char *reg_save_area;
2476  // /* where r3:r10 and f1:f8 (if saved)
2477  // * are stored
2478  // */
2479  // } va_list[1];
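 // For illustration, the resulting 12-byte layout (the same offsets are
 // assumed by LowerVAARG and by the 12-byte memcpy in LowerVACOPY):
 //   offset 0: gpr               (1 byte)
 //   offset 1: fpr               (1 byte)
 //   offset 2: 2 bytes of padding
 //   offset 4: overflow_arg_area (4-byte pointer)
 //   offset 8: reg_save_area     (4-byte pointer)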
2480 
2481 
2482  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2483  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2484 
2485  EVT PtrVT = getPointerTy(MF.getDataLayout());
2486 
2487  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2488  PtrVT);
2489  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2490  PtrVT);
2491 
2492  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2493  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2494 
2495  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2496  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2497 
2498  uint64_t FPROffset = 1;
2499  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2500 
2501  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2502 
2503  // Store first byte : number of int regs
2504  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2505  Op.getOperand(1),
2506  MachinePointerInfo(SV),
2507  MVT::i8, false, false, 0);
2508  uint64_t nextOffset = FPROffset;
2509  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2510  ConstFPROffset);
2511 
2512  // Store second byte : number of float regs
2513  SDValue secondStore =
2514  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2515  MachinePointerInfo(SV, nextOffset), MVT::i8,
2516  false, false, 0);
2517  nextOffset += StackOffset;
2518  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2519 
2520  // Store second word : arguments given on stack
2521  SDValue thirdStore =
2522  DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2523  MachinePointerInfo(SV, nextOffset),
2524  false, false, 0);
2525  nextOffset += FrameOffset;
2526  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2527 
2528  // Store third word : arguments given in registers
2529  return DAG.getStore(thirdStore, dl, FR, nextPtr,
2530  MachinePointerInfo(SV, nextOffset),
2531  false, false, 0);
2532 
2533 }
2534 
2535 #include "PPCGenCallingConv.inc"
2536 
2537 // Function whose sole purpose is to kill compiler warnings
2538 // stemming from unused functions included from PPCGenCallingConv.inc.
2539 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2540  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2541 }
2542 
2543 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2544  CCValAssign::LocInfo &LocInfo,
2545  ISD::ArgFlagsTy &ArgFlags,
2546  CCState &State) {
2547  return true;
2548 }
2549 
2550 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2551  MVT &LocVT,
2552  CCValAssign::LocInfo &LocInfo,
2553  ISD::ArgFlagsTy &ArgFlags,
2554  CCState &State) {
2555  static const MCPhysReg ArgRegs[] = {
2556  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2557  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2558  };
2559  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2560 
2561  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2562 
2563  // Skip one register if the first unallocated register has an even register
2564  // number and there are still argument registers available which have not been
2565  // allocated yet. RegNum is actually an index into ArgRegs, which means we
2566  // need to skip a register if RegNum is odd.
2567  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2568  State.AllocateReg(ArgRegs[RegNum]);
2569  }
2570 
2571  // Always return false here, as this function only makes sure that the first
2572  // unallocated register has an odd register number and does not actually
2573  // allocate a register for the current argument.
2574  return false;
2575 }
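// For illustration: for void f(int a, long long b) under 32-bit SVR4, 'a'
// lands in r3; 'b' must start at an odd-numbered register, so this hook
// burns r4 and 'b' occupies the r5:r6 pair.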
2576 
2577 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2578  MVT &LocVT,
2579  CCValAssign::LocInfo &LocInfo,
2580  ISD::ArgFlagsTy &ArgFlags,
2581  CCState &State) {
2582  static const MCPhysReg ArgRegs[] = {
2583  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2584  PPC::F8
2585  };
2586 
2587  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2588 
2589  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2590 
2591  // If there is only one Floating-point register left we need to put both f64
2592  // values of a split ppc_fp128 value on the stack.
2593  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2594  State.AllocateReg(ArgRegs[RegNum]);
2595  }
2596 
2597  // Always return false here, as this function only makes sure that the two f64
2598  // values a ppc_fp128 value is split into are both passed in registers or both
2599  // passed on the stack and does not actually allocate a register for the
2600  // current argument.
2601  return false;
2602 }
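// For illustration: if F1-F7 are already taken when a ppc_fp128 argument
// arrives, its two f64 halves would straddle F8 and the stack; burning F8
// here forces both halves onto the stack together.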
2603 
2604 /// FPR - The set of FP registers that should be allocated for arguments,
2605 /// on Darwin.
2606 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
2607  PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
2608  PPC::F11, PPC::F12, PPC::F13};
2609 
2610 /// QFPR - The set of QPX registers that should be allocated for arguments.
2611 static const MCPhysReg QFPR[] = {
2612  PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
2613  PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2614 
2615 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2616 /// the stack.
2617 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2618                                        unsigned PtrByteSize) {
2619  unsigned ArgSize = ArgVT.getStoreSize();
2620  if (Flags.isByVal())
2621  ArgSize = Flags.getByValSize();
2622 
2623  // Round up to multiples of the pointer size, except for array members,
2624  // which are always packed.
2625  if (!Flags.isInConsecutiveRegs())
2626  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2627 
2628  return ArgSize;
2629 }
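// For example, with PtrByteSize == 8 a 13-byte byval aggregate reserves
// ((13 + 7) / 8) * 8 == 16 bytes, while an f32 member of a split array
// (isInConsecutiveRegs) stays packed at its 4-byte store size.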
2630 
2631 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2632 /// on the stack.
2633 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2634                                             ISD::ArgFlagsTy Flags,
2635                                             unsigned PtrByteSize) {
2636  unsigned Align = PtrByteSize;
2637 
2638  // Altivec parameters are padded to a 16 byte boundary.
2639  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2640  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2641  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2642  ArgVT == MVT::v1i128)
2643  Align = 16;
2644  // QPX vector types stored in double-precision are padded to a 32 byte
2645  // boundary.
2646  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2647  Align = 32;
2648 
2649  // ByVal parameters are aligned as requested.
2650  if (Flags.isByVal()) {
2651  unsigned BVAlign = Flags.getByValAlign();
2652  if (BVAlign > PtrByteSize) {
2653  if (BVAlign % PtrByteSize != 0)
2654       llvm_unreachable(
2655         "ByVal alignment is not a multiple of the pointer size");
2656 
2657  Align = BVAlign;
2658  }
2659  }
2660 
2661  // Array members are always packed to their original alignment.
2662  if (Flags.isInConsecutiveRegs()) {
2663  // If the array member was split into multiple registers, the first
2664  // needs to be aligned to the size of the full type. (Except for
2665  // ppcf128, which is only aligned as its f64 components.)
2666  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2667  Align = OrigVT.getStoreSize();
2668  else
2669  Align = ArgVT.getStoreSize();
2670  }
2671 
2672  return Align;
2673 }
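// For example: an i64 aligns to PtrByteSize; a v4i32 Altivec parameter
// aligns to 16; a QPX v4f64 aligns to 32; and a byval aggregate requesting
// align(16) keeps 16 even when PtrByteSize is 8.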
2674 
2675 /// CalculateStackSlotUsed - Return whether this argument will use its
2676 /// stack slot (instead of being passed in registers). ArgOffset,
2677 /// AvailableFPRs, and AvailableVRs must hold the current argument
2678 /// position, and will be updated to account for this argument.
2679 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2680                                    ISD::ArgFlagsTy Flags,
2681                                    unsigned PtrByteSize,
2682  unsigned LinkageSize,
2683  unsigned ParamAreaSize,
2684  unsigned &ArgOffset,
2685  unsigned &AvailableFPRs,
2686  unsigned &AvailableVRs, bool HasQPX) {
2687  bool UseMemory = false;
2688 
2689  // Respect alignment of argument on the stack.
2690  unsigned Align =
2691  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2692  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2693  // If there's no space left in the argument save area, we must
2694  // use memory (this check also catches zero-sized arguments).
2695  if (ArgOffset >= LinkageSize + ParamAreaSize)
2696  UseMemory = true;
2697 
2698  // Allocate argument on the stack.
2699  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2700  if (Flags.isInConsecutiveRegsLast())
2701  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2702  // If we overran the argument save area, we must use memory
2703  // (this check catches arguments passed partially in memory)
2704  if (ArgOffset > LinkageSize + ParamAreaSize)
2705  UseMemory = true;
2706 
2707  // However, if the argument is actually passed in an FPR or a VR,
2708  // we don't use memory after all.
2709  if (!Flags.isByVal()) {
2710  if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2711  // QPX registers overlap with the scalar FP registers.
2712  (HasQPX && (ArgVT == MVT::v4f32 ||
2713  ArgVT == MVT::v4f64 ||
2714  ArgVT == MVT::v4i1)))
2715  if (AvailableFPRs > 0) {
2716  --AvailableFPRs;
2717  return false;
2718  }
2719  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2720  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2721  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2722  ArgVT == MVT::v1i128)
2723  if (AvailableVRs > 0) {
2724  --AvailableVRs;
2725  return false;
2726  }
2727  }
2728 
2729  return UseMemory;
2730 }
2731 
2732 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2733 /// ensure minimum alignment required for target.
2734 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2735                                      unsigned NumBytes) {
2736  unsigned TargetAlign = Lowering->getStackAlignment();
2737  unsigned AlignMask = TargetAlign - 1;
2738  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2739  return NumBytes;
2740 }
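// For example, with a 16-byte target stack alignment, NumBytes == 100
// rounds up to (100 + 15) & ~15 == 112.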
2741 
2742 SDValue
2743 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2744  CallingConv::ID CallConv, bool isVarArg,
2745                                         const SmallVectorImpl<ISD::InputArg>
2746                                           &Ins,
2747  SDLoc dl, SelectionDAG &DAG,
2748  SmallVectorImpl<SDValue> &InVals)
2749  const {
2750  if (Subtarget.isSVR4ABI()) {
2751  if (Subtarget.isPPC64())
2752  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2753  dl, DAG, InVals);
2754  else
2755  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2756  dl, DAG, InVals);
2757  } else {
2758  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2759  dl, DAG, InVals);
2760  }
2761 }
2762 
2763 SDValue
2764 PPCTargetLowering::LowerFormalArguments_32SVR4(
2765  SDValue Chain,
2766  CallingConv::ID CallConv, bool isVarArg,
2767                                       const SmallVectorImpl<ISD::InputArg>
2768                                         &Ins,
2769  SDLoc dl, SelectionDAG &DAG,
2770  SmallVectorImpl<SDValue> &InVals) const {
2771 
2772  // 32-bit SVR4 ABI Stack Frame Layout:
2773  // +-----------------------------------+
2774  // +--> | Back chain |
2775  // | +-----------------------------------+
2776  // | | Floating-point register save area |
2777  // | +-----------------------------------+
2778  // | | General register save area |
2779  // | +-----------------------------------+
2780  // | | CR save word |
2781  // | +-----------------------------------+
2782  // | | VRSAVE save word |
2783  // | +-----------------------------------+
2784  // | | Alignment padding |
2785  // | +-----------------------------------+
2786  // | | Vector register save area |
2787  // | +-----------------------------------+
2788  // | | Local variable space |
2789  // | +-----------------------------------+
2790  // | | Parameter list area |
2791  // | +-----------------------------------+
2792  // | | LR save word |
2793  // | +-----------------------------------+
2794  // SP--> +--- | Back chain |
2795  // +-----------------------------------+
2796  //
2797  // Specifications:
2798  // System V Application Binary Interface PowerPC Processor Supplement
2799  // AltiVec Technology Programming Interface Manual
2800 
2801  MachineFunction &MF = DAG.getMachineFunction();
2802  MachineFrameInfo *MFI = MF.getFrameInfo();
2803  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2804 
2805  EVT PtrVT = getPointerTy(MF.getDataLayout());
2806  // Potential tail calls could cause overwriting of argument stack slots.
2807  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2808  (CallConv == CallingConv::Fast));
2809  unsigned PtrByteSize = 4;
2810 
2811  // Assign locations to all of the incoming arguments.
2812  SmallVector<CCValAssign, 16> ArgLocs;
2813  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2814  *DAG.getContext());
2815 
2816  // Reserve space for the linkage area on the stack.
2817  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2818  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2819 
2820  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2821 
2822  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2823  CCValAssign &VA = ArgLocs[i];
2824 
2825  // Arguments stored in registers.
2826  if (VA.isRegLoc()) {
2827  const TargetRegisterClass *RC;
2828  EVT ValVT = VA.getValVT();
2829 
2830  switch (ValVT.getSimpleVT().SimpleTy) {
2831  default:
2832  llvm_unreachable("ValVT not supported by formal arguments Lowering");
2833  case MVT::i1:
2834  case MVT::i32:
2835  RC = &PPC::GPRCRegClass;
2836  break;
2837  case MVT::f32:
2838  if (Subtarget.hasP8Vector())
2839  RC = &PPC::VSSRCRegClass;
2840  else
2841  RC = &PPC::F4RCRegClass;
2842  break;
2843  case MVT::f64:
2844  if (Subtarget.hasVSX())
2845  RC = &PPC::VSFRCRegClass;
2846  else
2847  RC = &PPC::F8RCRegClass;
2848  break;
2849  case MVT::v16i8:
2850  case MVT::v8i16:
2851  case MVT::v4i32:
2852  RC = &PPC::VRRCRegClass;
2853  break;
2854  case MVT::v4f32:
2855  RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2856  break;
2857  case MVT::v2f64:
2858  case MVT::v2i64:
2859  RC = &PPC::VSHRCRegClass;
2860  break;
2861  case MVT::v4f64:
2862  RC = &PPC::QFRCRegClass;
2863  break;
2864  case MVT::v4i1:
2865  RC = &PPC::QBRCRegClass;
2866  break;
2867  }
2868 
2869  // Transform the arguments stored in physical registers into virtual ones.
2870  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2871  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2872  ValVT == MVT::i1 ? MVT::i32 : ValVT);
2873 
2874  if (ValVT == MVT::i1)
2875  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2876 
2877  InVals.push_back(ArgValue);
2878  } else {
2879  // Argument stored in memory.
2880  assert(VA.isMemLoc());
2881 
2882  unsigned ArgSize = VA.getLocVT().getStoreSize();
2883  int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2884  isImmutable);
2885 
2886  // Create load nodes to retrieve arguments from the stack.
2887  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2888  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2889                                    MachinePointerInfo(),
2890                                    false, false, false, 0));
2891  }
2892  }
2893 
2894  // Assign locations to all of the incoming aggregate by value arguments.
2895  // Aggregates passed by value are stored in the local variable space of the
2896  // caller's stack frame, right above the parameter list area.
2897  SmallVector<CCValAssign, 16> ByValArgLocs;
2898  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2899  ByValArgLocs, *DAG.getContext());
2900 
2901  // Reserve stack space for the allocations in CCInfo.
2902  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2903 
2904  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2905 
2906  // Area that is at least reserved in the caller of this function.
2907  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2908  MinReservedArea = std::max(MinReservedArea, LinkageSize);
2909 
2910  // Set the size that is at least reserved in caller of this function. Tail
2911  // call optimized function's reserved stack space needs to be aligned so that
2912  // taking the difference between two stack areas will result in an aligned
2913  // stack.
2914  MinReservedArea =
2915  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
2916  FuncInfo->setMinReservedArea(MinReservedArea);
2917 
2918  SmallVector<SDValue, 8> MemOps;
2919 
2920  // If the function takes variable number of arguments, make a frame index for
2921  // the start of the first vararg value... for expansion of llvm.va_start.
2922  if (isVarArg) {
2923  static const MCPhysReg GPArgRegs[] = {
2924  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2925  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2926  };
2927  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2928 
2929  static const MCPhysReg FPArgRegs[] = {
2930  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2931  PPC::F8
2932  };
2933  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2934  if (DisablePPCFloatInVariadic)
2935  NumFPArgRegs = 0;
2936 
2937  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
2938  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
2939 
2940  // Make room for NumGPArgRegs and NumFPArgRegs.
2941  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2942  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
2943 
2944  FuncInfo->setVarArgsStackOffset(
2945  MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2946  CCInfo.getNextStackOffset(), true));
2947 
2948  FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2949  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2950 
2951  // The fixed integer arguments of a variadic function are stored to the
2952  // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2953  // the result of va_next.
2954  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2955  // Get an existing live-in vreg, or add a new one.
2956  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2957  if (!VReg)
2958  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2959 
2960  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2961  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2962  MachinePointerInfo(), false, false, 0);
2963  MemOps.push_back(Store);
2964  // Increment the address by four for the next argument to store
2965  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
2966  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2967  }
2968 
2969  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2970  // is set.
2971  // The double arguments are stored to the VarArgsFrameIndex
2972  // on the stack.
2973  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2974  // Get an existing live-in vreg, or add a new one.
2975  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2976  if (!VReg)
2977  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2978 
2979  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2980  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2981  MachinePointerInfo(), false, false, 0);
2982  MemOps.push_back(Store);
2983  // Increment the address by eight for the next argument to store
2984  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
2985  PtrVT);
2986  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2987  }
2988  }
2989 
2990  if (!MemOps.empty())
2991  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2992 
2993  return Chain;
2994 }
2995 
2996 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2997 // value to MVT::i64 and then truncate to the correct register size.
2998 SDValue
2999 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
3000  SelectionDAG &DAG, SDValue ArgVal,
3001  SDLoc dl) const {
3002  if (Flags.isSExt())
3003  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3004  DAG.getValueType(ObjectVT));
3005  else if (Flags.isZExt())
3006  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3007  DAG.getValueType(ObjectVT));
3008 
3009  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3010 }
3011 
3012 SDValue
3013 PPCTargetLowering::LowerFormalArguments_64SVR4(
3014  SDValue Chain,
3015  CallingConv::ID CallConv, bool isVarArg,
3016                                       const SmallVectorImpl<ISD::InputArg>
3017                                         &Ins,
3018  SDLoc dl, SelectionDAG &DAG,
3019  SmallVectorImpl<SDValue> &InVals) const {
3020  // TODO: add description of PPC stack frame format, or at least some docs.
3021  //
3022  bool isELFv2ABI = Subtarget.isELFv2ABI();
3023  bool isLittleEndian = Subtarget.isLittleEndian();
3024  MachineFunction &MF = DAG.getMachineFunction();
3025  MachineFrameInfo *MFI = MF.getFrameInfo();
3026  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3027 
3028  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3029  "fastcc not supported on varargs functions");
3030 
3031  EVT PtrVT = getPointerTy(MF.getDataLayout());
3032  // Potential tail calls could cause overwriting of argument stack slots.
3033  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3034  (CallConv == CallingConv::Fast));
3035  unsigned PtrByteSize = 8;
3036  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3037 
3038  static const MCPhysReg GPR[] = {
3039  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3040  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3041  };
3042  static const MCPhysReg VR[] = {
3043  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3044  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3045  };
3046  static const MCPhysReg VSRH[] = {
3047  PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
3048  PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
3049  };
3050 
3051  const unsigned Num_GPR_Regs = array_lengthof(GPR);
3052  const unsigned Num_FPR_Regs = 13;
3053  const unsigned Num_VR_Regs = array_lengthof(VR);
3054  const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3055 
3056  // Do a first pass over the arguments to determine whether the ABI
3057  // guarantees that our caller has allocated the parameter save area
3058  // on its stack frame. In the ELFv1 ABI, this is always the case;
3059  // in the ELFv2 ABI, it is true if this is a vararg function or if
3060  // any parameter is located in a stack slot.
3061 
3062  bool HasParameterArea = !isELFv2ABI || isVarArg;
3063  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3064  unsigned NumBytes = LinkageSize;
3065  unsigned AvailableFPRs = Num_FPR_Regs;
3066  unsigned AvailableVRs = Num_VR_Regs;
3067  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3068  if (Ins[i].Flags.isNest())
3069  continue;
3070 
3071  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3072  PtrByteSize, LinkageSize, ParamAreaSize,
3073  NumBytes, AvailableFPRs, AvailableVRs,
3074  Subtarget.hasQPX()))
3075  HasParameterArea = true;
3076  }
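// For illustration: under ELFv2, something like int f(int a, int b) with
// all arguments in registers and no varargs leaves HasParameterArea false,
// so the caller is not assumed to have allocated the 64-byte save area.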
3077 
3078  // Add DAG nodes to load the arguments or copy them out of registers. On
3079  // entry to a function on PPC, the arguments start after the linkage area,
3080  // although the first ones are often in registers.
3081 
3082  unsigned ArgOffset = LinkageSize;
3083  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3084  unsigned &QFPR_idx = FPR_idx;
3085  SmallVector<SDValue, 8> MemOps;
3086  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3087  unsigned CurArgIdx = 0;
3088  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3089  SDValue ArgVal;
3090  bool needsLoad = false;
3091  EVT ObjectVT = Ins[ArgNo].VT;
3092  EVT OrigVT = Ins[ArgNo].ArgVT;
3093  unsigned ObjSize = ObjectVT.getStoreSize();
3094  unsigned ArgSize = ObjSize;
3095  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3096  if (Ins[ArgNo].isOrigArg()) {
3097  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3098  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3099  }
3100  // We re-align the argument offset for each argument, except under the
3101  // fast calling convention, where we re-align only when the argument will
3102  // actually use a stack slot.
3103  unsigned CurArgOffset, Align;
3104  auto ComputeArgOffset = [&]() {
3105  /* Respect alignment of argument on the stack. */
3106  Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3107  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3108  CurArgOffset = ArgOffset;
3109  };
3110 
3111  if (CallConv != CallingConv::Fast) {
3112  ComputeArgOffset();
3113 
3114  /* Compute GPR index associated with argument offset. */
3115  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3116  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3117  }
3118 
3119  // FIXME the codegen can be much improved in some cases.
3120  // We do not have to keep everything in memory.
3121  if (Flags.isByVal()) {
3122  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3123 
3124  if (CallConv == CallingConv::Fast)
3125  ComputeArgOffset();
3126 
3127  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3128  ObjSize = Flags.getByValSize();
3129  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3130  // Empty aggregate parameters do not take up registers. Examples:
3131  // struct { } a;
3132  // union { } b;
3133  // int c[0];
3134  // etc. However, we have to provide a place-holder in InVals, so
3135  // pretend we have an 8-byte item at the current address for that
3136  // purpose.
3137  if (!ObjSize) {
3138  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3139  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3140  InVals.push_back(FIN);
3141  continue;
3142  }
3143 
3144  // Create a stack object covering all stack doublewords occupied
3145  // by the argument. If the argument is (fully or partially) on
3146  // the stack, or if the argument is fully in registers but the
3147  // caller has allocated the parameter save anyway, we can refer
3148  // directly to the caller's stack frame. Otherwise, create a
3149  // local copy in our own frame.
3150  int FI;
3151  if (HasParameterArea ||
3152  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3153  FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
3154  else
3155  FI = MFI->CreateStackObject(ArgSize, Align, false);
3156  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3157 
3158  // Handle aggregates smaller than 8 bytes.
3159  if (ObjSize < PtrByteSize) {
3160  // The value of the object is its address, which differs from the
3161  // address of the enclosing doubleword on big-endian systems.
3162  SDValue Arg = FIN;
3163  if (!isLittleEndian) {
3164  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3165  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3166  }
3167  InVals.push_back(Arg);
3168 
3169  if (GPR_idx != Num_GPR_Regs) {
3170  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3171  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3172  SDValue Store;
3173 
3174  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3175  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3176  (ObjSize == 2 ? MVT::i16 : MVT::i32));
3177  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3178  MachinePointerInfo(FuncArg),
3179  ObjType, false, false, 0);
3180  } else {
3181  // For sizes that don't fit a truncating store (3, 5, 6, 7),
3182  // store the whole register as-is to the parameter save area
3183  // slot.
3184  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3185  MachinePointerInfo(FuncArg),
3186  false, false, 0);
3187  }
3188 
3189  MemOps.push_back(Store);
3190  }
3191  // Whether we copied from a register or not, advance the offset
3192  // into the parameter save area by a full doubleword.
3193  ArgOffset += PtrByteSize;
3194  continue;
3195  }
3196 
3197  // The value of the object is its address, which is the address of
3198  // its first stack doubleword.
3199  InVals.push_back(FIN);
3200 
3201  // Store whatever pieces of the object are in registers to memory.
3202  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3203  if (GPR_idx == Num_GPR_Regs)
3204  break;
3205 
3206  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3207  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3208  SDValue Addr = FIN;
3209  if (j) {
3210  SDValue Off = DAG.getConstant(j, dl, PtrVT);
3211  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3212  }
3213  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3214  MachinePointerInfo(FuncArg, j),
3215  false, false, 0);
3216  MemOps.push_back(Store);
3217  ++GPR_idx;
3218  }
3219  ArgOffset += ArgSize;
3220  continue;
3221  }
3222 
3223  switch (ObjectVT.getSimpleVT().SimpleTy) {
3224  default: llvm_unreachable("Unhandled argument type!");
3225  case MVT::i1:
3226  case MVT::i32:
3227  case MVT::i64:
3228  if (Flags.isNest()) {
3229  // The 'nest' parameter, if any, is passed in R11.
3230  unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3231  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3232 
3233  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3234  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3235 
3236  break;
3237  }
3238 
3239  // These can be scalar arguments or elements of an integer array type
3240  // passed directly. Clang may use those instead of "byval" aggregate
3241  // types to avoid forcing arguments to memory unnecessarily.
3242  if (GPR_idx != Num_GPR_Regs) {
3243  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3244  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3245 
3246  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3247  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3248  // value to MVT::i64 and then truncate to the correct register size.
3249  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3250  } else {
3251  if (CallConv == CallingConv::Fast)
3252  ComputeArgOffset();
3253 
3254  needsLoad = true;
3255  ArgSize = PtrByteSize;
3256  }
3257  if (CallConv != CallingConv::Fast || needsLoad)
3258  ArgOffset += 8;
3259  break;
3260 
3261  case MVT::f32:
3262  case MVT::f64:
3263  // These can be scalar arguments or elements of a float array type
3264  // passed directly. The latter are used to implement ELFv2 homogeneous
3265  // float aggregates.
3266  if (FPR_idx != Num_FPR_Regs) {
3267  unsigned VReg;
3268 
3269  if (ObjectVT == MVT::f32)
3270  VReg = MF.addLiveIn(FPR[FPR_idx],
3271  Subtarget.hasP8Vector()
3272  ? &PPC::VSSRCRegClass
3273  : &PPC::F4RCRegClass);
3274  else
3275  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3276  ? &PPC::VSFRCRegClass
3277  : &PPC::F8RCRegClass);
3278 
3279  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3280  ++FPR_idx;
3281  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3282  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3283  // once we support fp <-> gpr moves.
3284 
3285  // This can only ever happen in the presence of f32 array types,
3286  // since otherwise we never run out of FPRs before running out
3287  // of GPRs.
3288  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3289  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3290 
3291  if (ObjectVT == MVT::f32) {
3292  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3293  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3294  DAG.getConstant(32, dl, MVT::i32));
3295  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3296  }
3297 
3298  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3299  } else {
3300  if (CallConv == CallingConv::Fast)
3301  ComputeArgOffset();
3302 
3303  needsLoad = true;
3304  }
3305 
3306  // When passing an array of floats, the array occupies consecutive
3307  // space in the argument area; only round up to the next doubleword
3308  // at the end of the array. Otherwise, each float takes 8 bytes.
3309  if (CallConv != CallingConv::Fast || needsLoad) {
3310  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3311  ArgOffset += ArgSize;
3312  if (Flags.isInConsecutiveRegsLast())
3313  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3314  }
3315  break;
3316  case MVT::v4f32:
3317  case MVT::v4i32:
3318  case MVT::v8i16:
3319  case MVT::v16i8:
3320  case MVT::v2f64:
3321  case MVT::v2i64:
3322  case MVT::v1i128:
3323  if (!Subtarget.hasQPX()) {
3324  // These can be scalar arguments or elements of a vector array type
3325  // passed directly. The latter are used to implement ELFv2 homogeneous
3326  // vector aggregates.
3327  if (VR_idx != Num_VR_Regs) {
3328  unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
3329  MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
3330  MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3331  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3332  ++VR_idx;
3333  } else {
3334  if (CallConv == CallingConv::Fast)
3335  ComputeArgOffset();
3336 
3337  needsLoad = true;
3338  }
3339  if (CallConv != CallingConv::Fast || needsLoad)
3340  ArgOffset += 16;
3341  break;
3342  } // not QPX
3343 
3344  assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3345  "Invalid QPX parameter type");
3346  /* fall through */
3347 
3348  case MVT::v4f64:
3349  case MVT::v4i1:
3350  // QPX vectors are treated like their scalar floating-point subregisters
3351  // (except that they're larger).
3352  unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3353  if (QFPR_idx != Num_QFPR_Regs) {
3354  const TargetRegisterClass *RC;
3355  switch (ObjectVT.getSimpleVT().SimpleTy) {
3356  case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3357  case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3358  default: RC = &PPC::QBRCRegClass; break;
3359  }
3360 
3361  unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3362  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3363  ++QFPR_idx;
3364  } else {
3365  if (CallConv == CallingConv::Fast)
3366  ComputeArgOffset();
3367  needsLoad = true;
3368  }
3369  if (CallConv != CallingConv::Fast || needsLoad)
3370  ArgOffset += Sz;
3371  break;
3372  }
3373 
3374  // We need to load the argument to a virtual register if we determined
3375  // above that we ran out of physical registers of the appropriate type.
3376  if (needsLoad) {
3377  if (ObjSize < ArgSize && !isLittleEndian)
3378  CurArgOffset += ArgSize - ObjSize;
3379  int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3380  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3381  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3382  false, false, false, 0);
3383  }
3384 
3385  InVals.push_back(ArgVal);
3386  }
3387 
3388  // Area that is at least reserved in the caller of this function.
3389  unsigned MinReservedArea;
3390  if (HasParameterArea)
3391  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3392  else
3393  MinReservedArea = LinkageSize;
3394 
3395  // Set the size that is at least reserved in caller of this function. Tail
3396  // call optimized functions' reserved stack space needs to be aligned so that
3397  // taking the difference between two stack areas will result in an aligned
3398  // stack.
3399  MinReservedArea =
3400  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3401  FuncInfo->setMinReservedArea(MinReservedArea);
3402 
3403  // If the function takes variable number of arguments, make a frame index for
3404  // the start of the first vararg value... for expansion of llvm.va_start.
3405  if (isVarArg) {
3406  int Depth = ArgOffset;
3407 
3408  FuncInfo->setVarArgsFrameIndex(
3409  MFI->CreateFixedObject(PtrByteSize, Depth, true));
3410  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3411 
3412  // If this function is vararg, store any remaining integer argument regs
3413  // to their spots on the stack so that they may be loaded by dereferencing the
3414  // result of va_next.
3415  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3416  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3417  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3418  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3419  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3420  MachinePointerInfo(), false, false, 0);
3421  MemOps.push_back(Store);
3422  // Increment the address by eight for the next argument to store
3423  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3424  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3425  }
3426  }
3427 
3428  if (!MemOps.empty())
3429  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3430 
3431  return Chain;
3432 }
3433 
3434 SDValue
3435 PPCTargetLowering::LowerFormalArguments_Darwin(
3436  SDValue Chain,
3437  CallingConv::ID CallConv, bool isVarArg,
3438                                       const SmallVectorImpl<ISD::InputArg>
3439                                         &Ins,
3440  SDLoc dl, SelectionDAG &DAG,
3441  SmallVectorImpl<SDValue> &InVals) const {
3442  // TODO: add description of PPC stack frame format, or at least some docs.
3443  //
3444  MachineFunction &MF = DAG.getMachineFunction();
3445  MachineFrameInfo *MFI = MF.getFrameInfo();
3446  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3447 
3448  EVT PtrVT = getPointerTy(MF.getDataLayout());
3449  bool isPPC64 = PtrVT == MVT::i64;
3450  // Potential tail calls could cause overwriting of argument stack slots.
3451  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3452  (CallConv == CallingConv::Fast));
3453  unsigned PtrByteSize = isPPC64 ? 8 : 4;
3454  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3455  unsigned ArgOffset = LinkageSize;
3456  // Area that is at least reserved in caller of this function.
3457  unsigned MinReservedArea = ArgOffset;
3458 
3459  static const MCPhysReg GPR_32[] = { // 32-bit registers.
3460  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3461  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3462  };
3463  static const MCPhysReg GPR_64[] = { // 64-bit registers.
3464  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3465  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3466  };
3467  static const MCPhysReg VR[] = {
3468  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3469  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3470  };
3471 
3472  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3473  const unsigned Num_FPR_Regs = 13;
3474  const unsigned Num_VR_Regs = array_lengthof(VR);
3475 
3476  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3477 
3478  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3479 
3480  // In 32-bit non-varargs functions, the stack space for vectors is after the
3481  // stack space for non-vectors. We do not use this space unless we have
3482  // too many vectors to fit in registers, something that only occurs in
3483  // constructed examples, but we have to walk the arglist to figure that
3484  // out. For the pathological case, compute VecArgOffset as the start of
3485  // the vector parameter area; computing VecArgOffset is the entire point
3486  // of the following loop.
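// (Illustrative sketch, not in the original source: for a 32-bit
// non-varargs signature such as void f(int a, double b, vector float v),
// the loop below only visits 'a' and 'b', leaving VecArgOffset just past
// the non-vector parameter words; 'v' is then given memory in the
// separate 16-byte-aligned vector area computed after the loop.)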
3487  unsigned VecArgOffset = ArgOffset;
3488  if (!isVarArg && !isPPC64) {
3489  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3490  ++ArgNo) {
3491  EVT ObjectVT = Ins[ArgNo].VT;
3492  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3493 
3494  if (Flags.isByVal()) {
3495  // ObjSize is the true size, ArgSize rounded up to multiple of regs.
3496  unsigned ObjSize = Flags.getByValSize();
3497  unsigned ArgSize =
3498  ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3499  VecArgOffset += ArgSize;
3500  continue;
3501  }
3502 
3503  switch(ObjectVT.getSimpleVT().SimpleTy) {
3504  default: llvm_unreachable("Unhandled argument type!");
3505  case MVT::i1:
3506  case MVT::i32:
3507  case MVT::f32:
3508  VecArgOffset += 4;
3509  break;
3510  case MVT::i64: // PPC64
3511  case MVT::f64:
3512  // FIXME: We are guaranteed to be !isPPC64 at this point.
3513  // Does MVT::i64 apply?
3514  VecArgOffset += 8;
3515  break;
3516  case MVT::v4f32:
3517  case MVT::v4i32:
3518  case MVT::v8i16:
3519  case MVT::v16i8:
3520  // Nothing to do, we're only looking at Nonvector args here.
3521  break;
3522  }
3523  }
3524  }
3525  // We've found where the vector parameter area in memory is. Skip the
3526  // first 12 parameters; these don't use that memory.
3527  VecArgOffset = ((VecArgOffset+15)/16)*16;
3528  VecArgOffset += 12*16;
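// Worked example (assuming a 24-byte Darwin 32-bit linkage area): for
// f(int, double, vector float) the loop above leaves VecArgOffset at
// 24 + 4 + 8 = 36; rounding up to 16 gives 48, and skipping the twelve
// register-backed slots adds 12*16 = 192, so an in-memory vector argument
// would start at offset 240.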
3529 
3530  // Add DAG nodes to load the arguments or copy them out of registers. On
3531  // entry to a function on PPC, the arguments start after the linkage area,
3532  // although the first ones are often in registers.
3533 
3534  SmallVector<SDValue, 8> MemOps;
3535  unsigned nAltivecParamsAtEnd = 0;
3536  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3537  unsigned CurArgIdx = 0;
3538  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3539  SDValue ArgVal;
3540  bool needsLoad = false;
3541  EVT ObjectVT = Ins[ArgNo].VT;
3542  unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3543  unsigned ArgSize = ObjSize;
3544  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3545  if (Ins[ArgNo].isOrigArg()) {
3546  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3547  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3548  }
3549  unsigned CurArgOffset = ArgOffset;
3550 
3551  // Altivec parameters in varargs or 64-bit functions are padded to a 16-byte boundary.
3552  if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3553  ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3554  if (isVarArg || isPPC64) {
3555  MinReservedArea = ((MinReservedArea+15)/16)*16;
3556  MinReservedArea += CalculateStackSlotSize(ObjectVT,
3557  Flags,
3558  PtrByteSize);
3559  } else nAltivecParamsAtEnd++;
3560  } else
3561  // Calculate min reserved area.
3562  MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3563  Flags,
3564  PtrByteSize);
3565 
3566  // FIXME the codegen can be much improved in some cases.
3567  // We do not have to keep everything in memory.
3568  if (Flags.isByVal()) {
3569  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3570 
3571  // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3572  ObjSize = Flags.getByValSize();
3573  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3574  // Objects of size 1 and 2 are right justified, everything else is
3575  // left justified. This means the memory address is adjusted forwards.
3576  if (ObjSize==1 || ObjSize==2) {
3577  CurArgOffset = CurArgOffset + (4 - ObjSize);
3578  }
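// Example of the adjustment above: a 1-byte byval object is placed in the
// last byte of its 4-byte slot, so its address is CurArgOffset + 3; a
// 2-byte object lands at CurArgOffset + 2.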
3579  // The value of the object is its address.
3580  int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
3581  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3582  InVals.push_back(FIN);
3583  if (ObjSize==1 || ObjSize==2) {
3584  if (GPR_idx != Num_GPR_Regs) {
3585  unsigned VReg;
3586  if (isPPC64)
3587  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3588  else
3589  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3590  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3591  EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3592  SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3593  MachinePointerInfo(FuncArg),
3594  ObjType, false, false, 0);
3595  MemOps.push_back(Store);
3596  ++GPR_idx;
3597  }
3598 
3599  ArgOffset += PtrByteSize;
3600 
3601  continue;
3602  }
3603  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3604  // Store whatever pieces of the object are in registers
3605  // to memory. ArgOffset will be the address of the beginning
3606  // of the object.
3607  if (GPR_idx != Num_GPR_Regs) {
3608  unsigned VReg;
3609  if (isPPC64)
3610  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3611  else
3612  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3613  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3614  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3615  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3616  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3617  MachinePointerInfo(FuncArg, j),
3618  false, false, 0);
3619  MemOps.push_back(Store);
3620  ++GPR_idx;
3621  ArgOffset += PtrByteSize;
3622  } else {
3623  ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3624  break;
3625  }
3626  }
3627  continue;
3628  }
3629 
3630  switch (ObjectVT.getSimpleVT().SimpleTy) {
3631  default: llvm_unreachable("Unhandled argument type!");
3632  case MVT::i1:
3633  case MVT::i32:
3634  if (!isPPC64) {
3635  if (GPR_idx != Num_GPR_Regs) {
3636  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3637  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3638 
3639  if (ObjectVT == MVT::i1)
3640  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3641 
3642  ++GPR_idx;
3643  } else {
3644  needsLoad = true;
3645  ArgSize = PtrByteSize;
3646  }
3647  // All int arguments reserve stack space in the Darwin ABI.
3648  ArgOffset += PtrByteSize;
3649  break;
3650  }
3651  // FALLTHROUGH
3652  case MVT::i64: // PPC64
3653  if (GPR_idx != Num_GPR_Regs) {
3654  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3655  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3656 
3657  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3658  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3659  // value to MVT::i64 and then truncate to the correct register size.
3660  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3661 
3662  ++GPR_idx;
3663  } else {
3664  needsLoad = true;
3665  ArgSize = PtrByteSize;
3666  }
3667  // All int arguments reserve stack space in the Darwin ABI.
3668  ArgOffset += 8;
3669  break;
3670 
3671  case MVT::f32:
3672  case MVT::f64:
3673  // Every 4 bytes of argument space consumes one of the GPRs available for
3674  // argument passing.
3675  if (GPR_idx != Num_GPR_Regs) {
3676  ++GPR_idx;
3677  if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3678  ++GPR_idx;
3679  }
3680  if (FPR_idx != Num_FPR_Regs) {
3681  unsigned VReg;
3682 
3683  if (ObjectVT == MVT::f32)
3684  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3685  else
3686  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3687 
3688  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3689  ++FPR_idx;
3690  } else {
3691  needsLoad = true;
3692  }
3693 
3694  // All FP arguments reserve stack space in the Darwin ABI.
3695  ArgOffset += isPPC64 ? 8 : ObjSize;
3696  break;
3697  case MVT::v4f32:
3698  case MVT::v4i32:
3699  case MVT::v8i16:
3700  case MVT::v16i8:
3701  // Note that vector arguments in registers don't reserve stack space,
3702  // except in varargs functions.
3703  if (VR_idx != Num_VR_Regs) {
3704  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3705  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3706  if (isVarArg) {
3707  while ((ArgOffset % 16) != 0) {
3708  ArgOffset += PtrByteSize;
3709  if (GPR_idx != Num_GPR_Regs)
3710  GPR_idx++;
3711  }
3712  ArgOffset += 16;
3713  GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3714  }
3715  ++VR_idx;
3716  } else {
3717  if (!isVarArg && !isPPC64) {
3718  // Vectors go after all the nonvectors.
3719  CurArgOffset = VecArgOffset;
3720  VecArgOffset += 16;
3721  } else {
3722  // Vectors are aligned.
3723  ArgOffset = ((ArgOffset+15)/16)*16;
3724  CurArgOffset = ArgOffset;
3725  ArgOffset += 16;
3726  }
3727  needsLoad = true;
3728  }
3729  break;
3730  }
3731 
3732  // We need to load the argument to a virtual register if we determined above
3733  // that we ran out of physical registers of the appropriate type.
3734  if (needsLoad) {
3735  int FI = MFI->CreateFixedObject(ObjSize,
3736  CurArgOffset + (ArgSize - ObjSize),
3737  isImmutable);
3738  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3739  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3740  false, false, false, 0);
3741  }
3742 
3743  InVals.push_back(ArgVal);
3744  }
3745 
3746  // Allow for Altivec parameters at the end, if needed.
3747  if (nAltivecParamsAtEnd) {
3748  MinReservedArea = ((MinReservedArea+15)/16)*16;
3749  MinReservedArea += 16*nAltivecParamsAtEnd;
3750  }
3751 
3752  // Area that is at least reserved in the caller of this function.
3753  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3754 
3755  // Set the size that is at least reserved in the caller of this function. Tail
3756  // call optimized functions' reserved stack space needs to be aligned so that
3757  // taking the difference between two stack areas will result in an aligned
3758  // stack.
3759  MinReservedArea =
3760  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3761  FuncInfo->setMinReservedArea(MinReservedArea);
3762 
3763  // If the function takes a variable number of arguments, make a frame index for
3764  // the start of the first vararg value... for expansion of llvm.va_start.
3765  if (isVarArg) {
3766  int Depth = ArgOffset;
3767 
3768  FuncInfo->setVarArgsFrameIndex(
3769  MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3770  Depth, true));
3771  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3772 
3773  // If this function is vararg, store any remaining integer argument regs
3774  // to their spots on the stack so that they may be loaded by dereferencing the
3775  // result of va_next.
3776  for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3777  unsigned VReg;
3778 
3779  if (isPPC64)
3780  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3781  else
3782  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3783 
3784  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3785  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3786  MachinePointerInfo(), false, false, 0);
3787  MemOps.push_back(Store);
3788  // Increment the address by the pointer size for the next argument to store
3789  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3790  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3791  }
3792  }
3793 
3794  if (!MemOps.empty())
3795  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3796 
3797  return Chain;
3798 }
3799 
3800 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3801 /// adjusted to accommodate the arguments for the tailcall.
3802 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3803  unsigned ParamSize) {
3804 
3805  if (!isTailCall) return 0;
3806 
3808  unsigned CallerMinReservedArea = FI->getMinReservedArea();
3809  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3810  // Remember only if the new adjustment is bigger.
3811  if (SPDiff < FI->getTailCallSPDelta())
3812  FI->setTailCallSPDelta(SPDiff);
3813 
3814  return SPDiff;
3815 }
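// Worked example of the computation above (values illustrative): if the
// caller reserved CallerMinReservedArea = 112 bytes and the tail-callee
// needs ParamSize = 176 bytes, SPDiff = 112 - 176 = -64, i.e. the stack
// must be grown by 64 bytes before branching to the callee.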
3816 
3817 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3818 /// for tail call optimization. Targets which want to do tail call
3819 /// optimization should implement this function.
3820 bool
3821 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3822  CallingConv::ID CalleeCC,
3823  bool isVarArg,
3824  const SmallVectorImpl<ISD::InputArg> &Ins,
3825  SelectionDAG& DAG) const {
3826  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
3827  return false;
3828 
3829  // Variable argument functions are not supported.
3830  if (isVarArg)
3831  return false;
3832 
3833  MachineFunction &MF = DAG.getMachineFunction();
3834  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
3835  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
3836  // Functions containing by val parameters are not supported.
3837  for (unsigned i = 0; i != Ins.size(); i++) {
3838  ISD::ArgFlagsTy Flags = Ins[i].Flags;
3839  if (Flags.isByVal()) return false;
3840  }
3841 
3842  // Non-PIC/GOT tail calls are supported.
3843  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
3844  return true;
3845 
3846  // At the moment we can only do local tail calls (in same module, hidden
3847  // or protected) if we are generating PIC.
3848  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3849  return G->getGlobal()->hasHiddenVisibility()
3850  || G->getGlobal()->hasProtectedVisibility();
3851  }
3852 
3853  return false;
3854 }
3855 
3856 /// isBLACompatibleAddress - Return the immediate to use if the specified
3857 /// 32-bit value is representable in the immediate field of a BxA instruction.
3858 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
3859  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3860  if (!C) return nullptr;
3861 
3862  int Addr = C->getZExtValue();
3863  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
3864  SignExtend32<26>(Addr) != Addr)
3865  return nullptr; // Top 6 bits have to be sext of immediate.
3866 
3867  return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
3868  DAG.getTargetLoweringInfo().getPointerTy(
3869  DAG.getDataLayout())).getNode();
3870 }
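// Examples of the check above (illustrative values): 0x01FFFFFC has its
// low two bits clear and survives SignExtend32<26>, so it is encodable
// (shifted right by 2) in a BxA immediate; 0x01FFFFFE fails the alignment
// test, and 0x10000000 does not fit the sign-extended 26-bit field.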
3871 
3872 namespace {
3873 
3874 struct TailCallArgumentInfo {
3875  SDValue Arg;
3876  SDValue FrameIdxOp;
3877  int FrameIdx;
3878 
3879  TailCallArgumentInfo() : FrameIdx(0) {}
3880 };
3881 
3882 }
3883 
3884 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3885 static void
3886 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3887  SDValue Chain,
3888  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3889  SmallVectorImpl<SDValue> &MemOpChains,
3890  SDLoc dl) {
3891  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3892  SDValue Arg = TailCallArgs[i].Arg;
3893  SDValue FIN = TailCallArgs[i].FrameIdxOp;
3894  int FI = TailCallArgs[i].FrameIdx;
3895  // Store relative to framepointer.
3896  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3897  MachinePointerInfo::getFixedStack(FI),
3898  false, false, 0));
3899  }
3900 }
3901 
3902 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3903 /// the appropriate stack slot for the tail call optimized function call.
3904 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3905  MachineFunction &MF,
3906  SDValue Chain,
3907  SDValue OldRetAddr,
3908  SDValue OldFP,
3909  int SPDiff,
3910  bool isPPC64,
3911  bool isDarwinABI,
3912  SDLoc dl) {
3913  if (SPDiff) {
3914  // Calculate the new stack slot for the return address.
3915  int SlotSize = isPPC64 ? 8 : 4;
3916  const PPCFrameLowering *FL =
3917  MF.getSubtarget<PPCSubtarget>().getFrameLowering();
3918  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
3919  int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3920  NewRetAddrLoc, true);
3921  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3922  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3923  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3924  MachinePointerInfo::getFixedStack(NewRetAddr),
3925  false, false, 0);
3926 
3927  // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3928  // slot as the FP is never overwritten.
3929  if (isDarwinABI) {
3930  int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
3931  int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3932  true);
3933  SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3934  Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3935  MachinePointerInfo::getFixedStack(NewFPIdx),
3936  false, false, 0);
3937  }
3938  }
3939  return Chain;
3940 }
3941 
3942 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
3943 /// the position of the argument.
3944 static void
3945 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3946  SDValue Arg, int SPDiff, unsigned ArgOffset,
3947  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3948  int Offset = ArgOffset + SPDiff;
3949  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3950  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3951  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3952  SDValue FIN = DAG.getFrameIndex(FI, VT);
3953  TailCallArgumentInfo Info;
3954  Info.Arg = Arg;
3955  Info.FrameIdxOp = FIN;
3956  Info.FrameIdx = FI;
3957  TailCallArguments.push_back(Info);
3958 }
3959 
3960 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the return address and
3961 /// frame pointer stack slots. Returns the chain as result and the loaded
3962 /// values in LROpOut/FPOpOut. Used when tail calling.
3963 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3964  int SPDiff,
3965  SDValue Chain,
3966  SDValue &LROpOut,
3967  SDValue &FPOpOut,
3968  bool isDarwinABI,
3969  SDLoc dl) const {
3970  if (SPDiff) {
3971  // Load the LR and FP stack slot for later adjusting.
3972  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
3973  LROpOut = getReturnAddrFrameIndex(DAG);
3974  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3975  false, false, false, 0);
3976  Chain = SDValue(LROpOut.getNode(), 1);
3977 
3978  // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3979  // slot as the FP is never overwritten.
3980  if (isDarwinABI) {
3981  FPOpOut = getFramePointerFrameIndex(DAG);
3982  FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3983  false, false, false, 0);
3984  Chain = SDValue(FPOpOut.getNode(), 1);
3985  }
3986  }
3987  return Chain;
3988 }
3989 
3990 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
3991 /// by "Src" to address "Dst" of size "Size". Alignment information is
3992 /// specified by the specific parameter attribute. The copy will be passed as
3993 /// a byval function parameter.
3994 /// Sometimes what we are copying is the end of a larger object, the part that
3995 /// does not fit in registers.
3996 static SDValue
3997 CreateCopyOfByValArgument(SDValue Arg, SDValue PtrOff, SDValue Chain,
3998  ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3999  SDLoc dl) {
4000  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4001  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4002  false, false, false, MachinePointerInfo(),
4003  MachinePointerInfo());
4004 }
4005 
4006 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4007 /// tail calls.
4008 static void
4009 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
4010  SDValue Arg, SDValue PtrOff, int SPDiff,
4011  unsigned ArgOffset, bool isPPC64, bool isTailCall,
4012  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4013  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
4014  SDLoc dl) {
4015  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4016  if (!isTailCall) {
4017  if (isVector) {
4018  SDValue StackPtr;
4019  if (isPPC64)
4020  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4021  else
4022  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4023  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4024  DAG.getConstant(ArgOffset, dl, PtrVT));
4025  }
4026  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4027  MachinePointerInfo(), false, false, 0));
4028  // Calculate and remember argument location.
4029  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4030  TailCallArguments);
4031 }
4032 
4033 static
4034 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4035  SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
4036  SDValue LROp, SDValue FPOp, bool isDarwinABI,
4037  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4038  MachineFunction &MF = DAG.getMachineFunction();
4039 
4040  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4041  // might overwrite each other in case of tail call optimization.
4042  SmallVector<SDValue, 8> MemOpChains2;
4043  // Do not flag preceding copytoreg stuff together with the following stuff.
4044  InFlag = SDValue();
4045  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4046  MemOpChains2, dl);
4047  if (!MemOpChains2.empty())
4048  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4049 
4050  // Store the return address to the appropriate stack slot.
4051  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
4052  isPPC64, isDarwinABI, dl);
4053 
4054  // Emit callseq_end just before tailcall node.
4055  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4056  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4057  InFlag = Chain.getValue(1);
4058 }
4059 
4060 // Is this a global address of a function that can be called by name (as
4061 // opposed to something that must hold a descriptor for an indirect call)?
4062 static bool isFunctionGlobalAddress(SDValue Callee) {
4063  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4064  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4065  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4066  return false;
4067 
4068  return G->getGlobal()->getType()->getElementType()->isFunctionTy();
4069  }
4070 
4071  return false;
4072 }
4073 
4074 static
4075 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
4076  SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
4077  bool isTailCall, bool IsPatchPoint, bool hasNest,
4078  SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
4079  SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4080  ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4081 
4082  bool isPPC64 = Subtarget.isPPC64();
4083  bool isSVR4ABI = Subtarget.isSVR4ABI();
4084  bool isELFv2ABI = Subtarget.isELFv2ABI();
4085 
4086  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4087  NodeTys.push_back(MVT::Other); // Returns a chain
4088  NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4089 
4090  unsigned CallOpc = PPCISD::CALL;
4091 
4092  bool needIndirectCall = true;
4093  if (!isSVR4ABI || !isPPC64)
4094  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4095  // If this is an absolute destination address, use the munged value.
4096  Callee = SDValue(Dest, 0);
4097  needIndirectCall = false;
4098  }
4099 
4100  if (isFunctionGlobalAddress(Callee)) {
4101  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4102  // A call to a TLS address is actually an indirect call to a
4103  // thread-specific pointer.
4104  unsigned OpFlags = 0;
4105  if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
4106  (Subtarget.getTargetTriple().isMacOSX() &&
4107  Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
4108  (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) ||
4109  (Subtarget.isTargetELF() && !isPPC64 &&
4110  !G->getGlobal()->hasLocalLinkage() &&
4111  DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
4112  // PC-relative references to external symbols should go through $stub,
4113  // unless we're building with the leopard linker or later, which
4114  // automatically synthesizes these stubs.
4115  OpFlags = PPCII::MO_PLT_OR_STUB;
4116  }
4117 
4118  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4119  // every direct call is) turn it into a TargetGlobalAddress /
4120  // TargetExternalSymbol node so that legalize doesn't hack it.
4121  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4122  Callee.getValueType(), 0, OpFlags);
4123  needIndirectCall = false;
4124  }
4125 
4126  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4127  unsigned char OpFlags = 0;
4128 
4129  if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
4130  (Subtarget.getTargetTriple().isMacOSX() &&
4131  Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
4132  (Subtarget.isTargetELF() && !isPPC64 &&
4133  DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
4134  // PC-relative references to external symbols should go through $stub,
4135  // unless we're building with the leopard linker or later, which
4136  // automatically synthesizes these stubs.
4137  OpFlags = PPCII::MO_PLT_OR_STUB;
4138  }
4139 
4140  Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4141  OpFlags);
4142  needIndirectCall = false;
4143  }
4144 
4145  if (IsPatchPoint) {
4146  // We'll form an invalid direct call when lowering a patchpoint; the full
4147  // sequence for an indirect call is complicated, and many of the
4148  // instructions introduced might have side effects (and, thus, can't be
4149  // removed later). The call itself will be removed as soon as the
4150  // argument/return lowering is complete, so the fact that it has the wrong
4151  // kind of operands should not really matter.
4152  needIndirectCall = false;
4153  }
4154 
4155  if (needIndirectCall) {
4156  // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4157  // to do the call, we can't use PPCISD::CALL.
4158  SDValue MTCTROps[] = {Chain, Callee, InFlag};
4159 
4160  if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4161  // Function pointers in the 64-bit SVR4 ABI do not point to the function
4162  // entry point, but to the function descriptor (the function entry point
4163  // address is part of the function descriptor though).
4164  // The function descriptor is a three doubleword structure with the
4165  // following fields: function entry point, TOC base address and
4166  // environment pointer.
4167  // Thus for a call through a function pointer, the following actions need
4168  // to be performed:
4169  // 1. Save the TOC of the caller in the TOC save area of its stack
4170  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4171  // 2. Load the address of the function entry point from the function
4172  // descriptor.
4173  // 3. Load the TOC of the callee from the function descriptor into r2.
4174  // 4. Load the environment pointer from the function descriptor into
4175  // r11.
4176  // 5. Branch to the function entry point address.
4177  // 6. On return of the callee, the TOC of the caller needs to be
4178  // restored (this is done in FinishCall()).
4179  //
4180  // The loads are scheduled at the beginning of the call sequence, and the
4181  // register copies are flagged together to ensure that no other
4182  // operations can be scheduled in between. E.g. without flagging the
4183  // copies together, a TOC access in the caller could be scheduled between
4184  // the assignment of the callee TOC and the branch to the callee, which
4185  // results in the TOC access going through the TOC of the callee instead
4186  // of going through the TOC of the caller, which leads to incorrect code.
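// A sketch of the descriptor layout the code below assumes (field offsets
// 0, 8 and 16 match the three loads that follow; names illustrative):
//
//   struct FunctionDescriptor {
//     void *EntryPoint;     // offset 0,  moved into CTR
//     void *TOCBase;        // offset 8,  copied into r2
//     void *Environment;    // offset 16, copied into r11 (unless 'nest')
//   };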
4187 
4188  // Load the address of the function entry point from the function
4189  // descriptor.
4190  SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4191  if (LDChain.getValueType() == MVT::Glue)
4192  LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4193 
4194  bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
4195 
4196  MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4197  SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4198  false, false, LoadsInv, 8);
4199 
4200  // Load environment pointer into r11.
4201  SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4202  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4203  SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
4204  MPI.getWithOffset(16), false, false,
4205  LoadsInv, 8);
4206 
4207  SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4208  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4209  SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
4210  MPI.getWithOffset(8), false, false,
4211  LoadsInv, 8);
4212 
4213  setUsesTOCBasePtr(DAG);
4214  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4215  InFlag);
4216  Chain = TOCVal.getValue(0);
4217  InFlag = TOCVal.getValue(1);
4218 
4219  // If the function call has an explicit 'nest' parameter, it takes the
4220  // place of the environment pointer.
4221  if (!hasNest) {
4222  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4223  InFlag);
4224 
4225  Chain = EnvVal.getValue(0);
4226  InFlag = EnvVal.getValue(1);
4227  }
4228 
4229  MTCTROps[0] = Chain;
4230  MTCTROps[1] = LoadFuncPtr;
4231  MTCTROps[2] = InFlag;
4232  }
4233 
4234  Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4235  makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4236  InFlag = Chain.getValue(1);
4237 
4238  NodeTys.clear();
4239  NodeTys.push_back(MVT::Other);
4240  NodeTys.push_back(MVT::Glue);
4241  Ops.push_back(Chain);
4242  CallOpc = PPCISD::BCTRL;
4243  Callee.setNode(nullptr);
4244  // Add use of X11 (holding environment pointer)
4245  if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4246  Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4247  // Add CTR register as callee so a bctr can be emitted later.
4248  if (isTailCall)
4249  Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4250  }
4251 
4252  // If this is a direct call, pass the chain and the callee.
4253  if (Callee.getNode()) {
4254  Ops.push_back(Chain);
4255  Ops.push_back(Callee);
4256  }
4257  // If this is a tail call add stack pointer delta.
4258  if (isTailCall)
4259  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4260 
4261  // Add argument registers to the end of the list so that they are known live
4262  // into the call.
4263  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4264  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4265  RegsToPass[i].second.getValueType()));
4266 
4267  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4268  // into the call.
4269  if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
4270  setUsesTOCBasePtr(DAG);
4271  Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4272  }
4273 
4274  return CallOpc;
4275 }
4276 
4277 static
4278 bool isLocalCall(const SDValue &Callee)
4279 {
4280  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4281  return G->getGlobal()->isStrongDefinitionForLinker();
4282  return false;
4283 }
4284 
4285 SDValue
4286 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
4287  CallingConv::ID CallConv, bool isVarArg,
4288  const SmallVectorImpl<ISD::InputArg> &Ins,
4289  SDLoc dl, SelectionDAG &DAG,
4290  SmallVectorImpl<SDValue> &InVals) const {
4291 
4292  SmallVector<CCValAssign, 16> RVLocs;
4293  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4294  *DAG.getContext());
4295  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4296 
4297  // Copy all of the result registers out of their specified physreg.
4298  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4299  CCValAssign &VA = RVLocs[i];
4300  assert(VA.isRegLoc() && "Can only return in registers!");
4301 
4302  SDValue Val = DAG.getCopyFromReg(Chain, dl,
4303  VA.getLocReg(), VA.getLocVT(), InFlag);
4304  Chain = Val.getValue(1);
4305  InFlag = Val.getValue(2);
4306 
4307  switch (VA.getLocInfo()) {
4308  default: llvm_unreachable("Unknown loc info!");
4309  case CCValAssign::Full: break;
4310  case CCValAssign::AExt:
4311  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4312  break;
4313  case CCValAssign::ZExt:
4314  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4315  DAG.getValueType(VA.getValVT()));
4316  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4317  break;
4318  case CCValAssign::SExt:
4319  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4320  DAG.getValueType(VA.getValVT()));
4321  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4322  break;
4323  }
4324 
4325  InVals.push_back(Val);
4326  }
4327 
4328  return Chain;
4329 }
4330 
4331 SDValue
4332 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
4333  bool isTailCall, bool isVarArg, bool IsPatchPoint,
4334  bool hasNest, SelectionDAG &DAG,
4335  SmallVector<std::pair<unsigned, SDValue>, 8>
4336  &RegsToPass,
4337  SDValue InFlag, SDValue Chain,
4338  SDValue CallSeqStart, SDValue &Callee,
4339  int SPDiff, unsigned NumBytes,
4340  const SmallVectorImpl<ISD::InputArg> &Ins,
4341  SmallVectorImpl<SDValue> &InVals,
4342  ImmutableCallSite *CS) const {
4343 
4344  std::vector<EVT> NodeTys;
4345  SmallVector<SDValue, 8> Ops;
4346  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4347  SPDiff, isTailCall, IsPatchPoint, hasNest,
4348  RegsToPass, Ops, NodeTys, CS, Subtarget);
4349 
4350  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4351  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4352  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4353 
4354  // When performing tail call optimization the callee pops its arguments off
4355  // the stack. Account for this here so these bytes can be pushed back on in
4356  // PPCFrameLowering::eliminateCallFramePseudoInstr.
4357  int BytesCalleePops =
4358  (CallConv == CallingConv::Fast &&
4359  getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4360 
4361  // Add a register mask operand representing the call-preserved registers.
4362  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4363  const uint32_t *Mask =
4364  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4365  assert(Mask && "Missing call preserved mask for calling convention");
4366  Ops.push_back(DAG.getRegisterMask(Mask));
4367 
4368  if (InFlag.getNode())
4369  Ops.push_back(InFlag);
4370 
4371  // Emit tail call.
4372  if (isTailCall) {
4373  assert(((Callee.getOpcode() == ISD::Register &&
4374  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4375  Callee.getOpcode() == ISD::TargetExternalSymbol ||
4376  Callee.getOpcode() == ISD::TargetGlobalAddress ||
4377  isa<ConstantSDNode>(Callee)) &&
4378  "Expecting a global address, external symbol, absolute value or register");
4379 
4380  DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
4381  return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4382  }
4383 
4384  // Add a NOP immediately after the branch instruction when using the 64-bit
4385  // SVR4 ABI. At link time, if caller and callee are in a different module and
4386  // thus have a different TOC, the call will be replaced with a call to a stub
4387  // function which saves the current TOC, loads the TOC of the callee and
4388  // branches to the callee. The NOP will be replaced with a load instruction
4389  // which restores the TOC of the caller from the TOC save slot of the current
4390  // stack frame. If caller and callee belong to the same module (and have the
4391  // same TOC), the NOP will remain unchanged.
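// A sketch of the resulting sequence for a cross-module call (the exact
// restore offset is the ABI's TOC save slot; mnemonics illustrative):
//
//   bl foo        # may be redirected to a linker-generated stub
//   nop           # patched by the linker to: ld r2, <TOC save offset>(r1)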
4392 
4393  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
4394  !IsPatchPoint) {
4395  if (CallOpc == PPCISD::BCTRL) {
4396  // This is a call through a function pointer.
4397  // Restore the caller TOC from the save area into R2.
4398  // See PrepareCall() for more information about calls through function
4399  // pointers in the 64-bit SVR4 ABI.
4400  // We are using a target-specific load with r2 hard coded, because the
4401  // result of a target-independent load would never go directly into r2,
4402  // since r2 is a reserved register (which prevents the register allocator
4403  // from allocating it), resulting in an additional register being
4404  // allocated and an unnecessary move instruction being generated.
4405  CallOpc = PPCISD::BCTRL_LOAD_TOC;
4406 
4407  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4408  SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4409  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4410  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4411  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4412 
4413  // The address needs to go after the chain input but before the flag (or
4414  // any other variadic arguments).
4415  Ops.insert(std::next(Ops.begin()), AddTOC);
4416  } else if ((CallOpc == PPCISD::CALL) &&
4417  (!isLocalCall(Callee) ||
4418  DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4419  // Otherwise insert NOP for non-local calls.
4420  CallOpc = PPCISD::CALL_NOP;
4421  }
4422 
4423  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4424  InFlag = Chain.getValue(1);
4425 
4426  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4427  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4428  InFlag, dl);
4429  if (!Ins.empty())
4430  InFlag = Chain.getValue(1);
4431 
4432  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4433  Ins, dl, DAG, InVals);
4434 }
4435 
4436 SDValue
4437 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4438  SmallVectorImpl<SDValue> &InVals) const {
4439  SelectionDAG &DAG = CLI.DAG;
4440  SDLoc &dl = CLI.DL;
4441  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4442  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4443  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4444  SDValue Chain = CLI.Chain;
4445  SDValue Callee = CLI.Callee;
4446  bool &isTailCall = CLI.IsTailCall;
4447  CallingConv::ID CallConv = CLI.CallConv;
4448  bool isVarArg = CLI.IsVarArg;
4449  bool IsPatchPoint = CLI.IsPatchPoint;
4450  ImmutableCallSite *CS = CLI.CS;
4451 
4452  if (isTailCall)
4453  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4454  Ins, DAG);
4455 
4456  if (!isTailCall && CS && CS->isMustTailCall())
4457  report_fatal_error("failed to perform tail call elimination on a call "
4458  "site marked musttail");
4459 
4460  if (Subtarget.isSVR4ABI()) {
4461  if (Subtarget.isPPC64())
4462  return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4463  isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4464  dl, DAG, InVals, CS);
4465  else
4466  return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4467  isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4468  dl, DAG, InVals, CS);
4469  }
4470 
4471  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4472  isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4473  dl, DAG, InVals, CS);
4474 }
4475 
4476 SDValue
4477 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
4478  CallingConv::ID CallConv, bool isVarArg,
4479  bool isTailCall, bool IsPatchPoint,
4480  const SmallVectorImpl<ISD::OutputArg> &Outs,
4481  const SmallVectorImpl<SDValue> &OutVals,
4482  const SmallVectorImpl<ISD::InputArg> &Ins,
4483  SDLoc dl, SelectionDAG &DAG,
4484  SmallVectorImpl<SDValue> &InVals,
4485  ImmutableCallSite *CS) const {
4486  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4487  // of the 32-bit SVR4 ABI stack frame layout.
4488 
4489  assert((CallConv == CallingConv::C ||
4490  CallConv == CallingConv::Fast) && "Unknown calling convention!");
4491 
4492  unsigned PtrByteSize = 4;
4493 
4494  MachineFunction &MF = DAG.getMachineFunction();
4495 
4496  // Mark this function as potentially containing a tail call. As a
4497  // consequence the frame pointer will be used for dynamic stack allocation
4498  // and for restoring the caller's stack pointer in this function's epilog.
4499  // This is done because the tail-called function might overwrite the value
4500  // in this function's (MF) stack pointer stack slot 0(SP).
4501  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4502  CallConv == CallingConv::Fast)
4503  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4504 
4505  // Count how many bytes are to be pushed on the stack, including the linkage
4506  // area, parameter list area and the part of the local variable space which
4507  // contains copies of aggregates which are passed by value.
4508 
4509  // Assign locations to all of the outgoing arguments.
4510  SmallVector<CCValAssign, 16> ArgLocs;
4511  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4512  *DAG.getContext());
4513 
4514  // Reserve space for the linkage area on the stack.
4515  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4516  PtrByteSize);
4517 
4518  if (isVarArg) {
4519  // Handle fixed and variable vector arguments differently.
4520  // Fixed vector arguments go into registers as long as registers are
4521  // available. Variable vector arguments always go into memory.
4522  unsigned NumArgs = Outs.size();
4523 
4524  for (unsigned i = 0; i != NumArgs; ++i) {
4525  MVT ArgVT = Outs[i].VT;
4526  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4527  bool Result;
4528 
4529  if (Outs[i].IsFixed) {
4530  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4531  CCInfo);
4532  } else {
4533  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4534  ArgFlags, CCInfo);
4535  }
4536 
4537  if (Result) {
4538 #ifndef NDEBUG
4539  errs() << "Call operand #" << i << " has unhandled type "
4540  << EVT(ArgVT).getEVTString() << "\n";
4541 #endif
4542  llvm_unreachable(nullptr);
4543  }
4544  }
4545  } else {
4546  // All arguments are treated the same.
4547  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4548  }
4549 
4550  // Assign locations to all of the outgoing aggregate by value arguments.
4551  SmallVector<CCValAssign, 16> ByValArgLocs;
4552  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4553  ByValArgLocs, *DAG.getContext());
4554 
4555  // Reserve stack space for the allocations in CCInfo.
4556  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4557 
4558  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4559 
4560  // Size of the linkage area, parameter list area and the part of the local
4561  // space variable where copies of aggregates which are passed by value are
4562  // stored.
4563  unsigned NumBytes = CCByValInfo.getNextStackOffset();
4564 
4565  // Calculate by how many bytes the stack has to be adjusted in case of tail
4566  // call optimization.
4567  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4568 
4569  // Adjust the stack pointer for the new arguments...
4570  // These operations are automatically eliminated by the prolog/epilog pass
4571  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4572  dl);
4573  SDValue CallSeqStart = Chain;
4574 
4575  // Load the return address and frame pointer so it can be moved somewhere else
4576  // later.
4577  SDValue LROp, FPOp;
4578  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
4579  dl);
4580 
4581  // Set up a copy of the stack pointer for use loading and storing any
4582  // arguments that may not fit in the registers available for argument
4583  // passing.
4584  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4585 
4586  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4587  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4588  SmallVector<SDValue, 8> MemOpChains;
4589 
4590  bool seenFloatArg = false;
4591  // Walk the register/memloc assignments, inserting copies/loads.
4592  for (unsigned i = 0, j = 0, e = ArgLocs.size();
4593  i != e;
4594  ++i) {
4595  CCValAssign &VA = ArgLocs[i];
4596  SDValue Arg = OutVals[i];
4597  ISD::ArgFlagsTy Flags = Outs[i].Flags;
4598 
4599  if (Flags.isByVal()) {
4600  // Argument is an aggregate which is passed by value, thus we need to
4601  // create a copy of it in the local variable space of the current stack
4602  // frame (which is the stack frame of the caller) and pass the address of
4603  // this copy to the callee.
4604  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4605  CCValAssign &ByValVA = ByValArgLocs[j++];
4606  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4607 
4608  // Memory reserved in the local variable space of the callers stack frame.
4609  unsigned LocMemOffset = ByValVA.getLocMemOffset();
4610 
4611  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4612  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4613  StackPtr, PtrOff);
4614 
4615  // Create a copy of the argument in the local area of the current
4616  // stack frame.
4617  SDValue MemcpyCall =
4618  CreateCopyOfByValArgument(Arg, PtrOff,
4619  CallSeqStart.getNode()->getOperand(0),
4620  Flags, DAG, dl);
4621 
4622  // This must go outside the CALLSEQ_START..END.
4623  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4624  CallSeqStart.getNode()->getOperand(1),
4625  SDLoc(MemcpyCall));
4626  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4627  NewCallSeqStart.getNode());
4628  Chain = CallSeqStart = NewCallSeqStart;
4629 
4630  // Pass the address of the aggregate copy on the stack either in a
4631  // physical register or in the parameter list area of the current stack
4632  // frame to the callee.
4633  Arg = PtrOff;
4634  }
4635 
4636  if (VA.isRegLoc()) {
4637  if (Arg.getValueType() == MVT::i1)
4638  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4639 
4640  seenFloatArg |= VA.getLocVT().isFloatingPoint();
4641  // Put argument in a physical register.
4642  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4643  } else {
4644  // Put argument in the parameter list area of the current stack frame.
4645  assert(VA.isMemLoc());
4646  unsigned LocMemOffset = VA.getLocMemOffset();
4647 
4648  if (!isTailCall) {
4649  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4650  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4651  StackPtr, PtrOff);
4652 
4653  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4654  MachinePointerInfo(),
4655  false, false, 0));
4656  } else {
4657  // Calculate and remember argument location.
4658  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4659  TailCallArguments);
4660  }
4661  }
4662  }
4663 
4664  if (!MemOpChains.empty())
4665  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4666 
4667  // Build a sequence of copy-to-reg nodes chained together with token chain
4668  // and flag operands which copy the outgoing args into the appropriate regs.
4669  SDValue InFlag;
4670  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4671  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4672  RegsToPass[i].second, InFlag);
4673  InFlag = Chain.getValue(1);
4674  }
4675 
4676  // Set CR bit 6 to true if this is a vararg call with floating args passed in
4677  // registers.
4678  if (isVarArg) {
4679  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4680  SDValue Ops[] = { Chain, InFlag };
4681 
4682  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4683  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4684 
4685  InFlag = Chain.getValue(1);
4686  }
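// Illustrative example: for a 32-bit SVR4 vararg call such as
// printf("%f", x), x travels in an FPR, so CR6SET is emitted above; the
// callee tests CR bit 6 to decide whether its va_start prologue must save
// the FP argument registers.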
4687 
4688  if (isTailCall)
4689  PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4690  false, TailCallArguments);
4691 
4692  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
4693  /* unused except on PPC64 ELFv1 */ false, DAG,
4694  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4695  NumBytes, Ins, InVals, CS);
4696 }
4697 
4698 // Copy an argument into memory, being careful to do this outside the
4699 // call sequence for the call to which the argument belongs.
4700 SDValue
4701 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4702  SDValue CallSeqStart,
4703  ISD::ArgFlagsTy Flags,
4704  SelectionDAG &DAG,
4705  SDLoc dl) const {
4706  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4707  CallSeqStart.getNode()->getOperand(0),
4708  Flags, DAG, dl);
4709  // The MEMCPY must go outside the CALLSEQ_START..END.
4710  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4711  CallSeqStart.getNode()->getOperand(1),
4712  SDLoc(MemcpyCall));
4713  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4714  NewCallSeqStart.getNode());
4715  return NewCallSeqStart;
4716 }
4717 
4718 SDValue
4719 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4720  CallingConv::ID CallConv, bool isVarArg,
4721  bool isTailCall, bool IsPatchPoint,
4722  const SmallVectorImpl<ISD::OutputArg> &Outs,
4723  const SmallVectorImpl<SDValue> &OutVals,
4724  const SmallVectorImpl<ISD::InputArg> &Ins,
4725  SDLoc dl, SelectionDAG &DAG,
4726  SmallVectorImpl<SDValue> &InVals,
4727  ImmutableCallSite *CS) const {
4728 
4729  bool isELFv2ABI = Subtarget.isELFv2ABI();
4730  bool isLittleEndian = Subtarget.isLittleEndian();
4731  unsigned NumOps = Outs.size();
4732  bool hasNest = false;
4733 
4734  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4735  unsigned PtrByteSize = 8;
4736 
4737  MachineFunction &MF = DAG.getMachineFunction();
4738 
4739  // Mark this function as potentially containing a tail call. As a
4740  // consequence the frame pointer will be used for dynamic stack allocation
4741  // and for restoring the caller's stack pointer in this function's epilog.
4742  // This is done because the tail-called function might overwrite the value
4743  // in this function's (MF) stack pointer stack slot 0(SP).
4744  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4745  CallConv == CallingConv::Fast)
4746  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4747 
4748  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4749  "fastcc not supported on varargs functions");
4750 
4751  // Count how many bytes are to be pushed on the stack, including the linkage
4752  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
4753  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4754  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
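// Layout sketch of the two linkage areas described above (byte offsets
// from the stack pointer at entry):
//
//   ELFv1 (48 bytes): 0 SP | 8 CR | 16 LR | 24,32 unused | 40 TOC
//   ELFv2 (32 bytes): 0 SP | 8 CR | 16 LR | 24 TOC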
4755  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4756  unsigned NumBytes = LinkageSize;
4757  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4758  unsigned &QFPR_idx = FPR_idx;
4759 
4760  static const MCPhysReg GPR[] = {
4761  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4762  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4763  };
4764  static const MCPhysReg VR[] = {
4765  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4766  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4767  };
4768  static const MCPhysReg VSRH[] = {
4769  PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4770  PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4771  };
4772 
4773  const unsigned NumGPRs = array_lengthof(GPR);
4774  const unsigned NumFPRs = 13;
4775  const unsigned NumVRs = array_lengthof(VR);
4776  const unsigned NumQFPRs = NumFPRs;
4777 
4778  // When using the fast calling convention, we don't provide backing for
4779  // arguments that will be in registers.
4780  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
4781 
4782  // Add up all the space actually used.
4783  for (unsigned i = 0; i != NumOps; ++i) {
4784  ISD::ArgFlagsTy Flags = Outs[i].Flags;
4785  EVT ArgVT = Outs[i].VT;
4786  EVT OrigVT = Outs[i].ArgVT;
4787 
4788  if (Flags.isNest())
4789  continue;
4790 
4791  if (CallConv == CallingConv::Fast) {
4792  if (Flags.isByVal())
4793  NumGPRsUsed += (Flags.getByValSize()+7)/8;
4794  else
4795  switch (ArgVT.getSimpleVT().SimpleTy) {
4796  default: llvm_unreachable("Unexpected ValueType for argument!");
4797  case MVT::i1:
4798  case MVT::i32:
4799  case MVT::i64:
4800  if (++NumGPRsUsed <= NumGPRs)
4801  continue;
4802  break;
4803  case MVT::v4i32:
4804  case MVT::v8i16:
4805  case MVT::v16i8:
4806  case MVT::v2f64:
4807  case MVT::v2i64:
4808  case MVT::v1i128:
4809  if (++NumVRsUsed <= NumVRs)
4810  continue;
4811  break;
4812  case MVT::v4f32:
4813  // When using QPX, this is handled like a FP register, otherwise, it
4814  // is an Altivec register.
4815  if (Subtarget.hasQPX()) {
4816  if (++NumFPRsUsed <= NumFPRs)
4817  continue;
4818  } else {
4819  if (++NumVRsUsed <= NumVRs)
4820  continue;
4821  }
4822  break;
4823  case MVT::f32:
4824  case MVT::f64:
4825  case MVT::v4f64: // QPX
4826  case MVT::v4i1: // QPX
4827  if (++NumFPRsUsed <= NumFPRs)
4828  continue;
4829  break;
4830  }
4831  }
4832 
4833  /* Respect alignment of argument on the stack. */
4834  unsigned Align =
4835  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4836  NumBytes = ((NumBytes + Align - 1) / Align) * Align;
4837 
4838  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4839  if (Flags.isInConsecutiveRegsLast())
4840  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4841  }
4842 
4843  unsigned NumBytesActuallyUsed = NumBytes;
4844 
4845  // The prolog code of the callee may store up to 8 GPR argument registers to
4846  // the stack, allowing va_start to index over them in memory if it is varargs.
4847  // Because we cannot tell if this is needed on the caller side, we have to
4848  // conservatively assume that it is needed. As such, make sure we have at
4849  // least enough stack space for the caller to store the 8 GPRs.
4850  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
4851  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
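// Example of the clamp above: a call passing only two integer arguments
// computes NumBytes = LinkageSize + 16, but the maximum raises it to
// LinkageSize + 64 so the callee may unconditionally spill r3-r10 into
// the parameter save area for va_start.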
4852 
4853  // Tail call needs the stack to be aligned.
4854  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4855  CallConv == CallingConv::Fast)
4856  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
4857 
4858  // Calculate by how many bytes the stack has to be adjusted in case of tail
4859  // call optimization.
4860  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4861 
4862  // To protect arguments on the stack from being clobbered in a tail call,
4863  // force all the loads to happen before doing any other lowering.
4864  if (isTailCall)
4865  Chain = DAG.getStackArgumentTokenFactor(Chain);
4866 
4867  // Adjust the stack pointer for the new arguments...
4868  // These operations are automatically eliminated by the prolog/epilog pass
4869  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4870  dl);
4871  SDValue CallSeqStart = Chain;
4872 
4873  // Load the return address and frame pointer so it can be moved somewhere else
4874  // later.
4875  SDValue LROp, FPOp;
4876  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4877  dl);
4878 
4879  // Set up a copy of the stack pointer for use loading and storing any
4880  // arguments that may not fit in the registers available for argument
4881  // passing.
4882  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4883 
4884  // Figure out which arguments are going to go in registers, and which in
4885  // memory. Also, if this is a vararg function, floating point arguments
4886  // must be stored to our stack, and loaded into integer regs as well, if
4887  // any integer regs are available for argument passing.
4888  unsigned ArgOffset = LinkageSize;
4889 
4890  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4891  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4892 
4893  SmallVector<SDValue, 8> MemOpChains;
4894  for (unsigned i = 0; i != NumOps; ++i) {
4895  SDValue Arg = OutVals[i];
4896  ISD::ArgFlagsTy Flags = Outs[i].Flags;
4897  EVT ArgVT = Outs[i].VT;
4898  EVT OrigVT = Outs[i].ArgVT;
4899 
4900  // PtrOff will be used to store the current argument to the stack if a
4901  // register cannot be found for it.
4902  SDValue PtrOff;
4903 
4904  // We re-align the argument offset for each argument, except when using the
4905  // fast calling convention, when we need to make sure we do that only when
4906  // we'll actually use a stack slot.
4907  auto ComputePtrOff = [&]() {
4908  /* Respect alignment of argument on the stack. */
4909  unsigned Align =
4910  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4911  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
4912 
4913  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
4914 
4915  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4916  };
4917 
4918  if (CallConv != CallingConv::Fast) {
4919  ComputePtrOff();
4920 
4921  /* Compute GPR index associated with argument offset. */
4922  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4923  GPR_idx = std::min(GPR_idx, NumGPRs);
4924  }
4925 
4926  // Promote integers to 64-bit values.
4927  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4928  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4929  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4930  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4931  }
4932 
4933  // FIXME memcpy is used way more than necessary. Correctness first.
4934  // Note: "by value" is code for passing a structure by value, not
4935  // basic types.
4936  if (Flags.isByVal()) {
4937  // Note: Size includes alignment padding, so
4938  // struct x { short a; char b; }
4939  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
4940  // These are the proper values we need for right-justifying the
4941  // aggregate in a parameter register.
4942  unsigned Size = Flags.getByValSize();
4943 
4944  // An empty aggregate parameter takes up no storage and no
4945  // registers.
4946  if (Size == 0)
4947  continue;
4948 
4949  if (CallConv == CallingConv::Fast)
4950  ComputePtrOff();
4951 
4952  // All aggregates smaller than 8 bytes must be passed right-justified.
4953  if (Size==1 || Size==2 || Size==4) {
4954  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
4955  if (GPR_idx != NumGPRs) {
4956  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4957  MachinePointerInfo(), VT,
4958  false, false, false, 0);
4959  MemOpChains.push_back(Load.getValue(1));
4960  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4961 
4962  ArgOffset += PtrByteSize;
4963  continue;
4964  }
4965  }
4966 
4967  if (GPR_idx == NumGPRs && Size < 8) {
4968  SDValue AddPtr = PtrOff;
4969  if (!isLittleEndian) {
4970  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
4971  PtrOff.getValueType());
4972  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4973  }
4974  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4975  CallSeqStart,
4976  Flags, DAG, dl);
4977  ArgOffset += PtrByteSize;
4978  continue;
4979  }
4980  // Copy entire object into memory. There are cases where gcc-generated
4981  // code assumes it is there, even if it could be put entirely into
4982  // registers. (This is not what the doc says.)
4983 
4984  // FIXME: The above statement is likely due to a misunderstanding of the
4985  // documents. All arguments must be copied into the parameter area BY
4986  // THE CALLEE in the event that the callee takes the address of any
4987  // formal argument. That has not yet been implemented. However, it is
4988  // reasonable to use the stack area as a staging area for the register
4989  // load.
4990 
4991  // Skip this for small aggregates, as we will use the same slot for a
4992  // right-justified copy, below.
4993  if (Size >= 8)
4994  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4995  CallSeqStart,
4996  Flags, DAG, dl);
4997 
4998  // When a register is available, pass a small aggregate right-justified.
4999  if (Size < 8 && GPR_idx != NumGPRs) {
5000  // The easiest way to get this right-justified in a register
5001  // is to copy the structure into the rightmost portion of a
5002  // local variable slot, then load the whole slot into the
5003  // register.
5004  // FIXME: The memcpy seems to produce pretty awful code for
5005  // small aggregates, particularly for packed ones.
5006  // FIXME: It would be preferable to use the slot in the
5007  // parameter save area instead of a new local variable.
5008  SDValue AddPtr = PtrOff;
5009  if (!isLittleEndian) {
5010  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5011  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5012  }
5013  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5014  CallSeqStart,
5015  Flags, DAG, dl);
5016 
5017  // Load the slot into the register.
5018  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
5019  MachinePointerInfo(),
5020  false, false, false, 0);
5021  MemOpChains.push_back(Load.getValue(1));
5022  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5023 
5024  // Done with this argument.
5025  ArgOffset += PtrByteSize;
5026  continue;
5027  }
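  // Worked example (illustrative): on big-endian, a 3-byte aggregate headed
  // for a GPR is memcpy'd to AddPtr = PtrOff + (8 - 3), so the full 8-byte
  // load above picks it up right-justified in the register, as required for
  // aggregates smaller than a doubleword.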
5028 
5029  // For aggregates larger than PtrByteSize, copy the pieces of the
5030  // object that fit into registers from the parameter save area.
5031  for (unsigned j=0; j<Size; j+=PtrByteSize) {
5032  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5033  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5034  if (GPR_idx != NumGPRs) {
5035  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
5036  MachinePointerInfo(),
5037  false, false, false, 0);
5038  MemOpChains.push_back(Load.getValue(1));
5039  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5040  ArgOffset += PtrByteSize;
5041  } else {
5042  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5043  break;
5044  }
5045  }
5046  continue;
5047  }
5048 
5049  switch (Arg.getSimpleValueType().SimpleTy) {
5050  default: llvm_unreachable("Unexpected ValueType for argument!");
5051  case MVT::i1:
5052  case MVT::i32:
5053  case MVT::i64:
5054  if (Flags.isNest()) {
5055  // The 'nest' parameter, if any, is passed in R11.
5056  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5057  hasNest = true;
5058  break;
5059  }
5060 
5061  // These can be scalar arguments or elements of an integer array type
5062  // passed directly. Clang may use those instead of "byval" aggregate
5063  // types to avoid forcing arguments to memory unnecessarily.
5064  if (GPR_idx != NumGPRs) {
5065  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5066  } else {
5067  if (CallConv == CallingConv::Fast)
5068  ComputePtrOff();
5069 
5070  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5071  true, isTailCall, false, MemOpChains,
5072  TailCallArguments, dl);
5073  if (CallConv == CallingConv::Fast)
5074  ArgOffset += PtrByteSize;
5075  }
5076  if (CallConv != CallingConv::Fast)
5077  ArgOffset += PtrByteSize;
5078  break;
5079  case MVT::f32:
5080  case MVT::f64: {
5081  // These can be scalar arguments or elements of a float array type
5082  // passed directly. The latter are used to implement ELFv2 homogeneous
5083  // float aggregates.
5084 
5085  // Named arguments go into FPRs first, and once they overflow, the
5086  // remaining arguments go into GPRs and then the parameter save area.
5087  // Unnamed arguments for vararg functions always go to GPRs and
5088  // then the parameter save area. For now, we always put arguments to
5089  // vararg routines in both locations (FPR *and* GPR or stack slot).
5090  bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5091  bool NeededLoad = false;
5092 
5093  // First load the argument into the next available FPR.
5094  if (FPR_idx != NumFPRs)
5095  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5096 
5097  // Next, load the argument into GPR or stack slot if needed.
5098  if (!NeedGPROrStack)
5099  ;
5100  else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5101  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5102  // once we support fp <-> gpr moves.
5103 
5104  // In the non-vararg case, this can only ever happen in the
5105  // presence of f32 array types, since otherwise we never run
5106  // out of FPRs before running out of GPRs.
5107  SDValue ArgVal;
5108 
5109  // Double values are always passed in a single GPR.
5110  if (Arg.getValueType() != MVT::f32) {
5111  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5112 
5113  // Non-array float values are extended and passed in a GPR.
5114  } else if (!Flags.isInConsecutiveRegs()) {
5115  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5116  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5117 
5118  // If we have an array of floats, we collect every odd element
5119  // together with its predecessor into one GPR.
5120  } else if (ArgOffset % PtrByteSize != 0) {
5121  SDValue Lo, Hi;
5122  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5123  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5124  if (!isLittleEndian)
5125  std::swap(Lo, Hi);
5126  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5127 
5128  // The final element, if even, goes into the first half of a GPR.
5129  } else if (Flags.isInConsecutiveRegsLast()) {
5130  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5131  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5132  if (!isLittleEndian)
5133  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5134  DAG.getConstant(32, dl, MVT::i32));
5135 
5136  // Non-final even elements are skipped; they will be handled
5137  // together with the subsequent argument on the next go-around.
5138  } else
5139  ArgVal = SDValue();
5140 
5141  if (ArgVal.getNode())
5142  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5143  } else {
5144  if (CallConv == CallingConv::Fast)
5145  ComputePtrOff();
5146 
5147  // Single-precision floating-point values are mapped to the
5148  // second (rightmost) word of the stack doubleword.
5149  if (Arg.getValueType() == MVT::f32 &&
5150  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5151  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5152  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5153  }
5154 
5155  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5156  true, isTailCall, false, MemOpChains,
5157  TailCallArguments, dl);
5158 
5159  NeededLoad = true;
5160  }
5161  // When passing an array of floats, the array occupies consecutive
5162  // space in the argument area; only round up to the next doubleword
5163  // at the end of the array. Otherwise, each float takes 8 bytes.
5164  if (CallConv != CallingConv::Fast || NeededLoad) {
5165  ArgOffset += (Arg.getValueType() == MVT::f32 &&
5166  Flags.isInConsecutiveRegs()) ? 4 : 8;
5167  if (Flags.isInConsecutiveRegsLast())
5168  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5169  }
5170  break;
5171  }
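  // Worked example (illustrative, assuming GPRs remain and the fast calling
  // convention is not in use): an ELFv2 homogeneous aggregate of three f32
  // elements with FPRs exhausted is handled as follows. Element 0 is skipped;
  // element 1 is paired with element 0 via BUILD_PAIR into one GPR (halves
  // swapped on big endian); element 2, being last and even, lands in the
  // first half of the next GPR. ArgOffset advances 4 bytes per element and
  // is rounded up to a doubleword after the final element.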
5172  case MVT::v4f32:
5173  case MVT::v4i32:
5174  case MVT::v8i16:
5175  case MVT::v16i8:
5176  case MVT::v2f64:
5177  case MVT::v2i64:
5178  case MVT::v1i128:
5179  if (!Subtarget.hasQPX()) {
5180  // These can be scalar arguments or elements of a vector array type
5181  // passed directly. The latter are used to implement ELFv2 homogeneous
5182  // vector aggregates.
5183 
5184  // For a varargs call, named arguments go into VRs or on the stack as
5185  // usual; unnamed arguments always go to the stack or the corresponding
5186  // GPRs when within range. For now, we always put the value in both
5187  // locations (or even all three).
5188  if (isVarArg) {
5189  // We could elide this store in the case where the object fits
5190  // entirely in R registers. Maybe later.
5191  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5192  MachinePointerInfo(), false, false, 0);
5193  MemOpChains.push_back(Store);
5194  if (VR_idx != NumVRs) {
5195  SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
5196  MachinePointerInfo(),
5197  false, false, false, 0);
5198  MemOpChains.push_back(Load.getValue(1));
5199 
5200  unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5201  Arg.getSimpleValueType() == MVT::v2i64) ?
5202  VSRH[VR_idx] : VR[VR_idx];
5203  ++VR_idx;
5204 
5205  RegsToPass.push_back(std::make_pair(VReg, Load));
5206  }
5207  ArgOffset += 16;
5208  for (unsigned i=0; i<16; i+=PtrByteSize) {
5209  if (GPR_idx == NumGPRs)
5210  break;
5211  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5212  DAG.getConstant(i, dl, PtrVT));
5213  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5214  false, false, false, 0);
5215  MemOpChains.push_back(Load.getValue(1));
5216  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5217  }
5218  break;
5219  }
5220 
5221  // Non-varargs Altivec params go into VRs or on the stack.
5222  if (VR_idx != NumVRs) {
5223  unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5224  Arg.getSimpleValueType() == MVT::v2i64) ?
5225  VSRH[VR_idx] : VR[VR_idx];
5226  ++VR_idx;
5227 
5228  RegsToPass.push_back(std::make_pair(VReg, Arg));
5229  } else {
5230  if (CallConv == CallingConv::Fast)
5231  ComputePtrOff();
5232 
5233  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5234  true, isTailCall, true, MemOpChains,
5235  TailCallArguments, dl);
5236  if (CallConv == CallingConv::Fast)
5237  ArgOffset += 16;
5238  }
5239 
5240  if (CallConv != CallingConv::Fast)
5241  ArgOffset += 16;
5242  break;
5243  } // not QPX
5244 
5245  assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5246  "Invalid QPX parameter type");
5247 
5248  /* fall through */
5249  case MVT::v4f64:
5250  case MVT::v4i1: {
5251  bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5252  if (isVarArg) {
5253  // We could elide this store in the case where the object fits
5254  // entirely in R registers. Maybe later.
5255  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5256  MachinePointerInfo(), false, false, 0);
5257  MemOpChains.push_back(Store);
5258  if (QFPR_idx != NumQFPRs) {
5259  SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
5260  Store, PtrOff, MachinePointerInfo(),
5261  false, false, false, 0);
5262  MemOpChains.push_back(Load.getValue(1));
5263  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5264  }
5265  ArgOffset += (IsF32 ? 16 : 32);
5266  for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5267  if (GPR_idx == NumGPRs)
5268  break;
5269  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5270  DAG.getConstant(i, dl, PtrVT));
5271  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5272  false, false, false, 0);
5273  MemOpChains.push_back(Load.getValue(1));
5274  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5275  }
5276  break;
5277  }
5278 
5279  // Non-varargs QPX params go into registers or on the stack.
5280  if (QFPR_idx != NumQFPRs) {
5281  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5282  } else {
5283  if (CallConv == CallingConv::Fast)
5284  ComputePtrOff();
5285 
5286  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5287  true, isTailCall, true, MemOpChains,
5288  TailCallArguments, dl);
5289  if (CallConv == CallingConv::Fast)
5290  ArgOffset += (IsF32 ? 16 : 32);
5291  }
5292 
5293  if (CallConv != CallingConv::Fast)
5294  ArgOffset += (IsF32 ? 16 : 32);
5295  break;
5296  }
5297  }
5298  }
5299 
5300  assert(NumBytesActuallyUsed == ArgOffset);
5301  (void)NumBytesActuallyUsed;
5302 
5303  if (!MemOpChains.empty())
5304  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5305 
5306  // Check if this is an indirect call (MTCTR/BCTRL).
5307  // See PrepareCall() for more information about calls through function
5308  // pointers in the 64-bit SVR4 ABI.
5309  if (!isTailCall && !IsPatchPoint &&
5310  !isFunctionGlobalAddress(Callee) &&
5311  !isa<ExternalSymbolSDNode>(Callee)) {
5312  // Load r2 into a virtual register and store it to the TOC save area.
5313  setUsesTOCBasePtr(DAG);
5314  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5315  // TOC save area offset.
5316  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5317  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5318  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5319  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
5320  MachinePointerInfo::getStack(TOCSaveOffset),
5321  false, false, 0);
5322  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5323  // This does not mean the MTCTR instruction must use R12; it's easier
5324  // to model this as an extra parameter, so do that.
5325  if (isELFv2ABI && !IsPatchPoint)
5326  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5327  }
5328 
5329  // Build a sequence of copy-to-reg nodes chained together with token chain
5330  // and flag operands which copy the outgoing args into the appropriate regs.
5331  SDValue InFlag;
5332  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5333  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5334  RegsToPass[i].second, InFlag);
5335  InFlag = Chain.getValue(1);
5336  }
5337 
5338  if (isTailCall)
5339  PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
5340  FPOp, true, TailCallArguments);
5341 
5342  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
5343  hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5344  Callee, SPDiff, NumBytes, Ins, InVals, CS);
5345 }
5346 
5347 SDValue
5348 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
5349  CallingConv::ID CallConv, bool isVarArg,
5350  bool isTailCall, bool IsPatchPoint,
5351  const SmallVectorImpl<ISD::OutputArg> &Outs,
5352  const SmallVectorImpl<SDValue> &OutVals,
5353  const SmallVectorImpl<ISD::InputArg> &Ins,
5354  SDLoc dl, SelectionDAG &DAG,
5355  SmallVectorImpl<SDValue> &InVals,
5356  ImmutableCallSite *CS) const {
5357 
5358  unsigned NumOps = Outs.size();
5359 
5360  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5361  bool isPPC64 = PtrVT == MVT::i64;
5362  unsigned PtrByteSize = isPPC64 ? 8 : 4;
5363 
5364  MachineFunction &MF = DAG.getMachineFunction();
5365 
5366  // Mark this function as potentially containing a tail call. As a
5367  // consequence, the frame pointer will be used for dynamic allocation and
5368  // for restoring the caller's stack pointer in this function's epilogue.
5369  // This is done because a tail call might overwrite the value stored in
5370  // this function's (MF) stack pointer stack slot 0(SP).
5371  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5372  CallConv == CallingConv::Fast)
5373  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5374 
5375  // Count how many bytes are to be pushed on the stack, including the linkage
5376  // area, and parameter passing area. We start with 24/48 bytes, which is
5377  // prereserved space for [SP][CR][LR][3 x unused].
5378  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5379  unsigned NumBytes = LinkageSize;
5380 
5381  // Add up all the space actually used.
5382  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5383  // they all go in registers, but we must reserve stack space for them for
5384  // possible use by the caller. In varargs or 64-bit calls, parameters are
5385  // assigned stack space in order, with padding so Altivec parameters are
5386  // 16-byte aligned.
5387  unsigned nAltivecParamsAtEnd = 0;
5388  for (unsigned i = 0; i != NumOps; ++i) {
5389  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5390  EVT ArgVT = Outs[i].VT;
5391  // Varargs Altivec parameters are padded to a 16 byte boundary.
5392  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5393  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5394  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5395  if (!isVarArg && !isPPC64) {
5396  // Non-varargs Altivec parameters go after all the non-Altivec
5397  // parameters; handle those later so we know how much padding we need.
5398  nAltivecParamsAtEnd++;
5399  continue;
5400  }
5401  // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5402  NumBytes = ((NumBytes+15)/16)*16;
5403  }
5404  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5405  }
5406 
5407  // Allow for Altivec parameters at the end, if needed.
5408  if (nAltivecParamsAtEnd) {
5409  NumBytes = ((NumBytes+15)/16)*16;
5410  NumBytes += 16*nAltivecParamsAtEnd;
5411  }
5412 
5413  // The prolog code of the callee may store up to 8 GPR argument registers to
5414  // the stack, allowing va_start to index over them in memory if it is varargs.
5415  // Because we cannot tell if this is needed on the caller side, we have to
5416  // conservatively assume that it is needed. As such, make sure we have at
5417  // least enough stack space for the caller to store the 8 GPRs.
5418  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
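  // Worked example (illustrative): a 64-bit call with only two pointer-sized
  // arguments has NumBytes = 48 + 2*8 = 64 after the loop above; the line
  // above raises it to max(64, 48 + 8*8) = 112 bytes so the callee can
  // always spill all eight GPR argument registers.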
5419 
5420  // Tail call needs the stack to be aligned.
5421  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5422  CallConv == CallingConv::Fast)
5423  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5424 
5425  // Calculate by how many bytes the stack has to be adjusted in case of tail
5426  // call optimization.
5427  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5428 
5429  // To protect arguments on the stack from being clobbered in a tail call,
5430  // force all the loads to happen before doing any other lowering.
5431  if (isTailCall)
5432  Chain = DAG.getStackArgumentTokenFactor(Chain);
5433 
5434  // Adjust the stack pointer for the new arguments...
5435  // These operations are automatically eliminated by the prolog/epilog pass
5436  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5437  dl);
5438  SDValue CallSeqStart = Chain;
5439 
5440  // Load the return address and frame pointer so it can be move somewhere else
5441  // later.
5442  SDValue LROp, FPOp;
5443  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
5444  dl);
5445 
5446  // Set up a copy of the stack pointer for use in loading and storing any
5447  // arguments that may not fit in the registers available for argument
5448  // passing.
5449  SDValue StackPtr;
5450  if (isPPC64)
5451  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5452  else
5453  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5454 
5455  // Figure out which arguments are going to go in registers, and which in
5456  // memory. Also, if this is a vararg function, floating point arguments
5457  // must be stored to our stack, and loaded into integer regs as well, if
5458  // any integer regs are available for argument passing.
5459  unsigned ArgOffset = LinkageSize;
5460  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5461 
5462  static const MCPhysReg GPR_32[] = { // 32-bit registers.
5463  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5464  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5465  };
5466  static const MCPhysReg GPR_64[] = { // 64-bit registers.
5467  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5468  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5469  };
5470  static const MCPhysReg VR[] = {
5471  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5472  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5473  };
5474  const unsigned NumGPRs = array_lengthof(GPR_32);
5475  const unsigned NumFPRs = 13;
5476  const unsigned NumVRs = array_lengthof(VR);
5477 
5478  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5479 
5480  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5481  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5482 
5483  SmallVector<SDValue, 8> MemOpChains;
5484  for (unsigned i = 0; i != NumOps; ++i) {
5485  SDValue Arg = OutVals[i];
5486  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5487 
5488  // PtrOff will be used to store the current argument to the stack if a
5489  // register cannot be found for it.
5490  SDValue PtrOff;
5491 
5492  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5493 
5494  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5495 
5496  // On PPC64, promote integers to 64-bit values.
5497  if (isPPC64 && Arg.getValueType() == MVT::i32) {
5498  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5499  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5500  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5501  }
5502 
5503  // FIXME memcpy is used way more than necessary. Correctness first.
5504  // Note: "by value" is code for passing a structure by value, not
5505  // basic types.
5506  if (Flags.isByVal()) {
5507  unsigned Size = Flags.getByValSize();
5508  // Very small objects are passed right-justified. Everything else is
5509  // passed left-justified.
5510  if (Size==1 || Size==2) {
5511  EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
5512  if (GPR_idx != NumGPRs) {
5513  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5514  MachinePointerInfo(), VT,
5515  false, false, false, 0);
5516  MemOpChains.push_back(Load.getValue(1));
5517  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5518 
5519  ArgOffset += PtrByteSize;
5520  } else {
5521  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5522  PtrOff.getValueType());
5523  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5524  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5525  CallSeqStart,
5526  Flags, DAG, dl);
5527  ArgOffset += PtrByteSize;
5528  }
5529  continue;
5530  }
5531  // Copy entire object into memory. There are cases where gcc-generated
5532  // code assumes it is there, even if it could be put entirely into
5533  // registers. (This is not what the doc says.)
5534  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5535  CallSeqStart,
5536  Flags, DAG, dl);
5537 
5538  // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
5539  // copy the pieces of the object that fit into registers from the
5540  // parameter save area.
5541  for (unsigned j=0; j<Size; j+=PtrByteSize) {
5542  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5543  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5544  if (GPR_idx != NumGPRs) {
5545  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
5546  MachinePointerInfo(),
5547  false, false, false, 0);
5548  MemOpChains.push_back(Load.getValue(1));
5549  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5550  ArgOffset += PtrByteSize;
5551  } else {
5552  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5553  break;
5554  }
5555  }
5556  continue;
5557  }
5558 
5559  switch (Arg.getSimpleValueType().SimpleTy) {
5560  default: llvm_unreachable("Unexpected ValueType for argument!");
5561  case MVT::i1:
5562  case MVT::i32:
5563  case MVT::i64:
5564  if (GPR_idx != NumGPRs) {
5565  if (Arg.getValueType() == MVT::i1)
5566  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
5567 
5568  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5569  } else {
5570  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5571  isPPC64, isTailCall, false, MemOpChains,
5572  TailCallArguments, dl);
5573  }
5574  ArgOffset += PtrByteSize;
5575  break;
5576  case MVT::f32:
5577  case MVT::f64:
5578  if (FPR_idx != NumFPRs) {
5579  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5580 
5581  if (isVarArg) {
5582  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5583  MachinePointerInfo(), false, false, 0);
5584  MemOpChains.push_back(Store);
5585 
5586  // Float varargs are always shadowed in available integer registers
5587  if (GPR_idx != NumGPRs) {
5588  SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
5589  MachinePointerInfo(), false, false,
5590  false, 0);
5591  MemOpChains.push_back(Load.getValue(1));
5592  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5593  }
5594  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
5595  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5596  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5597  SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
5598  MachinePointerInfo(),
5599  false, false, false, 0);
5600  MemOpChains.push_back(Load.getValue(1));
5601  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5602  }
5603  } else {
5604  // If we have any FPRs remaining, we may also have GPRs remaining.
5605  // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
5606  // GPRs.
5607  if (GPR_idx != NumGPRs)
5608  ++GPR_idx;
5609  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
5610  !isPPC64) // PPC64 has 64-bit GPR's obviously :)
5611  ++GPR_idx;
5612  }
5613  } else
5614  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5615  isPPC64, isTailCall, false, MemOpChains,
5616  TailCallArguments, dl);
5617  if (isPPC64)
5618  ArgOffset += 8;
5619  else
5620  ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5621  break;
5622  case MVT::v4f32:
5623  case MVT::v4i32:
5624  case MVT::v8i16:
5625  case MVT::v16i8:
5626  if (isVarArg) {
5627  // These go aligned on the stack, or in the corresponding R registers
5628  // when within range. The Darwin PPC ABI doc claims they also go in
5629  // V registers; in fact gcc does this only for arguments that are
5630  // prototyped, not for those that match the ellipsis. We do it for all
5631  // arguments, and it seems to work.
5632  while (ArgOffset % 16 !=0) {
5633  ArgOffset += PtrByteSize;
5634  if (GPR_idx != NumGPRs)
5635  GPR_idx++;
5636  }
5637  // We could elide this store in the case where the object fits
5638  // entirely in R registers. Maybe later.
5639  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5640  DAG.getConstant(ArgOffset, dl, PtrVT));
5641  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5642  MachinePointerInfo(), false, false, 0);
5643  MemOpChains.push_back(Store);
5644  if (VR_idx != NumVRs) {
5645  SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
5646  MachinePointerInfo(),
5647  false, false, false, 0);
5648  MemOpChains.push_back(Load.getValue(1));
5649  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5650  }
5651  ArgOffset += 16;
5652  for (unsigned i=0; i<16; i+=PtrByteSize) {
5653  if (GPR_idx == NumGPRs)
5654  break;
5655  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5656  DAG.getConstant(i, dl, PtrVT));
5657  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5658  false, false, false, 0);
5659  MemOpChains.push_back(Load.getValue(1));
5660  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5661  }
5662  break;
5663  }
5664 
5665  // Non-varargs Altivec params generally go in registers, but have
5666  // stack space allocated at the end.
5667  if (VR_idx != NumVRs) {
5668  // Doesn't have GPR space allocated.
5669  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5670  } else if (nAltivecParamsAtEnd==0) {
5671  // We are emitting Altivec params in order.
5672  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5673  isPPC64, isTailCall, true, MemOpChains,
5674  TailCallArguments, dl);
5675  ArgOffset += 16;
5676  }
5677  break;
5678  }
5679  }
5680  // If all Altivec parameters fit in registers, as they usually do,
5681  // they get stack space following the non-Altivec parameters. We
5682  // don't track this here because nobody below needs it.
5684  // If there are more Altivec parameters than fit in registers, emit
5684  // the stores here.
5685  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5686  unsigned j = 0;
5687  // Offset is aligned; skip 1st 12 params which go in V registers.
5688  ArgOffset = ((ArgOffset+15)/16)*16;
5689  ArgOffset += 12*16;
5690  for (unsigned i = 0; i != NumOps; ++i) {
5691  SDValue Arg = OutVals[i];
5692  EVT ArgType = Outs[i].VT;
5693  if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
5694  ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
5695  if (++j > NumVRs) {
5696  SDValue PtrOff;
5697  // We are emitting Altivec params in order.
5698  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5699  isPPC64, isTailCall, true, MemOpChains,
5700  TailCallArguments, dl);
5701  ArgOffset += 16;
5702  }
5703  }
5704  }
5705  }
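  // Worked example (illustrative): with 13 non-varargs Altivec arguments on
  // 32-bit Darwin, the first 12 travel in V2-V13; the loop above stores only
  // the 13th, at the 16-byte-aligned offset following the 12*16 = 192 bytes
  // that shadow the V registers.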
5706 
5707  if (!MemOpChains.empty())
5708  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5709 
5710  // On Darwin, R12 must contain the address of an indirect callee. This does
5711  // not mean the MTCTR instruction must use R12; it's easier to model this as
5712  // an extra parameter, so do that.
5713  if (!isTailCall &&
5714  !isFunctionGlobalAddress(Callee) &&
5715  !isa<ExternalSymbolSDNode>(Callee) &&
5716  !isBLACompatibleAddress(Callee, DAG))
5717  RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
5718  PPC::R12), Callee));
5719 
5720  // Build a sequence of copy-to-reg nodes chained together with token chain
5721  // and flag operands which copy the outgoing args into the appropriate regs.
5722  SDValue InFlag;
5723  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5724  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5725  RegsToPass[i].second, InFlag);
5726  InFlag = Chain.getValue(1);
5727  }
5728 
5729  if (isTailCall)
5730  PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
5731  FPOp, true, TailCallArguments);
5732 
5733  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
5734  /* unused except on PPC64 ELFv1 */ false, DAG,
5735  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5736  NumBytes, Ins, InVals, CS);
5737 }
5738 
5739 bool
5740 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
5741  MachineFunction &MF, bool isVarArg,
5742  const SmallVectorImpl<ISD::OutputArg> &Outs,
5743  LLVMContext &Context) const {
5744  SmallVector<CCValAssign, 16> RVLocs;
5745  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
5746  return CCInfo.CheckReturn(Outs, RetCC_PPC);
5747 }
5748 
5749 SDValue
5750 PPCTargetLowering::LowerReturn(SDValue Chain,
5751  CallingConv::ID CallConv, bool isVarArg,
5752  const SmallVectorImpl<ISD::OutputArg> &Outs,
5753  const SmallVectorImpl<SDValue> &OutVals,
5754  SDLoc dl, SelectionDAG &DAG) const {
5755 
5756  SmallVector<CCValAssign, 16> RVLocs;
5757  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5758  *DAG.getContext());
5759  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
5760 
5761  SDValue Flag;
5762  SmallVector<SDValue, 4> RetOps(1, Chain);
5763 
5764  // Copy the result values into the output registers.
5765  for (unsigned i = 0; i != RVLocs.size(); ++i) {
5766  CCValAssign &VA = RVLocs[i];
5767  assert(VA.isRegLoc() && "Can only return in registers!");
5768 
5769  SDValue Arg = OutVals[i];
5770 
5771  switch (VA.getLocInfo()) {
5772  default: llvm_unreachable("Unknown loc info!");
5773  case CCValAssign::Full: break;
5774  case CCValAssign::AExt:
5775  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
5776  break;
5777  case CCValAssign::ZExt:
5778  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
5779  break;
5780  case CCValAssign::SExt:
5781  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
5782  break;
5783  }
5784 
5785  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
5786  Flag = Chain.getValue(1);
5787  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5788  }
5789 
5790  RetOps[0] = Chain; // Update chain.
5791 
5792  // Add the flag if we have it.
5793  if (Flag.getNode())
5794  RetOps.push_back(Flag);
5795 
5796  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
5797 }
5798 
5799 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
5800  const PPCSubtarget &Subtarget) const {
5801  // When we pop the dynamic allocation we need to restore the SP link.
5802  SDLoc dl(Op);
5803 
5804  // Get the correct type for pointers.
5805  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5806 
5807  // Construct the stack pointer operand.
5808  bool isPPC64 = Subtarget.isPPC64();
5809  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
5810  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
5811 
5812  // Get the operands for the STACKRESTORE.
5813  SDValue Chain = Op.getOperand(0);
5814  SDValue SaveSP = Op.getOperand(1);
5815 
5816  // Load the old link SP.
5817  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
5818  MachinePointerInfo(),
5819  false, false, false, 0);
5820 
5821  // Restore the stack pointer.
5822  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
5823 
5824  // Store the old link SP.
5825  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
5826  false, false, 0);
5827 }
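// Conceptually (a sketch, not from the source), the sequence built above
// corresponds to:
//   ld  r0, 0(r1)    # load the old back-chain link
//   mr  r1, rSave    # restore SP from the saved value
//   std r0, 0(r1)    # re-store the link at the new stack top
// with lwz/stw instead of ld/std on 32-bit targets; rSave stands for
// whatever register holds SaveSP.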
5828 
5829 
5830 
5831 SDValue
5832 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
5833  MachineFunction &MF = DAG.getMachineFunction();
5834  bool isPPC64 = Subtarget.isPPC64();
5835  EVT PtrVT = getPointerTy(MF.getDataLayout());
5836 
5837  // Get the current return address save index; allocate it below if it has
5838  // not been defined yet.
5839  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5840  int RASI = FI->getReturnAddrSaveIndex();
5841 
5842  // If the return address save index hasn't been defined yet.
5843  if (!RASI) {
5844  // Find out the fixed offset of the return address save area.
5845  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
5846  // Allocate the frame index for the return address save area.
5847  RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
5848  // Save the result.
5849  FI->setReturnAddrSaveIndex(RASI);
5850  }
5851  return DAG.getFrameIndex(RASI, PtrVT);
5852 }
5853 
5854 SDValue
5855 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
5856  MachineFunction &MF = DAG.getMachineFunction();
5857  bool isPPC64 = Subtarget.isPPC64();
5858  EVT PtrVT = getPointerTy(MF.getDataLayout());
5859 
5860  // Get current frame pointer save index. The users of this index will be
5861  // primarily DYNALLOC instructions.
5862  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5863  int FPSI = FI->getFramePointerSaveIndex();
5864 
5865  // If the frame pointer save index hasn't been defined yet.
5866  if (!FPSI) {
5867  // Find out the fixed offset of the frame pointer save area.
5868  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
5869  // Allocate the frame index for frame pointer save area.
5870  FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
5871  // Save the result.
5872  FI->setFramePointerSaveIndex(FPSI);
5873  }
5874  return DAG.getFrameIndex(FPSI, PtrVT);
5875 }
5876 
5877 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
5878  SelectionDAG &DAG,
5879  const PPCSubtarget &Subtarget) const {
5880  // Get the inputs.
5881  SDValue Chain = Op.getOperand(0);
5882  SDValue Size = Op.getOperand(1);
5883  SDLoc dl(Op);
5884 
5885  // Get the correct type for pointers.
5886  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5887  // Negate the size.
5888  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
5889  DAG.getConstant(0, dl, PtrVT), Size);
5890  // Construct a node for the frame pointer save index.
5891  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
5892  // Build a DYNALLOC node.
5893  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
5894  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
5895  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
5896 }
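// Sketch of the lowering above (illustrative; %buf and %n are hypothetical
// names): for IR such as
//   %buf = alloca i8, i64 %n
// the node built is DYNALLOC(Chain, 0 - %n, FPSIdx); the negated size lets
// the later expansion adjust the stack downward while the frame pointer
// save index keeps the back chain reachable.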
5897 
5898 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
5899  SelectionDAG &DAG) const {
5900  SDLoc DL(Op);
5901  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
5902  DAG.getVTList(MVT::i32, MVT::Other),
5903  Op.getOperand(0), Op.getOperand(1));
5904 }
5905 
5906 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
5907  SelectionDAG &DAG) const {
5908  SDLoc DL(Op);
5909  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
5910  Op.getOperand(0), Op.getOperand(1));
5911 }
5912 
5913 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
5914  if (Op.getValueType().isVector())
5915  return LowerVectorLoad(Op, DAG);
5916 
5917  assert(Op.getValueType() == MVT::i1 &&
5918  "Custom lowering only for i1 loads");
5919 
5920  // First, load 8 bits into 32 bits, then truncate to 1 bit.
5921 
5922  SDLoc dl(Op);
5923  LoadSDNode *LD = cast<LoadSDNode>(Op);
5924 
5925  SDValue Chain = LD->getChain();
5926  SDValue BasePtr = LD->getBasePtr();
5927  MachineMemOperand *MMO = LD->getMemOperand();
5928 
5929  SDValue NewLD =
5930  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
5931  BasePtr, MVT::i8, MMO);
5932  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
5933 
5934  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
5935  return DAG.getMergeValues(Ops, dl);
5936 }
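// Resulting DAG shape (illustrative):
//   NewLD:  i8 extending load from BasePtr (the memory access stays byte-sized)
//   Result: truncate NewLD to i1
// so an i1 in memory is always materialized through a full byte.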
5937 
5938 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
5939  if (Op.getOperand(1).getValueType().isVector())
5940  return LowerVectorStore(Op, DAG);
5941 
5942  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
5943  "Custom lowering only for i1 stores");
5944 
5945  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
5946 
5947  SDLoc dl(Op);
5948  StoreSDNode *ST = cast<StoreSDNode>(Op);
5949 
5950  SDValue Chain = ST->getChain();
5951  SDValue BasePtr = ST->getBasePtr();
5952  SDValue Value = ST->getValue();
5953  MachineMemOperand *MMO = ST->getMemOperand();
5954 
5955  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
5956  Value);
5957  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
5958 }
5959 
5960 // FIXME: Remove this once the ANDI glue bug is fixed:
5961 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
5962  assert(Op.getValueType() == MVT::i1 &&
5963  "Custom lowering only for i1 results");
5964 
5965  SDLoc DL(Op);
5966  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
5967  Op.getOperand(0));
5968 }
5969 
5970 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
5971 /// instruction when possible.
5972 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5973  // Not FP? Not a fsel.
5974  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
5975  !Op.getOperand(2).getValueType().isFloatingPoint())
5976  return Op;
5977 
5978  // We might be able to do better than this under some circumstances, but in
5979  // general, fsel-based lowering of select is a finite-math-only optimization.
5980  // For more information, see section F.3 of the 2.06 ISA specification.
5981  if (!DAG.getTarget().Options.NoInfsFPMath ||
5982  !DAG.getTarget().Options.NoNaNsFPMath)
5983  return Op;
5984 
5985  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5986 
5987  EVT ResVT = Op.getValueType();
5988  EVT CmpVT = Op.getOperand(0).getValueType();
5989  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5990  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
5991  SDLoc dl(Op);
5992 
5993  // If the RHS of the comparison is a 0.0, we don't need to do the
5994  // subtraction at all.
5995  SDValue Sel1;
5996  if (isFloatingPointZero(RHS))
5997  switch (CC) {
5998  default: break; // SETUO etc aren't handled by fsel.
5999  case ISD::SETNE:
6000  std::swap(TV, FV);
6001  case ISD::SETEQ:
6002  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6003  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6004  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6005  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6006  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6007  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6008  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6009  case ISD::SETULT:
6010  case ISD::SETLT:
6011  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6012  case ISD::SETOGE:
6013  case ISD::SETGE:
6014  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6015  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6016  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6017  case ISD::SETUGT:
6018  case ISD::SETGT:
6019  std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
6020  case ISD::SETOLE:
6021  case ISD::SETLE:
6022  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6023  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6024  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6025  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6026  }
6027 
6028  SDValue Cmp;
6029  switch (CC) {
6030  default: break; // SETUO etc aren't handled by fsel.
6031  case ISD::SETNE:
6032  std::swap(TV, FV);
6033  case ISD::SETEQ:
6034  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6035  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6036  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6037  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6038  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6039  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6040  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6041  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6042  case ISD::SETULT:
6043  case ISD::SETLT:
6044  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6045  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6046  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6047  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6048  case ISD::SETOGE:
6049  case ISD::SETGE:
6050  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6051  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6052  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6053  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6054  case ISD::SETUGT:
6055  case ISD::SETGT:
6056  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
6057  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6058  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6059  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6060  case ISD::SETOLE:
6061  case ISD::SETLE:
6062  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
6063  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6064  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6065  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6066  }
6067  return Op;
6068 }
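// Summary of the fsel mappings above (illustrative; fsel selects its second
// operand when the first is >= 0.0, else the third):
//   setge: fsel(LHS - RHS, TV, FV)
//   setlt: fsel(LHS - RHS, FV, TV)
//   seteq: fsel(-(LHS - RHS), fsel(LHS - RHS, TV, FV), FV)
// with the subtraction omitted entirely when RHS is 0.0.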
6069 
6070 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6071  SelectionDAG &DAG,
6072  SDLoc dl) const {
6073  assert(Op.getOperand(0).getValueType().isFloatingPoint());
6074  SDValue Src = Op.getOperand(0);
6075  if (Src.getValueType() == MVT::f32)
6076  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6077 
6078  SDValue Tmp;
6079  switch (Op.getSimpleValueType().SimpleTy) {
6080  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6081  case MVT::i32:
6082  Tmp = DAG.getNode(
6083  Op.getOpcode() == ISD::FP_TO_SINT
6084  ? PPCISD::FCTIWZ
6085  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6086  dl, MVT::f64, Src);
6087  break;
6088  case MVT::i64:
6089  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6090  "i64 FP_TO_UINT is supported only with FPCVT");
6091  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6092  PPCISD::FCTIDUZ,
6093  dl, MVT::f64, Src);
6094  break;
6095  }
6096 
6097  // Convert the FP value to an int value through memory.
6098  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6099  (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6100  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6101  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6102  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
6103 
6104  // Emit a store to the stack slot.
6105  SDValue Chain;
6106  if (i32Stack) {
6107  MachineFunction &MF = DAG.getMachineFunction();
6108  MachineMemOperand *MMO =
6109  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6110  SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6111  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6112  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6113  } else
6114  Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
6115  MPI, false, false, 0);
6116 
6117  // Result is a load from the stack slot. If loading 4 bytes, make sure to
6118  // add in a bias.
6119  if (Op.getValueType() == MVT::i32 && !i32Stack) {
6120  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6121  DAG.getConstant(4, dl, FIPtr.getValueType()));
6122  MPI = MPI.getWithOffset(4);
6123  }
6124 
6125  RLI.Chain = Chain;
6126  RLI.Ptr = FIPtr;
6127  RLI.MPI = MPI;
6128 }
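// Why the 4-byte bias above (illustrative): fctiwz leaves the 32-bit result
// in the low word of an f64 register; when that f64 is spilled to the
// 8-byte stack slot on a big-endian target, the integer occupies bytes 4-7,
// so an i32 reload must start 4 bytes into the slot.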
6129 
6130 /// \brief Custom lowers floating point to integer conversions to use
6131 /// the direct move instructions available in ISA 2.07 to avoid the
6132 /// need for load/store combinations.
6133 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6134  SelectionDAG &DAG,
6135  SDLoc dl) const {
6136  assert(Op.getOperand(0).getValueType().isFloatingPoint());
6137  SDValue Src = Op.getOperand(0);
6138 
6139  if (Src.getValueType() == MVT::f32)
6140  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6141 
6142  SDValue Tmp;
6143  switch (Op.getSimpleValueType().SimpleTy) {
6144  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6145  case MVT::i32:
6146  Tmp = DAG.getNode(
6147  Op.getOpcode() == ISD::FP_TO_SINT
6148  ? PPCISD::FCTIWZ
6149  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6150  dl, MVT::f64, Src);
6151  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6152  break;
6153  case MVT::i64:
6154  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6155  "i64 FP_TO_UINT is supported only with FPCVT");
6156  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6157  PPCISD::FCTIDUZ,
6158  dl, MVT::f64, Src);
6159  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6160  break;
6161  }
6162  return Tmp;
6163 }
6164 
6165 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6166  SDLoc dl) const {
6167  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6168  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6169 
6170  ReuseLoadInfo RLI;
6171  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6172 
6173  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
6174  false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
6175  RLI.Ranges);
6176 }
6177 
6178 // We're trying to insert a regular store, S, and then a load, L. If the
6179 // incoming value, O, is a load, we might just be able to have our load use the
6180 // address used by O. However, we don't know if anything else will store to
6181 // that address before we can load from it. To prevent this situation, we need
6182 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6183 // the same chain operand as O, we create a token factor from the chain results
6184 // of O and L, and we replace all uses of O's chain result with that token
6185 // factor (see spliceIntoChain below for this last part).
6186 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6187  ReuseLoadInfo &RLI,
6188  SelectionDAG &DAG,
6189  ISD::LoadExtType ET) const {
6190  SDLoc dl(Op);
6191  if (ET == ISD::NON_EXTLOAD &&
6192  (Op.getOpcode() == ISD::FP_TO_UINT ||
6193  Op.getOpcode() == ISD::FP_TO_SINT) &&
6194  isOperationLegalOrCustom(Op.getOpcode(),
6195  Op.getOperand(0).getValueType())) {
6196 
6197  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6198  return true;
6199  }
6200 
6201  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6202  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6203  LD->isNonTemporal())
6204  return false;
6205  if (LD->getMemoryVT() != MemVT)
6206  return false;
6207 
6208  RLI.Ptr = LD->getBasePtr();
6209  if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
6210  assert(LD->getAddressingMode() == ISD::PRE_INC &&
6211  "Non-pre-inc AM on PPC?");
6212  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6213  LD->getOffset());
6214  }
6215 
6216  RLI.Chain = LD->getChain();
6217  RLI.MPI = LD->getPointerInfo();
6218  RLI.IsInvariant = LD->isInvariant();
6219  RLI.Alignment = LD->getAlignment();
6220  RLI.AAInfo = LD->getAAInfo();
6221  RLI.Ranges = LD->getRanges();
6222 
6223  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6224  return true;
6225 }
6226 
6227 // Given the head of the old chain, ResChain, insert a token factor containing
6228 // it and NewResChain, and make users of ResChain now be users of that token
6229 // factor.
6230 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6231  SDValue NewResChain,
6232  SelectionDAG &DAG) const {
6233  if (!ResChain)
6234  return;
6235 
6236  SDLoc dl(NewResChain);
6237 
6238  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ResChain,
6239  NewResChain, DAG.getUNDEF(MVT::Other));
6240  assert(TF.getNode() != NewResChain.getNode() &&
6241  "A new TF really is required here");
6242 
6243  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6244  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6245 }
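// Before/after sketch of the splice (illustrative):
//   before:  users(O.chain) <---- O
//   after:   users(O.chain) <---- TF(O.chain, NewResChain)
// so anything that was ordered after the original load O is now also
// ordered after the newly inserted load.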
6246 
6247 /// \brief Custom lowers integer to floating point conversions to use
6248 /// the direct move instructions available in ISA 2.07 to avoid the
6249 /// need for load/store combinations.
6250 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6251  SelectionDAG &DAG,
6252  SDLoc dl) const {
6253  assert((Op.getValueType() == MVT::f32 ||
6254  Op.getValueType() == MVT::f64) &&
6255  "Invalid floating point type as target of conversion");
6256  assert(Subtarget.hasFPCVT() &&
6257  "Int to FP conversions with direct moves require FPCVT");
6258  SDValue FP;
6259  SDValue Src = Op.getOperand(0);
6260  bool SinglePrec = Op.getValueType() == MVT::f32;
6261  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6262  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6263  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6264  (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6265 
6266  if (WordInt) {
6267  FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6268  dl, MVT::f64, Src);
6269  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6270  }
6271  else {
6272  FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6273  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6274  }
6275 
6276  return FP;
6277 }
6278 
6279 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6280  SelectionDAG &DAG) const {
6281  SDLoc dl(Op);
6282 
6283  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6284  if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6285  return SDValue();
6286 
6287  SDValue Value = Op.getOperand(0);
6288  // The values are now known to be -1 (false) or 1 (true). To convert this
6289  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6290  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
6291  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6292 
6293  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
6294  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
6295  FPHalfs, FPHalfs, FPHalfs, FPHalfs);
6296 
6297  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6298 
6299  if (Op.getValueType() != MVT::v4f64)
6300  Value = DAG.getNode(ISD::FP_ROUND, dl,
6301  Op.getValueType(), Value,
6302  DAG.getIntPtrConstant(1, dl));
6303  return Value;
6304  }
6305 
6306  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6307  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6308  return SDValue();
6309 
6310  if (Op.getOperand(0).getValueType() == MVT::i1)
6311  return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6312  DAG.getConstantFP(1.0, dl, Op.getValueType()),
6313  DAG.getConstantFP(0.0, dl, Op.getValueType()));
6314 
6315  // If we have direct moves, we can do all the conversion, skip the store/load
6316  // however, without FPCVT we can't do most conversions.
6317  if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
6318  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6319 
6320  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6321  "UINT_TO_FP is supported only with FPCVT");
6322 
6323  // If we have FCFIDS, then use it when converting to single-precision.
6324  // Otherwise, convert to double-precision and then round.
6325  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6326  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
6327  : PPCISD::FCFIDS)
6328  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
6329  : PPCISD::FCFID);
6330  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6331  ? MVT::f32
6332  : MVT::f64;
6333 
6334  if (Op.getOperand(0).getValueType() == MVT::i64) {
6335  SDValue SINT = Op.getOperand(0);
6336  // When converting to single-precision, we actually need to convert
6337  // to double-precision first and then round to single-precision.
6338  // To avoid double-rounding effects during that operation, we have
6339  // to prepare the input operand. Bits that might be truncated when
6340  // converting to double-precision are replaced by a bit that won't
6341  // be lost at this stage, but is below the single-precision rounding
6342  // position.
6343  //
6344  // However, if -enable-unsafe-fp-math is in effect, accept double
6345  // rounding to avoid the extra overhead.
6346  if (Op.getValueType() == MVT::f32 &&
6347  !Subtarget.hasFPCVT() &&
6348  !DAG.getTarget().Options.UnsafeFPMath) {
6349 
6350  // Twiddle input to make sure the low 11 bits are zero. (If this
6351  // is the case, we are guaranteed the value will fit into the 53 bit
6352  // mantissa of an IEEE double-precision value without rounding.)
6353  // If any of those low 11 bits were not zero originally, make sure
6354  // bit 12 (value 2048) is set instead, so that the final rounding
6355  // to single-precision gets the correct result.
6356  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6357  SINT, DAG.getConstant(2047, dl, MVT::i64));
6358  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6359  Round, DAG.getConstant(2047, dl, MVT::i64));
6360  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6361  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6362  Round, DAG.getConstant(-2048, dl, MVT::i64));
6363 
6364  // However, we cannot use that value unconditionally: if the magnitude
6365  // of the input value is small, the bit-twiddling we did above might
6366  // end up visibly changing the output. Fortunately, in that case, we
6367  // don't need to twiddle bits since the original input will convert
6368  // exactly to double-precision floating-point already. Therefore,
6369  // construct a conditional to use the original value if the top 11
6370  // bits are all sign-bit copies, and use the rounded value computed
6371  // above otherwise.
6372  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
6373  SINT, DAG.getConstant(53, dl, MVT::i32));
6374  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
6375  Cond, DAG.getConstant(1, dl, MVT::i64));
6376  Cond = DAG.getSetCC(dl, MVT::i32,
6377  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
6378 
6379  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
6380  }
6381 
6382  ReuseLoadInfo RLI;
6383  SDValue Bits;
6384 
6385  MachineFunction &MF = DAG.getMachineFunction();
6386  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
6387  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
6388  false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
6389  RLI.Ranges);
6390  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6391  } else if (Subtarget.hasLFIWAX() &&
6392  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
6393  MachineMemOperand *MMO =
6394  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6395  RLI.Alignment, RLI.AAInfo,
6396  RLI.Ranges);
6396  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6397  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
6398  DAG.getVTList(MVT::f64, MVT::Other),
6399  Ops, MVT::i32, MMO);
6400  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6401  } else if (Subtarget.hasFPCVT() &&
6402  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
6403  MachineMemOperand *MMO =
6404  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6405  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6406  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6407  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
6408  DAG.getVTList(MVT::f64, MVT::Other),
6409  Ops, MVT::i32, MMO);
6410  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6411  } else if (((Subtarget.hasLFIWAX() &&
6412  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
6413  (Subtarget.hasFPCVT() &&
6414  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
6415  SINT.getOperand(0).getValueType() == MVT::i32) {
6416  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
6417  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
6418 
6419  int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
6420  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6421 
6422  SDValue Store =
6423  DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
6424  MachinePointerInfo::getFixedStack(FrameIdx),
6425  false, false, 0);
6426 
6427  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6428  "Expected an i32 store");
6429 
6430  RLI.Ptr = FIdx;
6431  RLI.Chain = Store;
6432  RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
6433  RLI.Alignment = 4;
6434 
6435  MachineMemOperand *MMO =
6436  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6437  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6438  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6439  Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
6440  PPCISD::LFIWZX : PPCISD::LFIWAX,
6441  dl, DAG.getVTList(MVT::f64, MVT::Other),
6442  Ops, MVT::i32, MMO);
6443  } else
6444  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
6445 
6446  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
6447 
6448  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6449  FP = DAG.getNode(ISD::FP_ROUND, dl,
6450  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
6451  return FP;
6452  }
6453 
6454  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
6455  "Unhandled INT_TO_FP type in custom expander!");
6456  // Since we only generate this in 64-bit mode, we can take advantage of
6457  // 64-bit registers. In particular, sign extend the input value into the
6458  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
6459  // then lfd it and fcfid it.
6460  MachineFunction &MF = DAG.getMachineFunction();
6461  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
6462  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
6463 
6464  SDValue Ld;
6465  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
6466  ReuseLoadInfo RLI;
6467  bool ReusingLoad;
6468  if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
6469  DAG))) {
6470  int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
6471  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6472 
6473  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
6474  MachinePointerInfo::getFixedStack(FrameIdx),
6475  false, false, 0);
6476 
6477  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6478  "Expected an i32 store");
6479 
6480  RLI.Ptr = FIdx;
6481  RLI.Chain = Store;
6482  RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
6483  RLI.Alignment = 4;
6484  }
6485 
6486  MachineMemOperand *MMO =
6487  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6488  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6489  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6490  Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
6491  PPCISD::LFIWZX : PPCISD::LFIWAX,
6492  dl, DAG.getVTList(MVT::f64, MVT::Other),
6493  Ops, MVT::i32, MMO);
6494  if (ReusingLoad)
6495  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
6496  } else {
6497  assert(Subtarget.isPPC64() &&
6498  "i32->FP without LFIWAX supported only on PPC64");
6499 
6500  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
6501  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6502 
6503  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
6504  Op.getOperand(0));
6505 
6506  // STD the extended value into the stack slot.
6507  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
6508  MachinePointerInfo::getFixedStack(FrameIdx),
6509  false, false, 0);
6510 
6511  // Load the value as a double.
6512  Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
6513  MachinePointerInfo::getFixedStack(FrameIdx),
6514  false, false, false, 0);
6515  }
6516 
6517  // FCFID it and return it.
6518  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6519  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6520  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6521  DAG.getIntPtrConstant(0, dl));
6522  return FP;
6523 }
6524 
6525 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6526  SelectionDAG &DAG) const {
6527  SDLoc dl(Op);
6528  /*
6529  The rounding mode is in bits 30:31 of the FPSCR, and has the following
6530  settings:
6531  00 Round to nearest
6532  01 Round to 0
6533  10 Round to +inf
6534  11 Round to -inf
6535 
6536  FLT_ROUNDS, on the other hand, expects the following:
6537  -1 Undefined
6538  0 Round to 0
6539  1 Round to nearest
6540  2 Round to +inf
6541  3 Round to -inf
6542 
6543  To perform the conversion, we do:
6544  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
6545  */
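  // Worked check of the formula above (illustrative, not original source):
  //   FPSCR = 00 (nearest): (0 & 3) ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1
  //   FPSCR = 01 (to 0):    (1 & 3) ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0
  //   FPSCR = 10 (to +inf): (2 & 3) ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2
  //   FPSCR = 11 (to -inf): (3 & 3) ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3
  // which matches the FLT_ROUNDS encoding listed above.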
6546 
6547  MachineFunction &MF = DAG.getMachineFunction();
6548  EVT VT = Op.getValueType();
6549  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
6550 
6551  // Save FP Control Word to register
6552  EVT NodeTys[] = {
6553  MVT::f64, // return register
6554  MVT::Glue // unused in this context
6555  };
6556  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6557 
6558  // Save FP register to stack slot
6559  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
6560  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6561  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
6562  StackSlot, MachinePointerInfo(), false, false, 0);
6563 
6564  // Load FP Control Word from low 32 bits of stack slot.
6565  SDValue Four = DAG.getConstant(4, dl, PtrVT);
6566  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6567  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
6568  false, false, false, 0);
6569 
6570  // Transform as necessary
6571  SDValue CWD1 =
6572  DAG.getNode(ISD::AND, dl, MVT::i32,
6573  CWD, DAG.getConstant(3, dl, MVT::i32));
6574  SDValue CWD2 =
6575  DAG.getNode(ISD::SRL, dl, MVT::i32,
6576  DAG.getNode(ISD::AND, dl, MVT::i32,
6577  DAG.getNode(ISD::XOR, dl, MVT::i32,
6578  CWD, DAG.getConstant(3, dl, MVT::i32)),
6579  DAG.getConstant(3, dl, MVT::i32)),
6580  DAG.getConstant(1, dl, MVT::i32));
6581 
6582  SDValue RetVal =
6583  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6584 
6585  return DAG.getNode((VT.getSizeInBits() < 16 ?
6586  ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6587 }
6588 
6589 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6590  EVT VT = Op.getValueType();
6591  unsigned BitWidth = VT.getSizeInBits();
6592  SDLoc dl(Op);
6593  assert(Op.getNumOperands() == 3 &&
6594  VT == Op.getOperand(1).getValueType() &&
6595  "Unexpected SHL!");
6596 
6597  // Expand into a bunch of logical ops. Note that these ops
6598  // depend on the PPC behavior for oversized shift amounts.
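  // (Illustrative: for BitWidth == 32 and Amt == 40, Tmp1 == -8 masks to a
  // shift of 56, which PPC treats as oversized and folds to zero, so
  // Tmp3 == 0; Tmp5 == 8 gives Tmp6 == Lo << 8, and OutHi == Lo << 8 as a
  // 64-bit shift-left-by-40 requires.)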
6599  SDValue Lo = Op.getOperand(0);
6600  SDValue Hi = Op.getOperand(1);
6601  SDValue Amt = Op.getOperand(2);
6602  EVT AmtVT = Amt.getValueType();
6603 
6604  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6605  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6606  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6607  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6608  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6609  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6610  DAG.getConstant(-BitWidth, dl, AmtVT));
6611  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
6612  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6613  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
6614  SDValue OutOps[] = { OutLo, OutHi };
6615  return DAG.getMergeValues(OutOps, dl);
6616 }
6617 
6618 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6619  EVT VT = Op.getValueType();
6620  SDLoc dl(Op);
6621  unsigned BitWidth = VT.getSizeInBits();
6622  assert(Op.getNumOperands() == 3 &&
6623  VT == Op.getOperand(1).getValueType() &&
6624  "Unexpected SRL!");
6625 
6626  // Expand into a bunch of logical ops. Note that these ops
6627  // depend on the PPC behavior for oversized shift amounts.
6628  SDValue Lo = Op.getOperand(0);
6629  SDValue Hi = Op.getOperand(1);
6630  SDValue Amt = Op.getOperand(2);
6631  EVT AmtVT = Amt.getValueType();
6632 
6633  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6634  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6635  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6636  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6637  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6638  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6639  DAG.getConstant(-BitWidth, dl, AmtVT));
6640  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
6641  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6642  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
6643  SDValue OutOps[] = { OutLo, OutHi };
6644  return DAG.getMergeValues(OutOps, dl);
6645 }
6646 
6647 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
6648  SDLoc dl(Op);
6649  EVT VT = Op.getValueType();
6650  unsigned BitWidth = VT.getSizeInBits();
6651  assert(Op.getNumOperands() == 3 &&
6652  VT == Op.getOperand(1).getValueType() &&
6653  "Unexpected SRA!");
6654 
6655  // Expand into a bunch of logical ops, followed by a select_cc.
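  // (Illustrative: Tmp5 = Amt - BitWidth is <= 0 for in-range shifts, so the
  // select_cc below yields Tmp4 = (Lo >> Amt) | (Hi << (BitWidth - Amt));
  // for oversized shifts it yields Tmp6, an arithmetic shift of Hi by
  // Amt - BitWidth, whose sign fill distinguishes SRA_PARTS from SRL_PARTS.)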
6656  SDValue Lo = Op.getOperand(0);
6657  SDValue Hi = Op.getOperand(1);
6658  SDValue Amt = Op.getOperand(2);
6659  EVT AmtVT = Amt.getValueType();
6660 
6661  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6662  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6663  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6664  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6665  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6666  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6667  DAG.getConstant(-BitWidth, dl, AmtVT));
6668  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
6669  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
6670  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
6671  Tmp4, Tmp6, ISD::SETLE);
6672  SDValue OutOps[] = { OutLo, OutHi };
6673  return DAG.getMergeValues(OutOps, dl);
6674 }
6675 
6676 //===----------------------------------------------------------------------===//
6677 // Vector related lowering.
6678 //
6679 
6680 /// BuildSplatI - Build a canonical splati of Val with an element size of
6681 /// SplatSize. Cast the result to VT.
6682 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
6683  SelectionDAG &DAG, SDLoc dl) {
6684  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
6685 
6686  static const MVT VTys[] = { // canonical VT to use for each size.
6687  MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
6688  };
6689 
6690  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
6691 
6692  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
6693  if (Val == -1)
6694  SplatSize = 1;
6695 
6696  EVT CanonicalVT = VTys[SplatSize-1];
6697 
6698  // Build a canonical splat for this value.
6699  SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
6700  SmallVector<SDValue, 8> Ops;
6701  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
6702  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
6703  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
6704 }
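// Usage sketch (illustrative): BuildSplatI(-1, 4, MVT::v4i32, DAG, dl) is
// canonicalized to a v16i8 splat (vspltisb -1) and bitcast back to v4i32,
// so every all-ones splat has a single canonical form regardless of the
// requested element size.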
6705 
6706 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
6707 /// specified intrinsic ID.
6708 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
6709  SelectionDAG &DAG, SDLoc dl,
6710  EVT DestVT = MVT::Other) {
6711  if (DestVT == MVT::Other) DestVT = Op.getValueType();
6712  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6713  DAG.getConstant(IID, dl, MVT::i32), Op);
6714 }
6715 
6716 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
6717 /// specified intrinsic ID.
6718 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
6719  SelectionDAG &DAG, SDLoc dl,
6720  EVT DestVT = MVT::Other) {
6721  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
6722  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6723  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
6724 }
6725 
6726 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
6727 /// specified intrinsic ID.
6728 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
6729  SDValue Op2, SelectionDAG &DAG,
6730  SDLoc dl, EVT DestVT = MVT::Other) {
6731  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
6732  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6733  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
6734 }
6735 
6736 
6737 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
6738 /// amount. The result has the specified value type.
6739 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
6740  EVT VT, SelectionDAG &DAG, SDLoc dl) {
6741  // Force LHS/RHS to be the right type.
6742  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
6743  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
6744 
6745  int Ops[16];
6746  for (unsigned i = 0; i != 16; ++i)
6747  Ops[i] = i + Amt;
6748  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
6749  return DAG.getNode(ISD::BITCAST, dl, VT, T);
6750 }
6751 
6752 // If this is a case we can't handle, return null and let the default
6753 // expansion code take care of it. If we CAN select this case, and if it
6754 // selects to a single instruction, return Op. Otherwise, if we can codegen
6755 // this case more efficiently than a constant pool load, lower it to the
6756 // sequence of ops that should be used.
6757 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
6758  SelectionDAG &DAG) const {
6759  SDLoc dl(Op);
6760  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6761  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
6762 
6763  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
6764  // We first build an i32 vector, load it into a QPX register,
6765  // then convert it to a floating-point vector and compare it
6766  // to a zero vector to get the boolean result.
6767  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6768  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6769  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
6770  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6771  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6772 
6773  assert(BVN->getNumOperands() == 4 &&
6774  "BUILD_VECTOR for v4i1 does not have 4 operands");
6775 
6776  bool IsConst = true;
6777  for (unsigned i = 0; i < 4; ++i) {
6778  if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6779  if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
6780  IsConst = false;
6781  break;
6782  }
6783  }
6784 
6785  if (IsConst) {
6786  Constant *One =
6787  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
6788  Constant *NegOne =
6789  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
6790 
6791  SmallVector<Constant*, 4> CV(4, NegOne);
6792  for (unsigned i = 0; i < 4; ++i) {
6793  if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
6794  CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
6795  else if (cast<ConstantSDNode>(BVN->getOperand(i))->
6796  getConstantIntValue()->isZero())
6797  continue;
6798  else
6799  CV[i] = One;
6800  }
6801 
6802  Constant *CP = ConstantVector::get(CV);
6803  SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
6804  16 /* alignment */);
6805 
6806  SmallVector<SDValue, 2> Ops;
6807  Ops.push_back(DAG.getEntryNode());
6808  Ops.push_back(CPIdx);
6809 
6810  SmallVector<EVT, 2> ValueVTs;
6811  ValueVTs.push_back(MVT::v4i1);
6812  ValueVTs.push_back(MVT::Other); // chain
6813  SDVTList VTs = DAG.getVTList(ValueVTs);
6814 
6815  return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
6816  dl, VTs, Ops, MVT::v4f32,
6817  MachinePointerInfo::getConstantPool());
6818  }
6819 
6820  SmallVector<SDValue, 4> Stores;
6821  for (unsigned i = 0; i < 4; ++i) {
6822  if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6823 
6824  unsigned Offset = 4*i;
6825  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
6826  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
6827 
6828  unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
6829  if (StoreSize > 4) {
6830  Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
6831  BVN->getOperand(i), Idx,
6832  PtrInfo.getWithOffset(Offset),
6833  MVT::i32, false, false, 0));
6834  } else {
6835  SDValue StoreValue = BVN->getOperand(i);
6836  if (StoreSize < 4)
6837  StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
6838 
6839  Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
6840  StoreValue, Idx,
6841  PtrInfo.getWithOffset(Offset),
6842  false, false, 0));
6843  }
6844  }
6845 
6846  SDValue StoreChain;
6847  if (!Stores.empty())
6848  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6849  else
6850  StoreChain = DAG.getEntryNode();
6851 
6852  // Now load from v4i32 into the QPX register; this will extend it to
6853  // v4i64 but not yet convert it to floating point. Nevertheless, this
6854  // is typed as v4f64 because the QPX register integer states are not
6855  // explicitly represented.
6856 
6857  SmallVector<SDValue, 3> Ops;
6858  Ops.push_back(StoreChain);
6859  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
6860  Ops.push_back(FIdx);
6861 
6862  SmallVector<EVT, 2> ValueVTs;
6863  ValueVTs.push_back(MVT::v4f64);
6864  ValueVTs.push_back(MVT::Other); // chain
6865  SDVTList VTs = DAG.getVTList(ValueVTs);
6866 
6867  SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
6868  dl, VTs, Ops, MVT::v4i32, PtrInfo);
6869  LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
6870  DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
6871  LoadedVect);
6872 
6873  SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
6874  FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
6875  FPZeros, FPZeros, FPZeros, FPZeros);
6876 
6877  return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
6878  }
6879 
6880  // All other QPX vectors are handled by generic code.
6881  if (Subtarget.hasQPX())
6882  return SDValue();
6883 
6884  // Check if this is a splat of a constant value.
6885  APInt APSplatBits, APSplatUndef;
6886  unsigned SplatBitSize;
6887  bool HasAnyUndefs;
6888  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
6889  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
6890  SplatBitSize > 32)
6891  return SDValue();
6892 
6893  unsigned SplatBits = APSplatBits.getZExtValue();
6894  unsigned SplatUndef = APSplatUndef.getZExtValue();
6895  unsigned SplatSize = SplatBitSize / 8;
6896 
6897  // First, handle single instruction cases.
6898 
6899  // All zeros?
6900  if (SplatBits == 0) {
6901  // Canonicalize all zero vectors to be v4i32.
6902  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
6903  SDValue Z = DAG.getConstant(0, dl, MVT::i32);
6904  Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
6905  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
6906  }
6907  return Op;
6908  }
6909 
6910  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
6911  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
6912  (32-SplatBitSize));
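  // (Illustrative: an 8-bit splat of 0xF0 becomes 0xF0000000 after the left
  // shift, and the arithmetic shift back down yields -16, which lands in the
  // [-16,15] VSPLTI immediate range checked below.)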
6913  if (SextVal >= -16 && SextVal <= 15)
6914  return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
6915 
6916 
6917  // Two instruction sequences.
6918 
6919  // If this value is in the range [-32,30] and is even, use:
6920  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
6921  // If this value is in the range [17,31] and is odd, use:
6922  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
6923  // If this value is in the range [-31,-17] and is odd, use:
6924  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
6925  // Note the last two are three-instruction sequences.
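  // (Illustrative: SextVal == 19 is odd and in [17,31], so the pseudo below
  // later expands to vsplti(3) - vsplti(-16), since 3 - (-16) == 19.)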
6926  if (SextVal >= -32 && SextVal <= 31) {
6927  // To avoid having these optimizations undone by constant folding,
6928  // we convert to a pseudo that will be expanded later into one of
6929  // the above forms.
6930  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
6931  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
6932  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
6933  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
6934  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
6935  if (VT == Op.getValueType())
6936  return RetVal;
6937  else
6938  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
6939  }
6940 
6941  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
6942  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
6943  // for fneg/fabs.
6944  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
6945  // Make a -1 splat using vspltisw -1:
6946  SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
6947 
6948  // Make the VSLW intrinsic, computing 0x8000_0000.
6949  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
6950  OnesV, DAG, dl);
6951 
6952  // xor by OnesV to invert it.
6953  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
6954  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6955  }
6956 
6957  // Check to see if this is a wide variety of vsplti*, binop self cases.
6958  static const signed char SplatCsts[] = {
6959  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
6960  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
6961  };
6962 
6963  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
6964  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
6965  // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1'
6966  // is listed first for that reason.
6966  int i = SplatCsts[idx];
6967 
6968  // Figure out what shift amount altivec will actually use when shifting
6969  // by i at this splat size.
6970  unsigned TypeShiftAmt = i & (SplatBitSize-1);
6971 
6972  // vsplti + shl self.
6973  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
6974  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6975  static const unsigned IIDs[] = { // Intrinsic to use for each size.
6976  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
6977  Intrinsic::ppc_altivec_vslw
6978  };
6979  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6980  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6981  }
6982 
6983  // vsplti + srl self.
6984  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
6985  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6986  static const unsigned IIDs[] = { // Intrinsic to use for each size.
6987  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
6988  Intrinsic::ppc_altivec_vsrw
6989  };
6990  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6991  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6992  }
6993 
6994  // vsplti + sra self.
6995  if (SextVal == ((int)i >> TypeShiftAmt)) {
6996  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6997  static const unsigned IIDs[] = { // Intrinsic to use for each size.
6998  Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
6999  Intrinsic::ppc_altivec_vsraw
7000  };
7001  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7002  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7003  }
7004 
7005  // vsplti + rol self.
7006  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7007  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7008  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7009  static const unsigned IIDs[] = { // Intrinsic to use for each size.
7010  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7011  Intrinsic::ppc_altivec_vrlw
7012  };
7013  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7014  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7015  }
7016 
7017  // t = vsplti c, result = vsldoi t, t, 1
7018  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7019  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7020  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7021  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7022  }
7023  // t = vsplti c, result = vsldoi t, t, 2
7024  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7025  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7026  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7027  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7028  }
7029  // t = vsplti c, result = vsldoi t, t, 3
7030  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7031  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7032  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7033  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7034  }
7035  }
7036 
7037  return SDValue();
7038 }
7039 
7040 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7041 /// the specified operations to build the shuffle.
7042 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7043  SDValue RHS, SelectionDAG &DAG,
7044  SDLoc dl) {
7045  unsigned OpNum = (PFEntry >> 26) & 0x0F;
7046  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7047  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
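  // (Illustrative: each 13-bit ID packs four base-9 digits, one per 32-bit
  // element, with 8 meaning undef; e.g. (1*9+2)*9+3 == 0*729+1*81+2*9+3 is
  // the identity mask <0,1,2,3> recognized by the OP_COPY check below.)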
7048 
7049  enum {
7050  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7051  OP_VMRGHW,
7052  OP_VMRGLW,
7053  OP_VSPLTISW0,
7054  OP_VSPLTISW1,
7055  OP_VSPLTISW2,
7056  OP_VSPLTISW3,
7057  OP_VSLDOI4,
7058  OP_VSLDOI8,
7059  OP_VSLDOI12
7060  };
7061 
7062  if (OpNum == OP_COPY) {
7063  if (LHSID == (1*9+2)*9+3) return LHS;
7064  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7065  return RHS;
7066  }
7067 
7068  SDValue OpLHS, OpRHS;
7069  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7070  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7071 
7072  int ShufIdxs[16];
7073  switch (OpNum) {
7074  default: llvm_unreachable("Unknown i32 permute!");
7075  case OP_VMRGHW:
7076  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
7077  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7078  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
7079  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7080  break;
7081  case OP_VMRGLW:
7082  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7083  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7084  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7085  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7086  break;
7087  case OP_VSPLTISW0:
7088  for (unsigned i = 0; i != 16; ++i)
7089  ShufIdxs[i] = (i&3)+0;
7090  break;
7091  case OP_VSPLTISW1:
7092  for (unsigned i = 0; i != 16; ++i)
7093  ShufIdxs[i] = (i&3)+4;
7094  break;
7095  case OP_VSPLTISW2:
7096  for (unsigned i = 0; i != 16; ++i)
7097  ShufIdxs[i] = (i&3)+8;
7098  break;
7099  case OP_VSPLTISW3:
7100  for (unsigned i = 0; i != 16; ++i)
7101  ShufIdxs[i] = (i&3)+12;
7102  break;
7103  case OP_VSLDOI4:
7104  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7105  case OP_VSLDOI8:
7106  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7107  case OP_VSLDOI12:
7108  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7109  }
7110  EVT VT = OpLHS.getValueType();
7111  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7112  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7113  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7114  return DAG.getNode(ISD::BITCAST, dl, VT, T);
7115 }
7116 
7117 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
7118 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
7119 /// return the code it can be lowered into. Worst case, it can always be
7120 /// lowered into a vperm.
7121 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7122  SelectionDAG &DAG) const {
7123  SDLoc dl(Op);
7124  SDValue V1 = Op.getOperand(0);
7125  SDValue V2 = Op.getOperand(1);
7126  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7127  EVT VT = Op.getValueType();
7128  bool isLittleEndian = Subtarget.isLittleEndian();
7129 
7130  if (Subtarget.hasQPX()) {
7131  if (VT.getVectorNumElements() != 4)
7132  return SDValue();
7133 
7134  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
7135 
7136  int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7137  if (AlignIdx != -1) {
7138  return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7139  DAG.getConstant(AlignIdx, dl, MVT::i32));
7140  } else if (SVOp->isSplat()) {
7141  int SplatIdx = SVOp->getSplatIndex();
7142  if (SplatIdx >= 4) {
7143  std::swap(V1, V2);
7144  SplatIdx -= 4;
7145  }
7146 
7147  // FIXME: If SplatIdx == 0 and the input came from a load, then there is
7148  // nothing to do.
7149 
7150  return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7151  DAG.getConstant(SplatIdx, dl, MVT::i32));
7152  }
7153 
7154  // Lower this into a qvgpci/qvfperm pair.
7155 
7156  // Compute the qvgpci literal
7157  unsigned idx = 0;
7158  for (unsigned i = 0; i < 4; ++i) {
7159  int m = SVOp->getMaskElt(i);
7160  unsigned mm = m >= 0 ? (unsigned) m : i;
7161  idx |= mm << (3-i)*3;
7162  }
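    // (Illustrative: the mask <2,0,1,3> packs as (2<<9)|(0<<6)|(1<<3)|3
    // == 1035, the immediate handed to qvgpci below.)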
7163 
7164  SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7165  DAG.getConstant(idx, dl, MVT::i32));
7166  return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7167  }
7168 
7169  // Cases that are handled by instructions that take permute immediates
7170  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7171  // selected by the instruction selector.
7172  if (V2.getOpcode() == ISD::UNDEF) {
7173  if (PPC::isSplatShuffleMask(SVOp, 1) ||
7174  PPC::isSplatShuffleMask(SVOp, 2) ||
7175  PPC::isSplatShuffleMask(SVOp, 4) ||
7176  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7177  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7178  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7179  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7180  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7181  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7182  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7183  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7184  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7185  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
7186  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
7187  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
7188  return Op;
7189  }
7190  }
7191 
7192  // Altivec has a variety of "shuffle immediates" that take two vector inputs
7193  // and produce a fixed permutation. If any of these match, do not lower to
7194  // VPERM.
7195  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
7196  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7197  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7198  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7199  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7200  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7201  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7202  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7203  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7204  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7205  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7206  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
7207  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
7208  return Op;
7209 
7210  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
7211  // perfect shuffle table to emit an optimal matching sequence.
7212  ArrayRef<int> PermMask = SVOp->getMask();
7213 
7214  unsigned PFIndexes[4];
7215  bool isFourElementShuffle = true;
7216  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7217  unsigned EltNo = 8; // Start out undef.
7218  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
7219  if (PermMask[i*4+j] < 0)
7220  continue; // Undef, ignore it.
7221 
7222  unsigned ByteSource = PermMask[i*4+j];
7223  if ((ByteSource & 3) != j) {
7224  isFourElementShuffle = false;
7225  break;
7226  }
7227 
7228  if (EltNo == 8) {
7229  EltNo = ByteSource/4;
7230  } else if (EltNo != ByteSource/4) {
7231  isFourElementShuffle = false;
7232  break;
7233  }
7234  }
7235  PFIndexes[i] = EltNo;
7236  }
7237 
7238  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7239  // perfect shuffle vector to determine if it is cost effective to do this as
7240  // discrete instructions, or whether we should use a vperm.
7241  // For now, we skip this for little endian until such time as we have a
7242  // little-endian perfect shuffle table.
7243  if (isFourElementShuffle && !isLittleEndian) {
7244  // Compute the index in the perfect shuffle table.
7245  unsigned PFTableIndex =
7246  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7247 
7248  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7249  unsigned Cost = (PFEntry >> 30);
7250 
7251  // Determining when to avoid vperm is tricky. Many things affect the cost
7252  // of vperm, particularly how many times the perm mask needs to be computed.
7253  // For example, if the perm mask can be hoisted out of a loop or is already
7254  // used (perhaps because there are multiple permutes with the same shuffle
7255  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
7256  // the loop requires an extra register.
7257  //
7258  // As a compromise, we only emit discrete instructions if the shuffle can be
7259  // generated in 3 or fewer operations. When we have loop information
7260  // available, if this block is within a loop, we should avoid using vperm
7261  // for 3-operation perms and use a constant pool load instead.
7262  if (Cost < 3)
7263  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7264  }
7265 
7266  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7267  // vector that will get spilled to the constant pool.
7268  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
7269 
7270  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
7271  // that it is in input element units, not in bytes. Convert now.
7272 
7273  // For little endian, the order of the input vectors is reversed, and
7274  // the permutation mask is complemented with respect to 31. This is
7275  // necessary to produce proper semantics with the big-endian-biased vperm
7276  // instruction.
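  // (Illustrative: vperm numbers the 32 input bytes big-endian, so the byte
  // an LE shuffle wants at position SrcElt*BytesPerElement + j is named by
  // mask entry 31 - (SrcElt*BytesPerElement + j) once V1 and V2 are swapped,
  // which is exactly what the loop below emits.)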
7277  EVT EltVT = V1.getValueType().getVectorElementType();
7278  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7279 
7280  SmallVector<SDValue, 16> ResultMask;
7281  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7282  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7283 
7284  for (unsigned j = 0; j != BytesPerElement; ++j)
7285  if (isLittleEndian)
7286  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
7287  dl, MVT::i32));
7288  else
7289  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
7290  MVT::i32));
7291  }
7292 
7293  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
7294  ResultMask);
7295  if (isLittleEndian)
7296  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7297  V2, V1, VPermMask);
7298  else
7299  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7300  V1, V2, VPermMask);
7301 }
7302 
7303 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
7304 /// altivec comparison. If it is, return true and fill in Opc/isDot with
7305 /// information about the intrinsic.
7306 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
7307  bool &isDot, const PPCSubtarget &Subtarget) {
7308  unsigned IntrinsicID =
7309  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7310  CompareOpc = -1;
7311  isDot = false;
7312  switch (IntrinsicID) {
7313  default: return false;
7314  // Comparison predicates.
7315  case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
7316  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7317  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
7318  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
7319  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7320  case Intrinsic::ppc_altivec_vcmpequd_p:
7321  if (Subtarget.hasP8Altivec()) {
7322  CompareOpc = 199;
7323  isDot = 1;
7324  }
7325  else
7326  return false;
7327 
7328  break;
7329  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7330  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7331  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7332  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7333  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7334  case Intrinsic::ppc_altivec_vcmpgtsd_p:
7335  if (Subtarget.hasP8Altivec()) {
7336  CompareOpc = 967;
7337  isDot = 1;
7338  }
7339  else
7340  return false;
7341 
7342  break;
7343  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7344  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7345  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7346  case Intrinsic::ppc_altivec_vcmpgtud_p:
7347  if (Subtarget.hasP8Altivec()) {
7348  CompareOpc = 711;
7349  isDot = 1;
7350  }
7351  else
7352  return false;
7353 
7354  break;
7355 
7356  // Normal Comparisons.
7357  case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
7358  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
7359  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
7360  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
7361  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
7362  case Intrinsic::ppc_altivec_vcmpequd:
7363  if (Subtarget.hasP8Altivec()) {
7364  CompareOpc = 199;
7365  isDot = 0;
7366  }
7367  else
7368  return false;
7369 
7370  break;
7371  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
7372  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
7373  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
7374  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
7375  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
7376  case Intrinsic::ppc_altivec_vcmpgtsd:
7377  if (Subtarget.hasP8Altivec()) {
7378  CompareOpc = 967;
7379  isDot = 0;
7380  }
7381  else
7382  return false;
7383 
7384  break;
7385  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
7386  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
7387  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
7388  case Intrinsic::ppc_altivec_vcmpgtud:
7389  if (Subtarget.hasP8Altivec()) {
7390  CompareOpc = 711;
7391  isDot = 0;
7392  }
7393  else
7394  return false;
7395 
7396  break;
7397  }
7398  return true;
7399 }
7400 
7401 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7402 /// lower, do it, otherwise return null.
7403 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7404  SelectionDAG &DAG) const {
7405  // If this is a lowered altivec predicate compare, CompareOpc is set to the
7406  // opcode number of the comparison.
7407  SDLoc dl(Op);
7408  int CompareOpc;
7409  bool isDot;
7410  if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
7411  return SDValue(); // Don't custom lower most intrinsics.
7412 
7413  // If this is a non-dot comparison, make the VCMP node and we are done.
7414  if (!isDot) {
7415  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7416  Op.getOperand(1), Op.getOperand(2),
7417  DAG.getConstant(CompareOpc, dl, MVT::i32));
7418  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7419  }
7420 
7421  // Create the PPCISD altivec 'dot' comparison node.
7422  SDValue Ops[] = {
7423  Op.getOperand(2), // LHS
7424  Op.getOperand(3), // RHS
7425  DAG.getConstant(CompareOpc, dl, MVT::i32)
7426  };
7427  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7428  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7429 
7430  // Now that we have the comparison, emit a copy from the CR to a GPR.
7431  // This is flagged to the above dot comparison.
7432  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7433  DAG.getRegister(PPC::CR6, MVT::i32),
7434  CompNode.getValue(1));
7435 
7436  // Unpack the result based on how the target uses it.
7437  unsigned BitNo; // Bit # of CR6.
7438  bool InvertBit; // Invert result?
7439  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7440  default: // Can't happen, don't crash on invalid number though.
7441  case 0: // Return the value of the EQ bit of CR6.
7442  BitNo = 0; InvertBit = false;
7443  break;
7444  case 1: // Return the inverted value of the EQ bit of CR6.
7445  BitNo = 0; InvertBit = true;
7446  break;
7447  case 2: // Return the value of the LT bit of CR6.
7448  BitNo = 2; InvertBit = false;
7449  break;
7450  case 3: // Return the inverted value of the LT bit of CR6.
7451  BitNo = 2; InvertBit = true;
7452  break;
7453  }
7454 
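  // (Illustrative, assuming the usual MFOCRF layout: the four CR6 bits land
  // in bits 7:4 of the GPR, so BitNo 0 yields a shift of 8 - 3 = 5 and
  // BitNo 2 a shift of 8 - 1 = 7 before the low bit is isolated below.)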
7455  // Shift the bit into the low position.
7456  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7457  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
7458  // Isolate the bit.
7459  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7460  DAG.getConstant(1, dl, MVT::i32));
7461 
7462  // If we are supposed to, toggle the bit.
7463  if (InvertBit)
7464  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7465  DAG.getConstant(1, dl, MVT::i32));
7466  return Flags;
7467 }
7468 
7469 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7470  SelectionDAG &DAG) const {
7471  SDLoc dl(Op);
7472  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7473  // instructions), but for smaller types, we need to first extend up to v2i32
7474  // before going farther.
7475  if (Op.getValueType() == MVT::v2i64) {
7476  EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7477  if (ExtVT != MVT::v2i32) {
7478  Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7479  Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7480  DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7481  ExtVT.getVectorElementType(), 4)));
7482  Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7483  Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7484  DAG.getValueType(MVT::v2i32));
7485  }
7486 
7487  return Op;
7488  }
7489 
7490  return SDValue();
7491 }
7492 
7493 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7494  SelectionDAG &DAG) const {
7495  SDLoc dl(Op);
7496  // Create a stack slot that is 16-byte aligned.
7497  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7498  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7499  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7500  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7501 
7502  // Store the input value into Value#0 of the stack slot.
7503  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
7504  Op.getOperand(0), FIdx, MachinePointerInfo(),
7505  false, false, 0);
7506  // Load it out.
7507  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
7508  false, false, false, 0);
7509 }
7510 
7511 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7512  SelectionDAG &DAG) const {
7513  SDLoc dl(Op);
7514  SDNode *N = Op.getNode();
7515 
7516  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7517  "Unknown extract_vector_elt type");
7518 
7519  SDValue Value = N->getOperand(0);
7520 
7521  // The first part of this is like the store lowering except that we don't
7522  // need to track the chain.
7523 
7524  // The values are now known to be -1 (false) or 1 (true). To convert this
7525  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7526  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
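  // (Worked check, illustrative: V = -1 gives 0.5*(-1) + 0.5 = 0.0 and
  // V = 1 gives 0.5*1 + 0.5 = 1.0, so each lane becomes exactly 0.0 or 1.0.)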
7527  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7528 
7529  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7530  // understand how to form the extending load.
7531  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
7532  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7533  FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7534 
7535  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7536 
7537  // Now convert to an integer and store.
7538  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7539  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7540  Value);
7541 
7542  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7543  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7544  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7545  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7546  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7547 
7548  SDValue StoreChain = DAG.getEntryNode();
7549  SmallVector<SDValue, 4> Ops;
7550  Ops.push_back(StoreChain);
7551  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
7552  Ops.push_back(Value);
7553  Ops.push_back(FIdx);
7554 
7555  SmallVector<EVT, 2> ValueVTs;
7556  ValueVTs.push_back(MVT::Other); // chain
7557  SDVTList VTs = DAG.getVTList(ValueVTs);
7558 
7559  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7560  dl, VTs, Ops, MVT::v4i32, PtrInfo);
7561 
7562  // Extract the value requested.
7563  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7564  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7565  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7566 
7567  SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7568  PtrInfo.getWithOffset(Offset),
7569  false, false, false, 0);
7570 
7571  if (!Subtarget.useCRBits())
7572  return IntVal;
7573 
7574  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
7575 }
7576 
7577 /// Lowering for QPX v4i1 loads
7578 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
7579  SelectionDAG &DAG) const {
7580  SDLoc dl(Op);
7581  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
7582  SDValue LoadChain = LN->getChain();
7583  SDValue BasePtr = LN->getBasePtr();
7584 
7585  if (Op.getValueType() == MVT::v4f64 ||
7586  Op.getValueType() == MVT::v4f32) {
7587  EVT MemVT = LN->getMemoryVT();
7588  unsigned Alignment = LN->getAlignment();
7589 
7590  // If this load is properly aligned, then it is legal.
7591  if (Alignment >= MemVT.getStoreSize())
7592  return Op;
7593 
7594  EVT ScalarVT = Op.getValueType().getScalarType(),
7595  ScalarMemVT = MemVT.getScalarType();
7596  unsigned Stride = ScalarMemVT.getStoreSize();
7597 
7598  SmallVector<SDValue, 8> Vals, LoadChains;
7599  for (unsigned Idx = 0; Idx < 4; ++Idx) {
7600  SDValue Load;
7601  if (ScalarVT != ScalarMemVT)
7602  Load =
7603  DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
7604  BasePtr,
7605  LN->getPointerInfo().getWithOffset(Idx*Stride),
7606  ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
7607  LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7608  LN->getAAInfo());
7609  else
7610  Load =
7611  DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
7612  LN->getPointerInfo().getWithOffset(Idx*Stride),
7613  LN->isVolatile(), LN->isNonTemporal(),
7614  LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7615  LN->getAAInfo());
7616 
7617  if (Idx == 0 && LN->isIndexed()) {
7618  assert(LN->getAddressingMode() == ISD::PRE_INC &&
7619  "Unknown addressing mode on vector load");
7620  Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
7621  LN->getAddressingMode());
7622  }
7623 
7624  Vals.push_back(Load);
7625  LoadChains.push_back(Load.getValue(1));
7626 
7627  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7628  DAG.getConstant(Stride, dl,
7629  BasePtr.getValueType()));
7630  }
7631 
7632  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7633  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
7634  Op.getValueType(), Vals);
7635 
7636  if (LN->isIndexed()) {
7637  SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
7638  return DAG.getMergeValues(RetOps, dl);
7639  }
7640 
7641  SDValue RetOps[] = { Value, TF };
7642  return DAG.getMergeValues(RetOps, dl);
7643  }
7644 
7645  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
7646  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
7647 
7648  // To lower v4i1 from a byte array, we load the byte elements of the
7649  // vector and then reuse the BUILD_VECTOR logic.
7650 
7651  SmallVector<SDValue, 4> VectElmts, VectElmtChains;
7652  for (unsigned i = 0; i < 4; ++i) {
7653  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
7654  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7655 
7656  VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
7657  dl, MVT::i32, LoadChain, Idx,
7658  LN->getPointerInfo().getWithOffset(i),
7659  MVT::i8 /* memory type */,
7660  LN->isVolatile(), LN->isNonTemporal(),
7661  LN->isInvariant(),
7662  1 /* alignment */, LN->getAAInfo()));
7663  VectElmtChains.push_back(VectElmts[i].getValue(1));
7664  }
7665 
7666  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
7667  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
7668 
7669  SDValue RVals[] = { Value, LoadChain };
7670  return DAG.getMergeValues(RVals, dl);
7671 }
7672 
7673 /// Lowering for QPX v4i1 stores
7674 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
7675  SelectionDAG &DAG) const {
7676  SDLoc dl(Op);
7677  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
7678  SDValue StoreChain = SN->getChain();
7679  SDValue BasePtr = SN->getBasePtr();
7680  SDValue Value = SN->getValue();
7681 
7682  if (Value.getValueType() == MVT::v4f64 ||
7683  Value.getValueType() == MVT::v4f32) {
7684  EVT MemVT = SN->getMemoryVT();
7685  unsigned Alignment = SN->getAlignment();
7686 
7687  // If this store is properly aligned, then it is legal.
7688  if (Alignment >= MemVT.getStoreSize())
7689  return Op;
7690 
7691  EVT ScalarVT = Value.getValueType().getScalarType(),
7692  ScalarMemVT = MemVT.getScalarType();
7693  unsigned Stride = ScalarMemVT.getStoreSize();
7694 
7695  SmallVector<SDValue, 8> Stores;
7696  for (unsigned Idx = 0; Idx < 4; ++Idx) {
7697  SDValue Ex = DAG.getNode(
7698  ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
7699  DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
7700  SDValue Store;
7701  if (ScalarVT != ScalarMemVT)
7702  Store =
7703  DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
7704  SN->getPointerInfo().getWithOffset(Idx*Stride),
7705  ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
7706  MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7707  else
7708  Store =
7709  DAG.getStore(StoreChain, dl, Ex, BasePtr,
7710  SN->getPointerInfo().getWithOffset(Idx*Stride),
7711  SN->isVolatile(), SN->isNonTemporal(),
7712  MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7713 
7714  if (Idx == 0 && SN->isIndexed()) {
7715  assert(SN->getAddressingMode() == ISD::PRE_INC &&
7716  "Unknown addressing mode on vector store");
7717  Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
7718  SN->getAddressingMode());
7719  }
7720 
7721  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7722  DAG.getConstant(Stride, dl,
7723  BasePtr.getValueType()));
7724  Stores.push_back(Store);
7725  }
7726 
7727  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7728 
7729  if (SN->isIndexed()) {
7730  SDValue RetOps[] = { TF, Stores[0].getValue(1) };
7731  return DAG.getMergeValues(RetOps, dl);
7732  }
7733 
7734  return TF;
7735  }
7736 
7737  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
7738  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
7739 
7740  // The values are now known to be -1 (false) or 1 (true). To convert this
7741  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7742  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7743  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7744 
7745  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7746  // understand how to form the extending load.
7747  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
7748  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7749  FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7750 
7751  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7752 
7753  // Now convert to an integer and store.
7754  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7755  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7756  Value);
7757 
7758  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7759  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7760  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7761  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7762  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7763 
7764  SmallVector<SDValue, 4> Ops;
7765  Ops.push_back(StoreChain);
7766  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
7767  Ops.push_back(Value);
7768  Ops.push_back(FIdx);
7769 
7770  SmallVector<EVT, 2> ValueVTs;
7771  ValueVTs.push_back(MVT::Other); // chain
7772  SDVTList VTs = DAG.getVTList(ValueVTs);
7773 
7774  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7775  dl, VTs, Ops, MVT::v4i32, PtrInfo);
7776 
7777  // Move data into the byte array.
7778  SmallVector<SDValue, 4> Loads, LoadChains;
7779  for (unsigned i = 0; i < 4; ++i) {
7780  unsigned Offset = 4*i;
7781  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7782  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7783 
7784  Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7785  PtrInfo.getWithOffset(Offset),
7786  false, false, false, 0));
7787  LoadChains.push_back(Loads[i].getValue(1));
7788  }
7789 
7790  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7791 
7792  SmallVector<SDValue, 4> Stores;
7793  for (unsigned i = 0; i < 4; ++i) {
7794  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
7795  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7796 
7797  Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
7798  SN->getPointerInfo().getWithOffset(i),
7799  MVT::i8 /* memory type */,
7800  SN->isVolatile(), SN->isNonTemporal(),
7801  1 /* alignment */, SN->getAAInfo()));
7802  }
7803 
7804  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7805 
7806  return StoreChain;
7807 }
7808 
7809 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
7810  SDLoc dl(Op);
7811  if (Op.getValueType() == MVT::v4i32) {
7812  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7813 
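    // (Illustrative: writing each 32-bit lane as a = a_hi*2^16 + a_lo, the
    // product mod 2^32 is a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16);
    // vmulouh below produces the a_lo*b_lo terms, and vmsumuhm against the
    // rotated RHS accumulates both cross terms before the final shift/add.)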
7814  SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
7815  SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
7816 
7817  SDValue RHSSwap = // = vrlw RHS, 16
7818  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
7819 
7820  // Shrinkify inputs to v8i16.
7821  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
7822  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
7823  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
7824 
7825  // Low parts multiplied together, generating 32-bit results (we ignore the
7826  // top parts).
7827  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
7828  LHS, RHS, DAG, dl, MVT::v4i32);
7829 
7830  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
7831  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
7832  // Shift the high parts up 16 bits.
7833  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
7834  Neg16, DAG, dl);
7835  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
7836  } else if (Op.getValueType() == MVT::v8i16) {
7837  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7838 
7839  SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
7840 
7841  return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
7842  LHS, RHS, Zero, DAG, dl);
7843  } else if (Op.getValueType() == MVT::v16i8) {
7844  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7845  bool isLittleEndian = Subtarget.isLittleEndian();
7846 
7847  // Multiply the even 8-bit parts, producing 16-bit sums.
7848  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
7849  LHS, RHS, DAG, dl, MVT::v8i16);
7850  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
7851 
7852  // Multiply the odd 8-bit parts, producing 16-bit sums.
7853  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
7854  LHS, RHS, DAG, dl, MVT::v8i16);
7855  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
7856 
7857  // Merge the results together. Because vmuleub and vmuloub are
7858  // instructions with a big-endian bias, we must reverse the
7859  // element numbering and reverse the meaning of "odd" and "even"
7860  // when generating little endian code.
7861  int Ops[16];
7862  for (unsigned i = 0; i != 8; ++i) {
7863  if (isLittleEndian) {
7864  Ops[i*2 ] = 2*i;
7865  Ops[i*2+1] = 2*i+16;
7866  } else {
7867  Ops[i*2 ] = 2*i+1;
7868  Ops[i*2+1] = 2*i+1+16;
7869  }
7870  }
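// For instance, in big-endian mode the mask comes out as
// {1,17, 3,19, ..., 15,31}: byte 2*i+1 is the low (result) byte of the
// i-th even product, and byte 2*i+1+16 the low byte of the i-th odd product
// taken from the second shuffle operand.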
7871  if (isLittleEndian)
7872  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
7873  else
7874  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
7875  } else {
7876  llvm_unreachable("Unknown mul to lower!");
7877  }
7878 }
7879 
7880 /// LowerOperation - Provide custom lowering hooks for some operations.
7881 ///
7882 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7883  switch (Op.getOpcode()) {
7884  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
7885  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
7886  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7887  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
7888  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7889  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
7890  case ISD::SETCC: return LowerSETCC(Op, DAG);
7891  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
7892  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
7893  case ISD::VASTART:
7894  return LowerVASTART(Op, DAG, Subtarget);
7895 
7896  case ISD::VAARG:
7897  return LowerVAARG(Op, DAG, Subtarget);
7898 
7899  case ISD::VACOPY:
7900  return LowerVACOPY(Op, DAG, Subtarget);
7901 
7902  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
7903  case ISD::DYNAMIC_STACKALLOC:
7904  return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
7905 
7906  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
7907  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
7908 
7909  case ISD::LOAD: return LowerLOAD(Op, DAG);
7910  case ISD::STORE: return LowerSTORE(Op, DAG);
7911  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
7912  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7913  case ISD::FP_TO_UINT:
7914  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
7915  SDLoc(Op));
7916  case ISD::UINT_TO_FP:
7917  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7918  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7919 
7920  // Lower 64-bit shifts.
7921  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
7922  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
7923  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
7924 
7925  // Vector-related lowering.
7926  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
7927  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7928  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7929  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
7930  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
7931  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7932  case ISD::MUL: return LowerMUL(Op, DAG);
7933 
7934  // For counter-based loop handling.
7935  case ISD::INTRINSIC_W_CHAIN: return SDValue();
7936 
7937  // Frame & Return address.
7938  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7939  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7940  }
7941 }
7942 
7943 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
7944  SmallVectorImpl<SDValue> &Results,
7945  SelectionDAG &DAG) const {
7946  SDLoc dl(N);
7947  switch (N->getOpcode()) {
7948  default:
7949  llvm_unreachable("Do not know how to custom type legalize this operation!");
7950  case ISD::READCYCLECOUNTER: {
7951  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
7952  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
7953 
7954  Results.push_back(RTB);
7955  Results.push_back(RTB.getValue(1));
7956  Results.push_back(RTB.getValue(2));
7957  break;
7958  }
7959  case ISD::INTRINSIC_W_CHAIN: {
7960  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
7961  Intrinsic::ppc_is_decremented_ctr_nonzero)
7962  break;
7963 
7964  assert(N->getValueType(0) == MVT::i1 &&
7965  "Unexpected result type for CTR decrement intrinsic");
7966  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
7967  N->getValueType(0));
7968  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
7969  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
7970  N->getOperand(1));
7971 
7972  Results.push_back(NewInt);
7973  Results.push_back(NewInt.getValue(1));
7974  break;
7975  }
7976  case ISD::VAARG: {
7977  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
7978  return;
7979 
7980  EVT VT = N->getValueType(0);
7981 
7982  if (VT == MVT::i64) {
7983  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
7984 
7985  Results.push_back(NewNode);
7986  Results.push_back(NewNode.getValue(1));
7987  }
7988  return;
7989  }
7990  case ISD::FP_ROUND_INREG: {
7991  assert(N->getValueType(0) == MVT::ppcf128);
7992  assert(N->getOperand(0).getValueType() == MVT::ppcf128);
7993  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7994  MVT::f64, N->getOperand(0),
7995  DAG.getIntPtrConstant(0, dl));
7996  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7997  MVT::f64, N->getOperand(0),
7998  DAG.getIntPtrConstant(1, dl));
7999 
8000  // Add the two halves of the long double in round-to-zero mode.
8001  SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8002 
8003  // We know the low half is about to be thrown away, so just use something
8004  // convenient.
8005  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
8006  FPreg, FPreg));
8007  return;
8008  }
8009  case ISD::FP_TO_SINT:
8010  case ISD::FP_TO_UINT:
8011  // LowerFP_TO_INT() can only handle f32 and f64.
8012  if (N->getOperand(0).getValueType() == MVT::ppcf128)
8013  return;
8014  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8015  return;
8016  }
8017 }
8018 
8019 
8020 //===----------------------------------------------------------------------===//
8021 // Other Lowering Code
8022 //===----------------------------------------------------------------------===//
8023 
8024 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8025  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8026  Function *Func = Intrinsic::getDeclaration(M, Id);
8027  return Builder.CreateCall(Func, {});
8028 }
8029 
8030 // The mappings for emitLeading/TrailingFence are taken from
8031 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8032 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8033  AtomicOrdering Ord, bool IsStore,
8034  bool IsLoad) const {
8035  if (Ord == SequentiallyConsistent)
8036  return callIntrinsic(Builder, Intrinsic::ppc_sync);
8037  if (isAtLeastRelease(Ord))
8038  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8039  return nullptr;
8040 }
8041 
8042 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8043  AtomicOrdering Ord, bool IsStore,
8044  bool IsLoad) const {
8045  if (IsLoad && isAtLeastAcquire(Ord))
8046  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8047  // FIXME: this is too conservative, a dependent branch + isync is enough.
8048  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8049  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8050  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8051  return nullptr;
8052 }
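// Summarizing the mapping implemented above (a sketch of the cited table):
//   seq_cst:        sync before the access (loads also get lwsync after)
//   release store:  lwsync before the store
//   acquire load:   lwsync after the load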
8053 
8054 MachineBasicBlock *
8055 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
8056  unsigned AtomicSize,
8057  unsigned BinOpcode) const {
8058  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8059  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8060 
8061  auto LoadMnemonic = PPC::LDARX;
8062  auto StoreMnemonic = PPC::STDCX;
8063  switch (AtomicSize) {
8064  default:
8065  llvm_unreachable("Unexpected size of atomic entity");
8066  case 1:
8067  LoadMnemonic = PPC::LBARX;
8068  StoreMnemonic = PPC::STBCX;
8069  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
8070  break;
8071  case 2:
8072  LoadMnemonic = PPC::LHARX;
8073  StoreMnemonic = PPC::STHCX;
8074  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
8075  break;
8076  case 4:
8077  LoadMnemonic = PPC::LWARX;
8078  StoreMnemonic = PPC::STWCX;
8079  break;
8080  case 8:
8081  LoadMnemonic = PPC::LDARX;
8082  StoreMnemonic = PPC::STDCX;
8083  break;
8084  }
8085 
8086  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8087  MachineFunction *F = BB->getParent();
8088  MachineFunction::iterator It = BB;
8089  ++It;
8090 
8091  unsigned dest = MI->getOperand(0).getReg();
8092  unsigned ptrA = MI->getOperand(1).getReg();
8093  unsigned ptrB = MI->getOperand(2).getReg();
8094  unsigned incr = MI->getOperand(3).getReg();
8095  DebugLoc dl = MI->getDebugLoc();
8096 
8097  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8098  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8099  F->insert(It, loopMBB);
8100  F->insert(It, exitMBB);
8101  exitMBB->splice(exitMBB->begin(), BB,
8102  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8103  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8104 
8105  MachineRegisterInfo &RegInfo = F->getRegInfo();
8106  unsigned TmpReg = (!BinOpcode) ? incr :
8107  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
8108  : &PPC::GPRCRegClass);
8109 
8110  // thisMBB:
8111  // ...
8112  // fallthrough --> loopMBB
8113  BB->addSuccessor(loopMBB);
8114 
8115  // loopMBB:
8116  // l[wd]arx dest, ptr
8117  // add r0, dest, incr
8118  // st[wd]cx. r0, ptr
8119  // bne- loopMBB
8120  // fallthrough --> exitMBB
8121  BB = loopMBB;
8122  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
8123  .addReg(ptrA).addReg(ptrB);
8124  if (BinOpcode)
8125  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
8126  BuildMI(BB, dl, TII->get(StoreMnemonic))
8127  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
8128  BuildMI(BB, dl, TII->get(PPC::BCC))
8129  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8130  BB->addSuccessor(loopMBB);
8131  BB->addSuccessor(exitMBB);
8132 
8133  // exitMBB:
8134  // ...
8135  BB = exitMBB;
8136  return BB;
8137 }
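// As a concrete sketch, a 32-bit atomic fetch-and-add expands to:
//   loop: lwarx  dest, 0, ptr
//         add    tmp, incr, dest
//         stwcx. tmp, 0, ptr
//         bne-   loop
// (memory-ordering fences are inserted separately by
// emitLeadingFence/emitTrailingFence above).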
8138 
8139 MachineBasicBlock *
8140 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
8141  MachineBasicBlock *BB,
8142  bool is8bit, // operation
8143  unsigned BinOpcode) const {
8144  // If we support part-word atomic mnemonics, just use them
8145  if (Subtarget.hasPartwordAtomics())
8146  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
8147 
8148  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8149  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8150  // In 64-bit mode we have to use 64 bits for addresses, even though the
8151  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
8152  // registers without caring whether they're 32 or 64, but here we're
8153  // doing actual arithmetic on the addresses.
8154  bool is64bit = Subtarget.isPPC64();
8155  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8156 
8157  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8158  MachineFunction *F = BB->getParent();
8159  MachineFunction::iterator It = BB;
8160  ++It;
8161 
8162  unsigned dest = MI->getOperand(0).getReg();
8163  unsigned ptrA = MI->getOperand(1).getReg();
8164  unsigned ptrB = MI->getOperand(2).getReg();
8165  unsigned incr = MI->getOperand(3).getReg();
8166  DebugLoc dl = MI->getDebugLoc();
8167 
8168  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8169  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8170  F->insert(It, loopMBB);
8171  F->insert(It, exitMBB);
8172  exitMBB->splice(exitMBB->begin(), BB,
8173  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8174  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8175 
8176  MachineRegisterInfo &RegInfo = F->getRegInfo();
8177  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8178  : &PPC::GPRCRegClass;
8179  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8180  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8181  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
8182  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
8183  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8184  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8185  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8186  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8187  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
8188  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8189  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8190  unsigned Ptr1Reg;
8191  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
8192 
8193  // thisMBB:
8194  // ...
8195  // fallthrough --> loopMBB
8196  BB->addSuccessor(loopMBB);
8197 
8198  // The 4-byte load must be aligned, while a char or short may be
8199  // anywhere in the word. Hence all this nasty bookkeeping code.
8200  // add ptr1, ptrA, ptrB [copy if ptrA==0]
8201  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8202  // xori shift, shift1, 24 [16]
8203  // rlwinm ptr, ptr1, 0, 0, 29
8204  // slw incr2, incr, shift
8205  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8206  // slw mask, mask2, shift
8207  // loopMBB:
8208  // lwarx tmpDest, ptr
8209  // add tmp, tmpDest, incr2
8210  // andc tmp2, tmpDest, mask
8211  // and tmp3, tmp, mask
8212  // or tmp4, tmp3, tmp2
8213  // stwcx. tmp4, ptr
8214  // bne- loopMBB
8215  // fallthrough --> exitMBB
8216  // srw dest, tmpDest, shift
8217  if (ptrA != ZeroReg) {
8218  Ptr1Reg = RegInfo.createVirtualRegister(RC);
8219  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8220  .addReg(ptrA).addReg(ptrB);
8221  } else {
8222  Ptr1Reg = ptrB;
8223  }
8224  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8225  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8226  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8227  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8228  if (is64bit)
8229  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8230  .addReg(Ptr1Reg).addImm(0).addImm(61);
8231  else
8232  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8233  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8234  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
8235  .addReg(incr).addReg(ShiftReg);
8236  if (is8bit)
8237  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8238  else {
8239  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8240  BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
8241  }
8242  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8243  .addReg(Mask2Reg).addReg(ShiftReg);
8244 
8245  BB = loopMBB;
8246  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8247  .addReg(ZeroReg).addReg(PtrReg);
8248  if (BinOpcode)
8249  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
8250  .addReg(Incr2Reg).addReg(TmpDestReg);
8251  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
8252  .addReg(TmpDestReg).addReg(MaskReg);
8253  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
8254  .addReg(TmpReg).addReg(MaskReg);
8255  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
8256  .addReg(Tmp3Reg).addReg(Tmp2Reg);
8257  BuildMI(BB, dl, TII->get(PPC::STWCX))
8258  .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
8259  BuildMI(BB, dl, TII->get(PPC::BCC))
8260  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8261  BB->addSuccessor(loopMBB);
8262  BB->addSuccessor(exitMBB);
8263 
8264  // exitMBB:
8265  // ...
8266  BB = exitMBB;
8267  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
8268  .addReg(ShiftReg);
8269  return BB;
8270 }
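// Worked example for the bookkeeping above (big-endian, 8-bit op, byte
// offset 1 within its aligned word):
//   shift1 = (ptr1 << 3) & 0x18 = 8
//   shift  = shift1 ^ 24       = 16    // byte 1 sits in bits 16..23
//   incr2  = incr << 16,  mask = 0xFF << 16 = 0x00FF0000
// so the loop only modifies bits 16..23 of the containing word.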
8271 
8272 MachineBasicBlock *
8273 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
8274  MachineBasicBlock *MBB) const {
8275  DebugLoc DL = MI->getDebugLoc();
8276  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8277 
8278  MachineFunction *MF = MBB->getParent();
8279  MachineRegisterInfo &MRI = MF->getRegInfo();
8280 
8281  const BasicBlock *BB = MBB->getBasicBlock();
8282  MachineFunction::iterator I = MBB;
8283  ++I;
8284 
8285  // Memory Reference
8286  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
8287  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
8288 
8289  unsigned DstReg = MI->getOperand(0).getReg();
8290  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
8291  assert(RC->hasType(MVT::i32) && "Invalid destination!");
8292  unsigned mainDstReg = MRI.createVirtualRegister(RC);
8293  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
8294 
8295  MVT PVT = getPointerTy(MF->getDataLayout());
8296  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8297  "Invalid Pointer Size!");
8298  // For v = setjmp(buf), we generate
8299  //
8300  // thisMBB:
8301  // SjLjSetup mainMBB
8302  // bl mainMBB
8303  // v_restore = 1
8304  // b sinkMBB
8305  //
8306  // mainMBB:
8307  // buf[LabelOffset] = LR
8308  // v_main = 0
8309  //
8310  // sinkMBB:
8311  // v = phi(main, restore)
8312  //
8313 
8314  MachineBasicBlock *thisMBB = MBB;
8315  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
8316  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
8317  MF->insert(I, mainMBB);
8318  MF->insert(I, sinkMBB);
8319 
8320  MachineInstrBuilder MIB;
8321 
8322  // Transfer the remainder of BB and its successor edges to sinkMBB.
8323  sinkMBB->splice(sinkMBB->begin(), MBB,
8324  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
8325  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8326 
8327  // Note that the structure of the jmp_buf used here is not compatible
8328  // with that used by libc, and is not designed to be. Specifically, it
8329  // stores only those 'reserved' registers that LLVM does not otherwise
8330  // understand how to spill. Also, by convention, by the time this
8331  // intrinsic is called, Clang has already stored the frame address in the
8332  // first slot of the buffer and stack address in the third. Following the
8333  // X86 target code, we'll store the jump address in the second slot. We also
8334  // need to save the TOC pointer (R2) to handle jumps between shared
8335  // libraries, and that will be stored in the fourth slot. The thread
8336  // identifier (R13) is not affected.
8337 
8338  // thisMBB:
8339  const int64_t LabelOffset = 1 * PVT.getStoreSize();
8340  const int64_t TOCOffset = 3 * PVT.getStoreSize();
8341  const int64_t BPOffset = 4 * PVT.getStoreSize();
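// The resulting (LLVM-private) buffer layout, in pointer-sized slots:
//   [0] frame address (stored by Clang)   [1] jump address (LR)
//   [2] stack address (stored by Clang)   [3] TOC pointer (R2)
//   [4] base pointer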
8342 
8343  // Prepare the IP in a register.
8344  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
8345  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
8346  unsigned BufReg = MI->getOperand(1).getReg();
8347 
8348  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
8349  setUsesTOCBasePtr(*MBB->getParent());
8350  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
8351  .addReg(PPC::X2)
8352  .addImm(TOCOffset)
8353  .addReg(BufReg);
8354  MIB.setMemRefs(MMOBegin, MMOEnd);
8355  }
8356 
8357  // Naked functions never have a base pointer, and so we use r1. For all
8358  // other functions, this decision must be delayed until during PEI.
8359  unsigned BaseReg;
8360  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
8361  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
8362  else
8363  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
8364 
8365  MIB = BuildMI(*thisMBB, MI, DL,
8366  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
8367  .addReg(BaseReg)
8368  .addImm(BPOffset)
8369  .addReg(BufReg);
8370  MIB.setMemRefs(MMOBegin, MMOEnd);
8371 
8372  // Setup
8373  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
8374  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
8375  MIB.addRegMask(TRI->getNoPreservedMask());
8376 
8377  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
8378 
8379  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
8380  .addMBB(mainMBB);
8381  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
8382 
8383  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
8384  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
8385 
8386  // mainMBB:
8387  // mainDstReg = 0
8388  MIB =
8389  BuildMI(mainMBB, DL,
8390  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
8391 
8392  // Store IP
8393  if (Subtarget.isPPC64()) {
8394  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
8395  .addReg(LabelReg)
8396  .addImm(LabelOffset)
8397  .addReg(BufReg);
8398  } else {
8399  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
8400  .addReg(LabelReg)
8401  .addImm(LabelOffset)
8402  .addReg(BufReg);
8403  }
8404 
8405  MIB.setMemRefs(MMOBegin, MMOEnd);
8406 
8407  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
8408  mainMBB->addSuccessor(sinkMBB);
8409 
8410  // sinkMBB:
8411  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
8412  TII->get(PPC::PHI), DstReg)
8413  .addReg(mainDstReg).addMBB(mainMBB)
8414  .addReg(restoreDstReg).addMBB(thisMBB);
8415 
8416  MI->eraseFromParent();
8417  return sinkMBB;
8418 }
8419 
8420 MachineBasicBlock *
8421 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
8422  MachineBasicBlock *MBB) const {
8423  DebugLoc DL = MI->getDebugLoc();
8424  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8425 
8426  MachineFunction *MF = MBB->getParent();
8427  MachineRegisterInfo &MRI = MF->getRegInfo();
8428 
8429  // Memory Reference
8430  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
8431  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
8432 
8433  MVT PVT = getPointerTy(MF->getDataLayout());
8434  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8435  "Invalid Pointer Size!");
8436 
8437  const TargetRegisterClass *RC =
8438  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
8439  unsigned Tmp = MRI.createVirtualRegister(RC);
8440  // Since FP is only updated here but NOT referenced, it's treated as GPR.
8441  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
8442  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
8443  unsigned BP =
8444  (PVT == MVT::i64)
8445  ? PPC::X30
8446  : (Subtarget.isSVR4ABI() &&
8447  MF->getTarget().getRelocationModel() == Reloc::PIC_
8448  ? PPC::R29
8449  : PPC::R30);
8450 
8451  MachineInstrBuilder MIB;
8452 
8453  const int64_t LabelOffset = 1 * PVT.getStoreSize();
8454  const int64_t SPOffset = 2 * PVT.getStoreSize();
8455  const int64_t TOCOffset = 3 * PVT.getStoreSize();
8456  const int64_t BPOffset = 4 * PVT.getStoreSize();
8457 
8458  unsigned BufReg = MI->getOperand(0).getReg();
8459 
8460  // Reload FP (the jumped-to function may not have had a
8461  // frame pointer, and if so, then its r31 will be restored
8462  // as necessary).
8463  if (PVT == MVT::i64) {
8464  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
8465  .addImm(0)
8466  .addReg(BufReg);
8467  } else {
8468  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
8469  .addImm(0)
8470  .addReg(BufReg);
8471  }
8472  MIB.setMemRefs(MMOBegin, MMOEnd);
8473 
8474  // Reload IP
8475  if (PVT == MVT::i64) {
8476  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
8477  .addImm(LabelOffset)
8478  .addReg(BufReg);
8479  } else {
8480  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
8481  .addImm(LabelOffset)
8482  .addReg(BufReg);
8483  }
8484  MIB.setMemRefs(MMOBegin, MMOEnd);
8485 
8486  // Reload SP
8487  if (PVT == MVT::i64) {
8488  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
8489  .addImm(SPOffset)
8490  .addReg(BufReg);
8491  } else {
8492  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
8493  .addImm(SPOffset)
8494  .addReg(BufReg);
8495  }
8496  MIB.setMemRefs(MMOBegin, MMOEnd);
8497 
8498  // Reload BP
8499  if (PVT == MVT::i64) {
8500  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
8501  .addImm(BPOffset)
8502  .addReg(BufReg);
8503  } else {
8504  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
8505  .addImm(BPOffset)
8506  .addReg(BufReg);
8507  }
8508  MIB.setMemRefs(MMOBegin, MMOEnd);
8509 
8510  // Reload TOC
8511  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
8512  setUsesTOCBasePtr(*MBB->getParent());
8513  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
8514  .addImm(TOCOffset)
8515  .addReg(BufReg);
8516 
8517  MIB.setMemRefs(MMOBegin, MMOEnd);
8518  }
8519 
8520  // Jump
8521  BuildMI(*MBB, MI, DL,
8522  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
8523  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
8524 
8525  MI->eraseFromParent();
8526  return MBB;
8527 }
8528 
8529 MachineBasicBlock *
8530 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
8531  MachineBasicBlock *BB) const {
8532  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
8533  MI->getOpcode() == TargetOpcode::PATCHPOINT) {
8534  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
8535  MI->getOpcode() == TargetOpcode::PATCHPOINT) {
8536  // Call lowering should have added an r2 operand to indicate a dependence
8537  // on the TOC base pointer value. It can't, however, because there is no
8538  // way to mark the dependence as implicit there, and so the stackmap code
8539  // will confuse it with a regular operand. Instead, add the dependence
8540  // here.
8541  setUsesTOCBasePtr(*BB->getParent());
8542  MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
8543  }
8544 
8545  return emitPatchPoint(MI, BB);
8546  }
8547 
8548  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
8549  MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
8550  return emitEHSjLjSetJmp(MI, BB);
8551  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
8552  MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
8553  return emitEHSjLjLongJmp(MI, BB);
8554  }
8555 
8556  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8557 
8558  // To "insert" these instructions we actually have to insert their
8559  // control-flow patterns.
8560  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8561  MachineFunction::iterator It = BB;
8562  ++It;
8563 
8564  MachineFunction *F = BB->getParent();
8565 
8566  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8567  MI->getOpcode() == PPC::SELECT_CC_I8 ||
8568  MI->getOpcode() == PPC::SELECT_I4 ||
8569  MI->getOpcode() == PPC::SELECT_I8)) {
8570  SmallVector<MachineOperand, 2> Cond;
8571  if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8572  MI->getOpcode() == PPC::SELECT_CC_I8)
8573  Cond.push_back(MI->getOperand(4));
8574  else
8575  Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
8576  Cond.push_back(MI->getOperand(1));
8577 
8578  DebugLoc dl = MI->getDebugLoc();
8579  TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
8580  Cond, MI->getOperand(2).getReg(),
8581  MI->getOperand(3).getReg());
8582  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8583  MI->getOpcode() == PPC::SELECT_CC_I8 ||
8584  MI->getOpcode() == PPC::SELECT_CC_F4 ||
8585  MI->getOpcode() == PPC::SELECT_CC_F8 ||
8586  MI->getOpcode() == PPC::SELECT_CC_QFRC ||
8587  MI->getOpcode() == PPC::SELECT_CC_QSRC ||
8588  MI->getOpcode() == PPC::SELECT_CC_QBRC ||
8589  MI->getOpcode() == PPC::SELECT_CC_VRRC ||
8590  MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
8591  MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
8592  MI->getOpcode() == PPC::SELECT_CC_VSRC ||
8593  MI->getOpcode() == PPC::SELECT_I4 ||
8594  MI->getOpcode() == PPC::SELECT_I8 ||
8595  MI->getOpcode() == PPC::SELECT_F4 ||
8596  MI->getOpcode() == PPC::SELECT_F8 ||
8597  MI->getOpcode() == PPC::SELECT_QFRC ||
8598  MI->getOpcode() == PPC::SELECT_QSRC ||
8599  MI->getOpcode() == PPC::SELECT_QBRC ||
8600  MI->getOpcode() == PPC::SELECT_VRRC ||
8601  MI->getOpcode() == PPC::SELECT_VSFRC ||
8602  MI->getOpcode() == PPC::SELECT_VSSRC ||
8603  MI->getOpcode() == PPC::SELECT_VSRC) {
8604  // The incoming instruction knows the destination vreg to set, the
8605  // condition code register to branch on, the true/false values to
8606  // select between, and a branch opcode to use.
8607 
8608  // thisMBB:
8609  // ...
8610  // TrueVal = ...
8611  // cmpTY ccX, r1, r2
8612  // bCC copy1MBB
8613  // fallthrough --> copy0MBB
8614  MachineBasicBlock *thisMBB = BB;
8615  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
8616  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
8617  DebugLoc dl = MI->getDebugLoc();
8618  F->insert(It, copy0MBB);
8619  F->insert(It, sinkMBB);
8620 
8621  // Transfer the remainder of BB and its successor edges to sinkMBB.
8622  sinkMBB->splice(sinkMBB->begin(), BB,
8623  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8624  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
8625 
8626  // Next, add the true and fallthrough blocks as its successors.
8627  BB->addSuccessor(copy0MBB);
8628  BB->addSuccessor(sinkMBB);
8629 
8630  if (MI->getOpcode() == PPC::SELECT_I4 ||
8631  MI->getOpcode() == PPC::SELECT_I8 ||
8632  MI->getOpcode() == PPC::SELECT_F4 ||
8633  MI->getOpcode() == PPC::SELECT_F8 ||
8634  MI->getOpcode() == PPC::SELECT_QFRC ||
8635  MI->getOpcode() == PPC::SELECT_QSRC ||
8636  MI->getOpcode() == PPC::SELECT_QBRC ||
8637  MI->getOpcode() == PPC::SELECT_VRRC ||
8638  MI->getOpcode() == PPC::SELECT_VSFRC ||
8639  MI->getOpcode() == PPC::SELECT_VSSRC ||
8640  MI->getOpcode() == PPC::SELECT_VSRC) {
8641  BuildMI(BB, dl, TII->get(PPC::BC))
8642  .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
8643  } else {
8644  unsigned SelectPred = MI->getOperand(4).getImm();
8645  BuildMI(BB, dl, TII->get(PPC::BCC))
8646  .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
8647  }
8648 
8649  // copy0MBB:
8650  // %FalseValue = ...
8651  // # fallthrough to sinkMBB
8652  BB = copy0MBB;
8653 
8654  // Update machine-CFG edges
8655  BB->addSuccessor(sinkMBB);
8656 
8657  // sinkMBB:
8658  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
8659  // ...
8660  BB = sinkMBB;
8661  BuildMI(*BB, BB->begin(), dl,
8662  TII->get(PPC::PHI), MI->getOperand(0).getReg())
8663  .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
8664  .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
8665  } else if (MI->getOpcode() == PPC::ReadTB) {
8666  // To read the 64-bit time-base register on a 32-bit target, we read the
8667  // two halves. Should the counter have wrapped while it was being read, we
8668  // need to try again.
8669  // ...
8670  // readLoop:
8671  // mfspr Rx,TBU # load from TBU
8672  // mfspr Ry,TB # load from TB
8673  // mfspr Rz,TBU # load from TBU
8674  // cmpw crX,Rx,Rz # check if 'old' == 'new'
8675  // bne readLoop # branch if they're not equal
8676  // ...
8677 
8678  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
8679  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
8680  DebugLoc dl = MI->getDebugLoc();
8681  F->insert(It, readMBB);
8682  F->insert(It, sinkMBB);
8683 
8684  // Transfer the remainder of BB and its successor edges to sinkMBB.
8685  sinkMBB->splice(sinkMBB->begin(), BB,
8686  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8687  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
8688 
8689  BB->addSuccessor(readMBB);
8690  BB = readMBB;
8691 
8692  MachineRegisterInfo &RegInfo = F->getRegInfo();
8693  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8694  unsigned LoReg = MI->getOperand(0).getReg();
8695  unsigned HiReg = MI->getOperand(1).getReg();
8696 
8697  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
8698  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
8699  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
8700 
8701  unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
8702 
8703  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
8704  .addReg(HiReg).addReg(ReadAgainReg);
8705  BuildMI(BB, dl, TII->get(PPC::BCC))
8706  .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
8707 
8708  BB->addSuccessor(readMBB);
8709  BB->addSuccessor(sinkMBB);
8710  }
8711  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
8712  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
8713  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
8714  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
8715  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
8716  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
8717  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
8718  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
8719 
8720  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
8721  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
8722  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
8723  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
8724  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
8725  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
8726  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
8727  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
8728 
8729  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
8730  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
8731  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
8732  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
8733  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
8734  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
8735  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
8736  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
8737 
8738  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
8739  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
8740  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
8741  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
8742  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
8743  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
8744  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
8745  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
8746 
8747  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
8748  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
8749  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
8750  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
8751  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
8752  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
8753  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
8754  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
8755 
8756  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
8757  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
8758  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
8759  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
8760  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
8761  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
8762  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
8763  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
8764 
8765  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
8766  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
8767  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
8768  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
8769  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
8770  BB = EmitAtomicBinary(MI, BB, 4, 0);
8771  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
8772  BB = EmitAtomicBinary(MI, BB, 8, 0);
8773 
8774  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
8775  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
8776  (Subtarget.hasPartwordAtomics() &&
8777  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
8778  (Subtarget.hasPartwordAtomics() &&
8779  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
8780  bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
8781 
8782  auto LoadMnemonic = PPC::LDARX;
8783  auto StoreMnemonic = PPC::STDCX;
8784  switch(MI->getOpcode()) {
8785  default:
8786  llvm_unreachable("Compare and swap of unknown size");
8787  case PPC::ATOMIC_CMP_SWAP_I8:
8788  LoadMnemonic = PPC::LBARX;
8789  StoreMnemonic = PPC::STBCX;
8790  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
8791  break;
8792  case PPC::ATOMIC_CMP_SWAP_I16:
8793  LoadMnemonic = PPC::LHARX;
8794  StoreMnemonic = PPC::STHCX;
8795  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
8796  break;
8797  case PPC::ATOMIC_CMP_SWAP_I32:
8798  LoadMnemonic = PPC::LWARX;
8799  StoreMnemonic = PPC::STWCX;
8800  break;
8801  case PPC::ATOMIC_CMP_SWAP_I64:
8802  LoadMnemonic = PPC::LDARX;
8803  StoreMnemonic = PPC::STDCX;
8804  break;
8805  }
8806  unsigned dest = MI->getOperand(0).getReg();
8807  unsigned ptrA = MI->getOperand(1).getReg();
8808  unsigned ptrB = MI->getOperand(2).getReg();
8809  unsigned oldval = MI->getOperand(3).getReg();
8810  unsigned newval = MI->getOperand(4).getReg();
8811  DebugLoc dl = MI->getDebugLoc();
8812 
8813  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
8814  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
8815  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
8816  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8817  F->insert(It, loop1MBB);
8818  F->insert(It, loop2MBB);
8819  F->insert(It, midMBB);
8820  F->insert(It, exitMBB);
8821  exitMBB->splice(exitMBB->begin(), BB,
8822  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8823  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8824 
8825  // thisMBB:
8826  // ...
8827  // fallthrough --> loopMBB
8828  BB->addSuccessor(loop1MBB);
8829 
8830  // loop1MBB:
8831  // l[bhwd]arx dest, ptr
8832  // cmp[wd] dest, oldval
8833  // bne- midMBB
8834  // loop2MBB:
8835  // st[bhwd]cx. newval, ptr
8836  // bne- loopMBB
8837  // b exitBB
8838  // midMBB:
8839  // st[bhwd]cx. dest, ptr
8840  // exitBB:
8841  BB = loop1MBB;
8842  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
8843  .addReg(ptrA).addReg(ptrB);
8844  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
8845  .addReg(oldval).addReg(dest);
8846  BuildMI(BB, dl, TII->get(PPC::BCC))
8847  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
8848  BB->addSuccessor(loop2MBB);
8849  BB->addSuccessor(midMBB);
8850 
8851  BB = loop2MBB;
8852  BuildMI(BB, dl, TII->get(StoreMnemonic))
8853  .addReg(newval).addReg(ptrA).addReg(ptrB);
8854  BuildMI(BB, dl, TII->get(PPC::BCC))
8855  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
8856  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
8857  BB->addSuccessor(loop1MBB);
8858  BB->addSuccessor(exitMBB);
8859 
8860  BB = midMBB;
8861  BuildMI(BB, dl, TII->get(StoreMnemonic))
8862  .addReg(dest).addReg(ptrA).addReg(ptrB);
8863  BB->addSuccessor(exitMBB);
8864 
8865  // exitMBB:
8866  // ...
8867  BB = exitMBB;
8868  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
8869  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
8870  // We must use 64-bit registers for addresses when targeting 64-bit,
8871  // since we're actually doing arithmetic on them. Other registers
8872  // can be 32-bit.
8873  bool is64bit = Subtarget.isPPC64();
8874  bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
8875 
8876  unsigned dest = MI->getOperand(0).getReg();
8877  unsigned ptrA = MI->getOperand(1).getReg();
8878  unsigned ptrB = MI->getOperand(2).getReg();
8879  unsigned oldval = MI->getOperand(3).getReg();
8880  unsigned newval = MI->getOperand(4).getReg();
8881  DebugLoc dl = MI->getDebugLoc();
8882 
8883  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
8884  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
8885  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
8886  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8887  F->insert(It, loop1MBB);
8888  F->insert(It, loop2MBB);
8889  F->insert(It, midMBB);
8890  F->insert(It, exitMBB);
8891  exitMBB->splice(exitMBB->begin(), BB,
8892  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8893  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8894 
8895  MachineRegisterInfo &RegInfo = F->getRegInfo();
8896  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8897  : &PPC::GPRCRegClass;
8898  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8899  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8900  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
8901  unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
8902  unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
8903  unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
8904  unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
8905  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8906  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8907  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8908  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8909  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8910  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8911  unsigned Ptr1Reg;
8912  unsigned TmpReg = RegInfo.createVirtualRegister(RC);
8913  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8914  // thisMBB:
8915  // ...
8916  // fallthrough --> loopMBB
8917  BB->addSuccessor(loop1MBB);
8918 
8919  // The 4-byte load must be aligned, while a char or short may be
8920  // anywhere in the word. Hence all this nasty bookkeeping code.
8921  // add ptr1, ptrA, ptrB [copy if ptrA==0]
8922  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8923  // xori shift, shift1, 24 [16]
8924  // rlwinm ptr, ptr1, 0, 0, 29
8925  // slw newval2, newval, shift
8926  // slw oldval2, oldval,shift
8927  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8928  // slw mask, mask2, shift
8929  // and newval3, newval2, mask
8930  // and oldval3, oldval2, mask
8931  // loop1MBB:
8932  // lwarx tmpDest, ptr
8933  // and tmp, tmpDest, mask
8934  // cmpw tmp, oldval3
8935  // bne- midMBB
8936  // loop2MBB:
8937  // andc tmp2, tmpDest, mask
8938  // or tmp4, tmp2, newval3
8939  // stwcx. tmp4, ptr
8940  // bne- loop1MBB
8941  // b exitBB
8942  // midMBB:
8943  // stwcx. tmpDest, ptr
8944  // exitBB:
8945  // srw dest, tmpDest, shift
8946  if (ptrA != ZeroReg) {
8947  Ptr1Reg = RegInfo.createVirtualRegister(RC);
8948  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8949  .addReg(ptrA).addReg(ptrB);
8950  } else {
8951  Ptr1Reg = ptrB;
8952  }
8953  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8954  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8955  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8956  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8957  if (is64bit)
8958  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8959  .addReg(Ptr1Reg).addImm(0).addImm(61);
8960  else
8961  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8962  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8963  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
8964  .addReg(newval).addReg(ShiftReg);
8965  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
8966  .addReg(oldval).addReg(ShiftReg);
8967  if (is8bit)
8968  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8969  else {
8970  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8971  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
8972  .addReg(Mask3Reg).addImm(65535);
8973  }
8974  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8975  .addReg(Mask2Reg).addReg(ShiftReg);
8976  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
8977  .addReg(NewVal2Reg).addReg(MaskReg);
8978  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
8979  .addReg(OldVal2Reg).addReg(MaskReg);
8980 
8981  BB = loop1MBB;
8982  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8983  .addReg(ZeroReg).addReg(PtrReg);
8984  BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
8985  .addReg(TmpDestReg).addReg(MaskReg);
8986  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
8987  .addReg(TmpReg).addReg(OldVal3Reg);
8988  BuildMI(BB, dl, TII->get(PPC::BCC))
8989  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
8990  BB->addSuccessor(loop2MBB);
8991  BB->addSuccessor(midMBB);
8992 
8993  BB = loop2MBB;
8994  BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
8995  .addReg(TmpDestReg).addReg(MaskReg);
8996  BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
8997  .addReg(Tmp2Reg).addReg(NewVal3Reg);
8998  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
8999  .addReg(ZeroReg).addReg(PtrReg);
9000  BuildMI(BB, dl, TII->get(PPC::BCC))
9001  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9002  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9003  BB->addSuccessor(loop1MBB);
9004  BB->addSuccessor(exitMBB);
9005 
9006  BB = midMBB;
9007  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
9008  .addReg(ZeroReg).addReg(PtrReg);
9009  BB->addSuccessor(exitMBB);
9010 
9011  // exitMBB:
9012  // ...
9013  BB = exitMBB;
9014  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
9015  .addReg(ShiftReg);
9016  } else if (MI->getOpcode() == PPC::FADDrtz) {
9017  // This pseudo performs an FADD with rounding mode temporarily forced
9018  // to round-to-zero. We emit this via custom inserter since the FPSCR
9019  // is not modeled at the SelectionDAG level.
9020  unsigned Dest = MI->getOperand(0).getReg();
9021  unsigned Src1 = MI->getOperand(1).getReg();
9022  unsigned Src2 = MI->getOperand(2).getReg();
9023  DebugLoc dl = MI->getDebugLoc();
9024 
9025  MachineRegisterInfo &RegInfo = F->getRegInfo();
9026  unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
9027 
9028  // Save FPSCR value.
9029  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
9030 
9031  // Set rounding mode to round-to-zero.
9032  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
9033  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
9034 
9035  // Perform addition.
9036  BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
9037 
9038  // Restore FPSCR value.
9039  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
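// FPSCR bits 30-31 form the rounding-mode field RN; mtfsb1 31 / mtfsb0 30
// set RN = 0b01 (round toward zero) around the add, and the trailing mtfsf
// restores the saved rounding mode from MFFSReg.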
9040  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9041  MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
9042  MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9043  MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
9044  unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9045  MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
9046  PPC::ANDIo8 : PPC::ANDIo;
9047  bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9048  MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
9049 
9050  MachineRegisterInfo &RegInfo = F->getRegInfo();
9051  unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
9052  &PPC::GPRCRegClass :
9053  &PPC::G8RCRegClass);
9054 
9055  DebugLoc dl = MI->getDebugLoc();
9056  BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
9057  .addReg(MI->getOperand(1).getReg()).addImm(1);
9058  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
9059  MI->getOperand(0).getReg())
9060  .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
9061  } else if (MI->getOpcode() == PPC::TCHECK_RET) {
9062  DebugLoc Dl = MI->getDebugLoc();
9063  MachineRegisterInfo &RegInfo = F->getRegInfo();
9064  unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9065  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
9066  return BB;
9067  } else {
9068  llvm_unreachable("Unexpected instr type to insert");
9069  }
9070 
9071  MI->eraseFromParent(); // The pseudo instruction is gone now.
9072  return BB;
9073 }
9074 
9075 //===----------------------------------------------------------------------===//
9076 // Target Optimization Hooks
9077 //===----------------------------------------------------------------------===//
9078 
9079 static std::string getRecipOp(const char *Base, EVT VT) {
9080  std::string RecipOp(Base);
9081  if (VT.getScalarType() == MVT::f64)
9082  RecipOp += "d";
9083  else
9084  RecipOp += "f";
9085 
9086  if (VT.isVector())
9087  RecipOp = "vec-" + RecipOp;
9088 
9089  return RecipOp;
9090 }
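// For example (assuming these are the names accepted by the TargetRecip
// reciprocal-estimate option): getRecipOp("sqrt", MVT::v4f32) returns
// "vec-sqrtf", and getRecipOp("div", MVT::f64) returns "divd".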
9091 
9092 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
9093  DAGCombinerInfo &DCI,
9094  unsigned &RefinementSteps,
9095  bool &UseOneConstNR) const {
9096  EVT VT = Operand.getValueType();
9097  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
9098  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
9099  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9100  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9101  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9102  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9103  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
9104  std::string RecipOp = getRecipOp("sqrt", VT);
9105  if (!Recips.isEnabled(RecipOp))
9106  return SDValue();
9107 
9108  RefinementSteps = Recips.getRefinementSteps(RecipOp);
9109  UseOneConstNR = true;
9110  return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
9111  }
9112  return SDValue();
9113 }
9114 
9115 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
9116  DAGCombinerInfo &DCI,
9117  unsigned &RefinementSteps) const {
9118  EVT VT = Operand.getValueType();
9119  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
9120  (VT == MVT::f64 && Subtarget.hasFRE()) ||
9121  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9122  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9123  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9124  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9125  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
9126  std::string RecipOp = getRecipOp("div", VT);
9127  if (!Recips.isEnabled(RecipOp))
9128  return SDValue();
9129 
9130  RefinementSteps = Recips.getRefinementSteps(RecipOp);
9131  return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
9132  }
9133  return SDValue();
9134 }
9135 
9136 bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
9137  // Note: This functionality is used only when unsafe-fp-math is enabled, and
9138  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
9139  // enabled for division), this functionality is redundant with the default
9140  // combiner logic (once the division -> reciprocal/multiply transformation
9141  // has taken place). As a result, this matters more for older cores than for
9142  // newer ones.
9143 
9144  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9145  // reciprocal if there are two or more FDIVs (for embedded cores with only
9146  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
9147  switch (Subtarget.getDarwinDirective()) {
9148  default:
9149  return NumUsers > 2;
9150  case PPC::DIR_440:
9151  case PPC::DIR_A2:
9152  case PPC::DIR_E500mc:
9153  case PPC::DIR_E5500:
9154  return NumUsers > 1;
9155  }
9156 }
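// For example, on a generic out-of-order core, "a/d; b/d; c/d" (three FDIV
// users of d) is rewritten by the combiner into "r = 1.0/d; a*r; b*r; c*r",
// computing one reciprocal and three multiplies instead of three divisions.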
9157 
9158 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
9159  unsigned Bytes, int Dist,
9160  SelectionDAG &DAG) {
9161  if (VT.getSizeInBits() / 8 != Bytes)
9162  return false;
9163 
9164  SDValue BaseLoc = Base->getBasePtr();
9165  if (Loc.getOpcode() == ISD::FrameIndex) {
9166  if (BaseLoc.getOpcode() != ISD::FrameIndex)
9167  return false;
9168  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
9169  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
9170  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
9171  int FS = MFI->getObjectSize(FI);
9172  int BFS = MFI->getObjectSize(BFI);
9173  if (FS != BFS || FS != (int)Bytes) return false;
9174  return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
9175  }
9176 
9177  // Handle X+C
9178  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
9179  cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
9180  return true;
9181 
9182  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9183  const GlobalValue *GV1 = nullptr;
9184  const GlobalValue *GV2 = nullptr;
9185  int64_t Offset1 = 0;
9186  int64_t Offset2 = 0;
9187  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
9188  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
9189  if (isGA1 && isGA2 && GV1 == GV2)
9190  return Offset1 == (Offset2 + Dist*Bytes);
9191  return false;
9192 }
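// E.g. with Bytes == 16 and Dist == 1, a 16-byte access at frame offset 48
// is consecutive with a same-sized base access at frame offset 32
// (48 == 32 + 1*16).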
9193 
9194 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
9195 // not enforce equality of the chain operands.
9196 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
9197  unsigned Bytes, int Dist,
9198  SelectionDAG &DAG) {
9199  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
9200  EVT VT = LS->getMemoryVT();
9201  SDValue Loc = LS->getBasePtr();
9202  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
9203  }
9204 
9205  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
9206  EVT VT;
9207  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9208  default: return false;
9209  case Intrinsic::ppc_qpx_qvlfd:
9210  case Intrinsic::ppc_qpx_qvlfda:
9211  VT = MVT::v4f64;
9212  break;
9213  case Intrinsic::ppc_qpx_qvlfs:
9214  case Intrinsic::ppc_qpx_qvlfsa:
9215  VT = MVT::v4f32;
9216  break;
9217  case Intrinsic::ppc_qpx_qvlfcd:
9218  case Intrinsic::ppc_qpx_qvlfcda:
9219  VT = MVT::v2f64;
9220  break;
9221  case Intrinsic::ppc_qpx_qvlfcs:
9222  case Intrinsic::ppc_qpx_qvlfcsa:
9223  VT = MVT::v2f32;
9224  break;
9225  case Intrinsic::ppc_qpx_qvlfiwa:
9226  case Intrinsic::ppc_qpx_qvlfiwz:
9227  case Intrinsic::ppc_altivec_lvx:
9228  case Intrinsic::ppc_altivec_lvxl:
9229  case Intrinsic::ppc_vsx_lxvw4x:
9230  VT = MVT::v4i32;
9231  break;
9232  case Intrinsic::ppc_vsx_lxvd2x:
9233  VT = MVT::v2f64;
9234  break;
9235  case Intrinsic::ppc_altivec_lvebx:
9236  VT = MVT::i8;
9237  break;
9238  case Intrinsic::ppc_altivec_lvehx:
9239  VT = MVT::i16;
9240  break;
9241  case Intrinsic::ppc_altivec_lvewx:
9242  VT = MVT::i32;
9243  break;
9244  }
9245 
9246  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
9247  }
9248 
9249  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
9250  EVT VT;
9251  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9252  default: return false;
9253  case Intrinsic::ppc_qpx_qvstfd:
9254  case Intrinsic::ppc_qpx_qvstfda:
9255  VT = MVT::v4f64;
9256  break;
9257  case Intrinsic::ppc_qpx_qvstfs:
9258  case Intrinsic::ppc_qpx_qvstfsa:
9259  VT = MVT::v4f32;
9260  break;
9261  case Intrinsic::ppc_qpx_qvstfcd:
9262  case Intrinsic::ppc_qpx_qvstfcda:
9263  VT = MVT::v2f64;
9264  break;
9265  case Intrinsic::ppc_qpx_qvstfcs:
9266  case Intrinsic::ppc_qpx_qvstfcsa:
9267  VT = MVT::v2f32;
9268  break;
9269  case Intrinsic::ppc_qpx_qvstfiw:
9270  case Intrinsic::ppc_qpx_qvstfiwa:
9271  case Intrinsic::ppc_altivec_stvx:
9272  case Intrinsic::ppc_altivec_stvxl:
9273  case Intrinsic::ppc_vsx_stxvw4x:
9274  VT = MVT::v4i32;
9275  break;
9276  case Intrinsic::ppc_vsx_stxvd2x:
9277  VT = MVT::v2f64;
9278  break;
9279  case Intrinsic::ppc_altivec_stvebx:
9280  VT = MVT::i8;
9281  break;
9282  case Intrinsic::ppc_altivec_stvehx:
9283  VT = MVT::i16;
9284  break;
9285  case Intrinsic::ppc_altivec_stvewx:
9286  VT = MVT::i32;
9287  break;
9288  }
9289 
9290  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
9291  }
9292 
9293  return false;
9294 }
9295 
9296 // Return true if there is a nearby consecutive load to the one provided
9297 // (regardless of alignment). We search up and down the chain, looking through
9298 // token factors and other loads (but nothing else). As a result, a true result
9299 // indicates that it is safe to create a new consecutive load adjacent to the
9300 // load provided.
9301 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
9302  SDValue Chain = LD->getChain();
9303  EVT VT = LD->getMemoryVT();
9304 
9305  SmallSet<SDNode *, 16> LoadRoots;
9306  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
9307  SmallSet<SDNode *, 16> Visited;
9308 
9309  // First, search up the chain, branching to follow all token-factor operands.
9310  // If we find a consecutive load, then we're done, otherwise, record all
9311  // nodes just above the top-level loads and token factors.
9312  while (!Queue.empty()) {
9313  SDNode *ChainNext = Queue.pop_back_val();
9314  if (!Visited.insert(ChainNext).second)
9315  continue;
9316 
9317  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
9318  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9319  return true;
9320 
9321  if (!Visited.count(ChainLD->getChain().getNode()))
9322  Queue.push_back(ChainLD->getChain().getNode());
9323  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
9324  for (const SDUse &O : ChainNext->ops())
9325  if (!Visited.count(O.getNode()))
9326  Queue.push_back(O.getNode());
9327  } else
9328  LoadRoots.insert(ChainNext);
9329  }
9330 
9331  // Second, search down the chain, starting from the top-level nodes recorded
9332  // in the first phase. These top-level nodes are the nodes just above all
9333 // loads and token factors. Starting with their uses, recursively look through
9334  // all loads (just the chain uses) and token factors to find a consecutive
9335  // load.
9336  Visited.clear();
9337  Queue.clear();
9338 
9339  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
9340  IE = LoadRoots.end(); I != IE; ++I) {
9341  Queue.push_back(*I);
9342 
9343  while (!Queue.empty()) {
9344  SDNode *LoadRoot = Queue.pop_back_val();
9345  if (!Visited.insert(LoadRoot).second)
9346  continue;
9347 
9348  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
9349  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9350  return true;
9351 
9352  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
9353  UE = LoadRoot->use_end(); UI != UE; ++UI)
9354  if (((isa<MemSDNode>(*UI) &&
9355  cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
9356  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
9357  Queue.push_back(*UI);
9358  }
9359  }
9360 
9361  return false;
9362 }
9363 
9364 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
9365  DAGCombinerInfo &DCI) const {
9366  SelectionDAG &DAG = DCI.DAG;
9367  SDLoc dl(N);
9368 
9369  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
9370  // If we're tracking CR bits, we need to be careful that we don't have:
9371  // trunc(binary-ops(zext(x), zext(y)))
9372  // or
9373  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
9374  // such that we're unnecessarily moving things into GPRs when it would be
9375  // better to keep them in CR bits.
9376 
9377  // Note that trunc here can be an actual i1 trunc, or can be the effective
9378  // truncation that comes from a setcc or select_cc.
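//
// For example, for i1 values a and b that live in CR bits, a DAG such as
//   trunc(xor(zext(a), zext(b)))
// would otherwise move a and b into GPRs just to perform the xor; after this
// combine, the xor operates directly on the i1 (CR bit) values.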
9379  if (N->getOpcode() == ISD::TRUNCATE &&
9380  N->getValueType(0) != MVT::i1)
9381  return SDValue();
9382 
9383  if (N->getOperand(0).getValueType() != MVT::i32 &&
9384  N->getOperand(0).getValueType() != MVT::i64)
9385  return SDValue();
9386 
9387  if (N->getOpcode() == ISD::SETCC ||
9388  N->getOpcode() == ISD::SELECT_CC) {
9389  // If we're looking at a comparison, then we need to make sure that the
9390 // high bits (all except for the first) don't affect the result.
9391  ISD::CondCode CC =
9392  cast<CondCodeSDNode>(N->getOperand(
9393  N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
9394  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
9395 
9396  if (ISD::isSignedIntSetCC(CC)) {
9397  if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
9398  DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
9399  return SDValue();
9400  } else if (ISD::isUnsignedIntSetCC(CC)) {
9401  if (!DAG.MaskedValueIsZero(N->getOperand(0),
9402  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
9403  !DAG.MaskedValueIsZero(N->getOperand(1),
9404  APInt::getHighBitsSet(OpBits, OpBits-1)))
9405  return SDValue();
9406  } else {
9407  // This is neither a signed nor an unsigned comparison, just make sure
9408  // that the high bits are equal.
9409  APInt Op1Zero, Op1One;
9410  APInt Op2Zero, Op2One;
9411  DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
9412  DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
9413 
9414  // We don't really care about what is known about the first bit (if
9415  // anything), so clear it in all masks prior to comparing them.
9416  Op1Zero.clearBit(0); Op1One.clearBit(0);
9417  Op2Zero.clearBit(0); Op2One.clearBit(0);
9418 
9419  if (Op1Zero != Op2Zero || Op1One != Op2One)
9420  return SDValue();
9421  }
9422  }
9423 
9424 // We now know that the higher-order bits are irrelevant; we just need to
9425  // make sure that all of the intermediate operations are bit operations, and
9426  // all inputs are extensions.
9427  if (N->getOperand(0).getOpcode() != ISD::AND &&
9428  N->getOperand(0).getOpcode() != ISD::OR &&
9429  N->getOperand(0).getOpcode() != ISD::XOR &&
9430  N->getOperand(0).getOpcode() != ISD::SELECT &&
9431  N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
9432  N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
9433  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
9434  N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
9435  N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
9436  return SDValue();
9437 
9438  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
9439  N->getOperand(1).getOpcode() != ISD::AND &&
9440  N->getOperand(1).getOpcode() != ISD::OR &&
9441  N->getOperand(1).getOpcode() != ISD::XOR &&
9442  N->getOperand(1).getOpcode() != ISD::SELECT &&
9443  N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
9444  N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
9445  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
9446  N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
9447  N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
9448  return SDValue();
9449 
9450  SmallVector<SDValue, 4> Inputs;
9451  SmallVector<SDValue, 8> BinOps, PromOps;
9452  SmallPtrSet<SDNode *, 16> Visited;
9453 
9454  for (unsigned i = 0; i < 2; ++i) {
9455  if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9456  N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9457  N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
9458  N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
9459  isa<ConstantSDNode>(N->getOperand(i)))
9460  Inputs.push_back(N->getOperand(i));
9461  else
9462  BinOps.push_back(N->getOperand(i));
9463 
9464  if (N->getOpcode() == ISD::TRUNCATE)
9465  break;
9466  }
9467 
9468  // Visit all inputs, collect all binary operations (and, or, xor and
9469  // select) that are all fed by extensions.
9470  while (!BinOps.empty()) {
9471  SDValue BinOp = BinOps.back();
9472  BinOps.pop_back();
9473 
9474  if (!Visited.insert(BinOp.getNode()).second)
9475  continue;
9476 
9477  PromOps.push_back(BinOp);
9478 
9479  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
9480  // The condition of the select is not promoted.
9481  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
9482  continue;
9483  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
9484  continue;
9485 
9486  if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9487  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9488  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
9489  BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
9490  isa<ConstantSDNode>(BinOp.getOperand(i))) {
9491  Inputs.push_back(BinOp.getOperand(i));
9492  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
9493  BinOp.getOperand(i).getOpcode() == ISD::OR ||
9494  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
9495  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
9496  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
9497  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
9498  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9499  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9500  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
9501  BinOps.push_back(BinOp.getOperand(i));
9502  } else {
9503  // We have an input that is not an extension or another binary
9504  // operation; we'll abort this transformation.
9505  return SDValue();
9506  }
9507  }
9508  }
9509 
9510  // Make sure that this is a self-contained cluster of operations (which
9511  // is not quite the same thing as saying that everything has only one
9512  // use).
9513  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9514  if (isa<ConstantSDNode>(Inputs[i]))
9515  continue;
9516 
9517  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
9518  UE = Inputs[i].getNode()->use_end();
9519  UI != UE; ++UI) {
9520  SDNode *User = *UI;
9521  if (User != N && !Visited.count(User))
9522  return SDValue();
9523 
9524  // Make sure that we're not going to promote the non-output-value
9525  // operand(s) or SELECT or SELECT_CC.
9526  // FIXME: Although we could sometimes handle this, and it does occur in
9527  // practice that one of the condition inputs to the select is also one of
9528  // the outputs, we currently can't deal with this.
9529  if (User->getOpcode() == ISD::SELECT) {
9530  if (User->getOperand(0) == Inputs[i])
9531  return SDValue();
9532  } else if (User->getOpcode() == ISD::SELECT_CC) {
9533  if (User->getOperand(0) == Inputs[i] ||
9534  User->getOperand(1) == Inputs[i])
9535  return SDValue();
9536  }
9537  }
9538  }
9539 
9540  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
9541  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
9542  UE = PromOps[i].getNode()->use_end();
9543  UI != UE; ++UI) {
9544  SDNode *User = *UI;
9545  if (User != N && !Visited.count(User))
9546  return SDValue();
9547 
9548  // Make sure that we're not going to promote the non-output-value
9549  // operand(s) or SELECT or SELECT_CC.
9550  // FIXME: Although we could sometimes handle this, and it does occur in
9551  // practice that one of the condition inputs to the select is also one of
9552  // the outputs, we currently can't deal with this.
9553  if (User->getOpcode() == ISD::SELECT) {
9554  if (User->getOperand(0) == PromOps[i])
9555  return SDValue();
9556  } else if (User->getOpcode() == ISD::SELECT_CC) {
9557  if (User->getOperand(0) == PromOps[i] ||
9558  User->getOperand(1) == PromOps[i])
9559  return SDValue();
9560  }
9561  }
9562  }
9563 
9564  // Replace all inputs with the extension operand.
9565  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9566  // Constants may have users outside the cluster of to-be-promoted nodes,
9567  // and so we need to replace those as we do the promotions.
9568  if (isa<ConstantSDNode>(Inputs[i]))
9569  continue;
9570  else
9571  DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
9572  }
9573 
9574  // Replace all operations (these are all the same, but have a different
9575  // (i1) return type). DAG.getNode will validate that the types of
9576  // a binary operator match, so go through the list in reverse so that
9577  // we've likely promoted both operands first. Any intermediate truncations or
9578  // extensions disappear.
9579  while (!PromOps.empty()) {
9580  SDValue PromOp = PromOps.back();
9581  PromOps.pop_back();
9582 
9583  if (PromOp.getOpcode() == ISD::TRUNCATE ||
9584  PromOp.getOpcode() == ISD::SIGN_EXTEND ||
9585  PromOp.getOpcode() == ISD::ZERO_EXTEND ||
9586  PromOp.getOpcode() == ISD::ANY_EXTEND) {
9587  if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
9588  PromOp.getOperand(0).getValueType() != MVT::i1) {
9589  // The operand is not yet ready (see comment below).
9590  PromOps.insert(PromOps.begin(), PromOp);
9591  continue;
9592  }
9593 
9594  SDValue RepValue = PromOp.getOperand(0);
9595  if (isa<ConstantSDNode>(RepValue))
9596  RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
9597 
9598  DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
9599  continue;
9600  }
9601 
9602  unsigned C;
9603  switch (PromOp.getOpcode()) {
9604  default: C = 0; break;
9605  case ISD::SELECT: C = 1; break;
9606  case ISD::SELECT_CC: C = 2; break;
9607  }
9608 
9609  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
9610  PromOp.getOperand(C).getValueType() != MVT::i1) ||
9611  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
9612  PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
9613  // The to-be-promoted operands of this node have not yet been
9614  // promoted (this should be rare because we're going through the
9615  // list backward, but if one of the operands has several users in
9616  // this cluster of to-be-promoted nodes, it is possible).
9617  PromOps.insert(PromOps.begin(), PromOp);
9618  continue;
9619  }
9620 
9621  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
9622  PromOp.getNode()->op_end());
9623 
9624  // If there are any constant inputs, make sure they're replaced now.
9625  for (unsigned i = 0; i < 2; ++i)
9626  if (isa<ConstantSDNode>(Ops[C+i]))
9627  Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
9628 
9629  DAG.ReplaceAllUsesOfValueWith(PromOp,
9630  DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
9631  }
9632 
9633  // Now we're left with the initial truncation itself.
9634  if (N->getOpcode() == ISD::TRUNCATE)
9635  return N->getOperand(0);
9636 
9637  // Otherwise, this is a comparison. The operands to be compared have just
9638  // changed type (to i1), but everything else is the same.
9639  return SDValue(N, 0);
9640 }
9641 
9642 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
9643  DAGCombinerInfo &DCI) const {
9644  SelectionDAG &DAG = DCI.DAG;
9645  SDLoc dl(N);
9646 
9647  // If we're tracking CR bits, we need to be careful that we don't have:
9648  // zext(binary-ops(trunc(x), trunc(y)))
9649  // or
9650  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
9651  // such that we're unnecessarily moving things into CR bits that can more
9652  // efficiently stay in GPRs. Note that if we're not certain that the high
9653  // bits are set as required by the final extension, we still may need to do
9654  // some masking to get the proper behavior.
9655 
9656  // This same functionality is important on PPC64 when dealing with
9657  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
9658  // the return values of functions. Because it is so similar, it is handled
9659  // here as well.
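//
// For example, a DAG such as
//   zext(and(trunc(x), trunc(y)))   [x and y of type i32 or i64]
// can perform the and directly on x and y in GPRs; the final mask (for zext)
// or shift pair (for sext) below is emitted only when the high bits are not
// already known to be correct.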
9660 
9661  if (N->getValueType(0) != MVT::i32 &&
9662  N->getValueType(0) != MVT::i64)
9663  return SDValue();
9664 
9665  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
9666  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
9667  return SDValue();
9668 
9669  if (N->getOperand(0).getOpcode() != ISD::AND &&
9670  N->getOperand(0).getOpcode() != ISD::OR &&
9671  N->getOperand(0).getOpcode() != ISD::XOR &&
9672  N->getOperand(0).getOpcode() != ISD::SELECT &&
9673  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
9674  return SDValue();
9675 
9676  SmallVector<SDValue, 4> Inputs;
9677  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
9678  SmallPtrSet<SDNode *, 16> Visited;
9679 
9680  // Visit all inputs, collect all binary operations (and, or, xor and
9681  // select) that are all fed by truncations.
9682  while (!BinOps.empty()) {
9683  SDValue BinOp = BinOps.back();
9684  BinOps.pop_back();
9685 
9686  if (!Visited.insert(BinOp.getNode()).second)
9687  continue;
9688 
9689  PromOps.push_back(BinOp);
9690 
9691  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
9692  // The condition of the select is not promoted.
9693  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
9694  continue;
9695  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
9696  continue;
9697 
9698  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
9699  isa<ConstantSDNode>(BinOp.getOperand(i))) {
9700  Inputs.push_back(BinOp.getOperand(i));
9701  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
9702  BinOp.getOperand(i).getOpcode() == ISD::OR ||
9703  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
9704  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
9705  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
9706  BinOps.push_back(BinOp.getOperand(i));
9707  } else {
9708  // We have an input that is not a truncation or another binary
9709  // operation; we'll abort this transformation.
9710  return SDValue();
9711  }
9712  }
9713  }
9714 
9715  // The operands of a select that must be truncated when the select is
9716  // promoted because the operand is actually part of the to-be-promoted set.
9717  DenseMap<SDNode *, EVT> SelectTruncOp[2];
9718 
9719  // Make sure that this is a self-contained cluster of operations (which
9720  // is not quite the same thing as saying that everything has only one
9721  // use).
9722  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9723  if (isa<ConstantSDNode>(Inputs[i]))
9724  continue;
9725 
9726  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
9727  UE = Inputs[i].getNode()->use_end();
9728  UI != UE; ++UI) {
9729  SDNode *User = *UI;
9730  if (User != N && !Visited.count(User))
9731  return SDValue();
9732 
9733  // If we're going to promote the non-output-value operand(s) or SELECT or
9734  // SELECT_CC, record them for truncation.
9735  if (User->getOpcode() == ISD::SELECT) {
9736  if (User->getOperand(0) == Inputs[i])
9737  SelectTruncOp[0].insert(std::make_pair(User,
9738  User->getOperand(0).getValueType()));
9739  } else if (User->getOpcode() == ISD::SELECT_CC) {
9740  if (User->getOperand(0) == Inputs[i])
9741  SelectTruncOp[0].insert(std::make_pair(User,
9742  User->getOperand(0).getValueType()));
9743  if (User->getOperand(1) == Inputs[i])
9744  SelectTruncOp[1].insert(std::make_pair(User,
9745  User->getOperand(1).getValueType()));
9746  }
9747  }
9748  }
9749 
9750  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
9751  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
9752  UE = PromOps[i].getNode()->use_end();
9753  UI != UE; ++UI) {
9754  SDNode *User = *UI;
9755  if (User != N && !Visited.count(User))
9756  return SDValue();
9757 
9758  // If we're going to promote the non-output-value operand(s) or SELECT or
9759  // SELECT_CC, record them for truncation.
9760  if (User->getOpcode() == ISD::SELECT) {
9761  if (User->getOperand(0) == PromOps[i])
9762  SelectTruncOp[0].insert(std::make_pair(User,
9763  User->getOperand(0).getValueType()));
9764  } else if (User->getOpcode() == ISD::SELECT_CC) {
9765  if (User->getOperand(0) == PromOps[i])
9766  SelectTruncOp[0].insert(std::make_pair(User,
9767  User->getOperand(0).getValueType()));
9768  if (User->getOperand(1) == PromOps[i])
9769  SelectTruncOp[1].insert(std::make_pair(User,
9770  User->getOperand(1).getValueType()));
9771  }
9772  }
9773  }
9774 
9775  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
9776  bool ReallyNeedsExt = false;
9777  if (N->getOpcode() != ISD::ANY_EXTEND) {
9778  // If the inputs are not all already sign/zero-extended, then we'll
9779  // still need to do that extension at the end.
9780  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9781  if (isa<ConstantSDNode>(Inputs[i]))
9782  continue;
9783 
9784  unsigned OpBits =
9785  Inputs[i].getOperand(0).getValueSizeInBits();
9786  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
9787 
9788  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
9789  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
9790  APInt::getHighBitsSet(OpBits,
9791  OpBits-PromBits))) ||
9792  (N->getOpcode() == ISD::SIGN_EXTEND &&
9793  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
9794  (OpBits-(PromBits-1)))) {
9795  ReallyNeedsExt = true;
9796  break;
9797  }
9798  }
9799  }
9800 
9801  // Replace all inputs, either with the truncation operand, or a
9802  // truncation or extension to the final output type.
9803  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9804  // Constant inputs need to be replaced with the to-be-promoted nodes that
9805  // use them because they might have users outside of the cluster of
9806  // promoted nodes.
9807  if (isa<ConstantSDNode>(Inputs[i]))
9808  continue;
9809 
9810  SDValue InSrc = Inputs[i].getOperand(0);
9811  if (Inputs[i].getValueType() == N->getValueType(0))
9812  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
9813  else if (N->getOpcode() == ISD::SIGN_EXTEND)
9814  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
9815  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
9816  else if (N->getOpcode() == ISD::ZERO_EXTEND)
9817  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
9818  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
9819  else
9820  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
9821  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
9822  }
9823 
9824  // Replace all operations (these are all the same, but have a different
9825  // (promoted) return type). DAG.getNode will validate that the types of
9826  // a binary operator match, so go through the list in reverse so that
9827  // we've likely promoted both operands first.
9828  while (!PromOps.empty()) {
9829  SDValue PromOp = PromOps.back();
9830  PromOps.pop_back();
9831 
9832  unsigned C;
9833  switch (PromOp.getOpcode()) {
9834  default: C = 0; break;
9835  case ISD::SELECT: C = 1; break;
9836  case ISD::SELECT_CC: C = 2; break;
9837  }
9838 
9839  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
9840  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
9841  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
9842  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
9843  // The to-be-promoted operands of this node have not yet been
9844  // promoted (this should be rare because we're going through the
9845  // list backward, but if one of the operands has several users in
9846  // this cluster of to-be-promoted nodes, it is possible).
9847  PromOps.insert(PromOps.begin(), PromOp);
9848  continue;
9849  }
9850 
9851  // For SELECT and SELECT_CC nodes, we do a similar check for any
9852  // to-be-promoted comparison inputs.
9853  if (PromOp.getOpcode() == ISD::SELECT ||
9854  PromOp.getOpcode() == ISD::SELECT_CC) {
9855  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
9856  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
9857  (SelectTruncOp[1].count(PromOp.getNode()) &&
9858  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
9859  PromOps.insert(PromOps.begin(), PromOp);
9860  continue;
9861  }
9862  }
9863 
9864  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
9865  PromOp.getNode()->op_end());
9866 
9867  // If this node has constant inputs, then they'll need to be promoted here.
9868  for (unsigned i = 0; i < 2; ++i) {
9869  if (!isa<ConstantSDNode>(Ops[C+i]))
9870  continue;
9871  if (Ops[C+i].getValueType() == N->getValueType(0))
9872  continue;
9873 
9874  if (N->getOpcode() == ISD::SIGN_EXTEND)
9875  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
9876  else if (N->getOpcode() == ISD::ZERO_EXTEND)
9877  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
9878  else
9879  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
9880  }
9881 
9882  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
9883  // truncate them again to the original value type.
9884  if (PromOp.getOpcode() == ISD::SELECT ||
9885  PromOp.getOpcode() == ISD::SELECT_CC) {
9886  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
9887  if (SI0 != SelectTruncOp[0].end())
9888  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
9889  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
9890  if (SI1 != SelectTruncOp[1].end())
9891  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
9892  }
9893 
9894  DAG.ReplaceAllUsesOfValueWith(PromOp,
9895  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
9896  }
9897 
9898  // Now we're left with the initial extension itself.
9899  if (!ReallyNeedsExt)
9900  return N->getOperand(0);
9901 
9902  // To zero extend, just mask off everything except for the first bit (in the
9903  // i1 case).
9904  if (N->getOpcode() == ISD::ZERO_EXTEND)
9905  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
9906  DAG.getConstant(APInt::getLowBitsSet(
9907  N->getValueSizeInBits(0), PromBits),
9908  dl, N->getValueType(0)));
9909 
9910  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
9911  "Invalid extension type");
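  // This is the usual shift idiom for sign extension: shifting left by
  // (width - PromBits) and then arithmetic-shifting right by the same amount
  // replicates bit (PromBits - 1) into all higher bits. For example, sign
  // extending the low byte of an i32 is a shl by 24 followed by an sra by 24.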
9912  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
9913  SDValue ShiftCst =
9914  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
9915  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
9916  DAG.getNode(ISD::SHL, dl, N->getValueType(0),
9917  N->getOperand(0), ShiftCst), ShiftCst);
9918 }
9919 
9920 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
9921  DAGCombinerInfo &DCI) const {
9922  assert((N->getOpcode() == ISD::SINT_TO_FP ||
9923  N->getOpcode() == ISD::UINT_TO_FP) &&
9924  "Need an int -> FP conversion node here");
9925 
9926  if (!Subtarget.has64BitSupport())
9927  return SDValue();
9928 
9929  SelectionDAG &DAG = DCI.DAG;
9930  SDLoc dl(N);
9931  SDValue Op(N, 0);
9932 
9933  // Don't handle ppc_fp128 here or i1 conversions.
9934  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
9935  return SDValue();
9936  if (Op.getOperand(0).getValueType() == MVT::i1)
9937  return SDValue();
9938 
9939  // For i32 intermediate values, unfortunately, the conversion functions
9940  // leave the upper 32 bits of the value undefined. Within the set of
9941  // scalar instructions, we have no method for zero- or sign-extending the
9942  // value. Thus, we cannot handle i32 intermediate values here.
9943  if (Op.getOperand(0).getValueType() == MVT::i32)
9944  return SDValue();
9945 
9946  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
9947  "UINT_TO_FP is supported only with FPCVT");
9948 
9949  // If we have FCFIDS, then use it when converting to single-precision.
9950  // Otherwise, convert to double-precision and then round.
9951  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
9952  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
9953  : PPCISD::FCFIDS)
9954  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
9955  : PPCISD::FCFID);
9956  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
9957  ? MVT::f32
9958  : MVT::f64;
9959 
9960  // If we're converting from a float to an int and back to a float again,
9961  // then we don't need the store/load pair at all.
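  // For example, (f64 sint_to_fp (i64 fp_to_sint f64:x)) becomes
  // fcfid(fctidz(x)), keeping the value in floating-point registers instead
  // of spilling it through memory.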
9962  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
9963  Subtarget.hasFPCVT()) ||
9964  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
9965  SDValue Src = Op.getOperand(0).getOperand(0);
9966  if (Src.getValueType() == MVT::f32) {
9967  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
9968  DCI.AddToWorklist(Src.getNode());
9969  } else if (Src.getValueType() != MVT::f64) {
9970  // Make sure that we don't pick up a ppc_fp128 source value.
9971  return SDValue();
9972  }
9973 
9974  unsigned FCTOp =
9975  Op.getOperand(0).getValueType() == MVT::i32 ? PPCISD::FCTIWZ :
9976  PPCISD::FCTIDZ;
9977 
9978  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
9979  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
9980 
9981  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9982  FP = DAG.getNode(ISD::FP_ROUND, dl,
9983  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
9984  DCI.AddToWorklist(FP.getNode());
9985  }
9986 
9987  return FP;
9988  }
9989 
9990  return SDValue();
9991 }
9992 
9993 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
9994 // builtins) into loads with swaps.
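// For example, on a little-endian target a v2f64 load becomes
//   Load = LXVD2X chain, base
//   Swap = XXSWAPD Load
// because lxvd2x loads the two doublewords in big-endian element order, and
// the xxswapd restores the expected little-endian element numbering.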
9995 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
9996  DAGCombinerInfo &DCI) const {
9997  SelectionDAG &DAG = DCI.DAG;
9998  SDLoc dl(N);
9999  SDValue Chain;
10000  SDValue Base;
10001  MachineMemOperand *MMO;
10002 
10003  switch (N->getOpcode()) {
10004  default:
10005  llvm_unreachable("Unexpected opcode for little endian VSX load");
10006  case ISD::LOAD: {
10007  LoadSDNode *LD = cast<LoadSDNode>(N);
10008  Chain = LD->getChain();
10009  Base = LD->getBasePtr();
10010  MMO = LD->getMemOperand();
10011  // If the MMO suggests this isn't a load of a full vector, leave
10012  // things alone. For a built-in, we have to make the change for
10013  // correctness, so if there is a size problem, that will be a bug.
10014  if (MMO->getSize() < 16)
10015  return SDValue();
10016  break;
10017  }
10018  case ISD::INTRINSIC_W_CHAIN: {
10019  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10020  Chain = Intrin->getChain();
10021  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
10022  // us what we want. Get operand 2 instead.
10023  Base = Intrin->getOperand(2);
10024  MMO = Intrin->getMemOperand();
10025  break;
10026  }
10027  }
10028 
10029  MVT VecTy = N->getValueType(0).getSimpleVT();
10030  SDValue LoadOps[] = { Chain, Base };
10031  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
10032  DAG.getVTList(VecTy, MVT::Other),
10033  LoadOps, VecTy, MMO);
10034  DCI.AddToWorklist(Load.getNode());
10035  Chain = Load.getValue(1);
10036  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
10037  DAG.getVTList(VecTy, MVT::Other), Chain, Load);
10038  DCI.AddToWorklist(Swap.getNode());
10039  return Swap;
10040 }
10041 
10042 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
10043 // builtins) into stores with swaps.
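// This mirrors the load case above: the source vector is first permuted with
// XXSWAPD, and the swapped value is then stored with STXVD2X.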
10044 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
10045  DAGCombinerInfo &DCI) const {
10046  SelectionDAG &DAG = DCI.DAG;
10047  SDLoc dl(N);
10048  SDValue Chain;
10049  SDValue Base;
10050  unsigned SrcOpnd;
10051  MachineMemOperand *MMO;
10052 
10053  switch (N->getOpcode()) {
10054  default:
10055  llvm_unreachable("Unexpected opcode for little endian VSX store");
10056  case ISD::STORE: {
10057  StoreSDNode *ST = cast<StoreSDNode>(N);
10058  Chain = ST->getChain();
10059  Base = ST->getBasePtr();
10060  MMO = ST->getMemOperand();
10061  SrcOpnd = 1;
10062  // If the MMO suggests this isn't a store of a full vector, leave
10063  // things alone. For a built-in, we have to make the change for
10064  // correctness, so if there is a size problem, that will be a bug.
10065  if (MMO->getSize() < 16)
10066  return SDValue();
10067  break;
10068  }
10069  case ISD::INTRINSIC_VOID: {
10070  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10071  Chain = Intrin->getChain();
10072  // Intrin->getBasePtr() oddly does not get what we want.
10073  Base = Intrin->getOperand(3);
10074  MMO = Intrin->getMemOperand();
10075  SrcOpnd = 2;
10076  break;
10077  }
10078  }
10079 
10080  SDValue Src = N->getOperand(SrcOpnd);
10081  MVT VecTy = Src.getValueType().getSimpleVT();
10082  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
10083  DAG.getVTList(VecTy, MVT::Other), Chain, Src);
10084  DCI.AddToWorklist(Swap.getNode());
10085  Chain = Swap.getValue(1);
10086  SDValue StoreOps[] = { Chain, Swap, Base };
10087  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
10088  DAG.getVTList(MVT::Other),
10089  StoreOps, VecTy, MMO);
10090  DCI.AddToWorklist(Store.getNode());
10091  return Store;
10092 }
10093 
10094 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
10095  DAGCombinerInfo &DCI) const {
10096  SelectionDAG &DAG = DCI.DAG;
10097  SDLoc dl(N);
10098  switch (N->getOpcode()) {
10099  default: break;
10100  case PPCISD::SHL:
10101  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10102  if (C->isNullValue()) // 0 << V -> 0.
10103  return N->getOperand(0);
10104  }
10105  break;
10106  case PPCISD::SRL:
10107  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10108  if (C->isNullValue()) // 0 >>u V -> 0.
10109  return N->getOperand(0);
10110  }
10111  break;
10112  case PPCISD::SRA:
10113  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10114  if (C->isNullValue() || // 0 >>s V -> 0.
10115  C->isAllOnesValue()) // -1 >>s V -> -1.
10116  return N->getOperand(0);
10117  }
10118  break;
10119  case ISD::SIGN_EXTEND:
10120  case ISD::ZERO_EXTEND:
10121  case ISD::ANY_EXTEND:
10122  return DAGCombineExtBoolTrunc(N, DCI);
10123  case ISD::TRUNCATE:
10124  case ISD::SETCC:
10125  case ISD::SELECT_CC:
10126  return DAGCombineTruncBoolExt(N, DCI);
10127  case ISD::SINT_TO_FP:
10128  case ISD::UINT_TO_FP:
10129  return combineFPToIntToFP(N, DCI);
10130  case ISD::STORE: {
10131  // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
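  // That is, rather than converting in an FPR, spilling the result to the
  // stack, and reloading it into a GPR for an integer store, the fctiwz
  // result is stored directly from the FPR with stfiwx.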
10132  if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
10133  N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10134  N->getOperand(1).getValueType() == MVT::i32 &&
10135  N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
10136  SDValue Val = N->getOperand(1).getOperand(0);
10137  if (Val.getValueType() == MVT::f32) {
10138  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
10139  DCI.AddToWorklist(Val.getNode());
10140  }
10141  Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
10142  DCI.AddToWorklist(Val.getNode());
10143 
10144  SDValue Ops[] = {
10145  N->getOperand(0), Val, N->getOperand(2),
10146  DAG.getValueType(N->getOperand(1).getValueType())
10147  };
10148 
10149  Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10150  DAG.getVTList(MVT::Other), Ops,
10151  cast<StoreSDNode>(N)->getMemoryVT(),
10152  cast<StoreSDNode>(N)->getMemOperand());
10153  DCI.AddToWorklist(Val.getNode());
10154  return Val;
10155  }
10156 
10157  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
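  // For example, (store (i32 bswap x), p) becomes (stwbrx x, p), folding the
  // swap into a byte-reversed store.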
10158  if (cast<StoreSDNode>(N)->isUnindexed() &&
10159  N->getOperand(1).getOpcode() == ISD::BSWAP &&
10160  N->getOperand(1).getNode()->hasOneUse() &&
10161  (N->getOperand(1).getValueType() == MVT::i32 ||
10162  N->getOperand(1).getValueType() == MVT::i16 ||
10163  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10164  N->getOperand(1).getValueType() == MVT::i64))) {
10165  SDValue BSwapOp = N->getOperand(1).getOperand(0);
10166  // Do an any-extend to 32-bits if this is a half-word input.
10167  if (BSwapOp.getValueType() == MVT::i16)
10168  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
10169 
10170  SDValue Ops[] = {
10171  N->getOperand(0), BSwapOp, N->getOperand(2),
10172  DAG.getValueType(N->getOperand(1).getValueType())
10173  };
10174  return
10175  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
10176  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
10177  cast<StoreSDNode>(N)->getMemOperand());
10178  }
10179 
10180  // For little endian, VSX stores require generating xxswapd/lxvd2x.
10181  EVT VT = N->getOperand(1).getValueType();
10182  if (VT.isSimple()) {
10183  MVT StoreVT = VT.getSimpleVT();
10184  if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10185  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
10186  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
10187  return expandVSXStoreForLE(N, DCI);
10188  }
10189  break;
10190  }
10191  case ISD::LOAD: {
10192  LoadSDNode *LD = cast<LoadSDNode>(N);
10193  EVT VT = LD->getValueType(0);
10194 
10195  // For little endian, VSX loads require generating lxvd2x/xxswapd.
10196  if (VT.isSimple()) {
10197  MVT LoadVT = VT.getSimpleVT();
10198  if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10199  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
10200  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
10201  return expandVSXLoadForLE(N, DCI);
10202  }
10203 
10204  EVT MemVT = LD->getMemoryVT();
10205  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
10206  unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
10207  Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
10208  unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
10209  if (LD->isUnindexed() && VT.isVector() &&
10210  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
10211  // P8 and later hardware should just use LOAD.
10212  !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
10213  VT == MVT::v4i32 || VT == MVT::v4f32)) ||
10214  (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
10215  LD->getAlignment() >= ScalarABIAlignment)) &&
10216  LD->getAlignment() < ABIAlignment) {
10217  // This is a type-legal unaligned Altivec or QPX load.
10218  SDValue Chain = LD->getChain();
10219  SDValue Ptr = LD->getBasePtr();
10220  bool isLittleEndian = Subtarget.isLittleEndian();
10221 
10222  // This implements the loading of unaligned vectors as described in
10223  // the venerable Apple Velocity Engine overview. Specifically:
10224  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
10225  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
10226  //
10227  // The general idea is to expand a sequence of one or more unaligned
10228  // loads into an alignment-based permutation-control instruction (lvsl
10229  // or lvsr), a series of regular vector loads (which always truncate
10230  // their input address to an aligned address), and a series of
10231  // permutations. The results of these permutations are the requested
10232  // loaded values. The trick is that the last "extra" load is not taken
10233  // from the address you might suspect (sizeof(vector) bytes after the
10234  // last requested load), but rather sizeof(vector) - 1 bytes after the
10235  // last requested vector. The point of this is to avoid a page fault if
10236  // the base address happened to be aligned. This works because if the
10237  // base address is aligned, then adding less than a full vector length
10238  // will cause the last vector in the sequence to be (re)loaded.
10239  // Otherwise, the next vector in the sequence will be fetched, as one
10240  // would expect.
10241 
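  // Concretely, for a 16-byte vector load from an unaligned address P, the
  // expansion is (in pseudo-assembly):
  //   PermCntl = lvsl 0, P     ; permute control from the low bits of P
  //   V1 = lvx 0, P            ; loads from P & ~15
  //   V2 = lvx 0, P+15         ; loads from (P + 15) & ~15
  //   Result = vperm V1, V2, PermCntl
  // Using P+15 rather than P+16 for the second load means that when P is
  // already 16-byte aligned, both lvx instructions read the same block and
  // nothing beyond the requested bytes is touched.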
10242  // We might be able to reuse the permutation generation from
10243  // a different base address offset from this one by an aligned amount.
10244  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
10245  // optimization later.
10246  Intrinsic::ID Intr, IntrLD, IntrPerm;
10247  MVT PermCntlTy, PermTy, LDTy;
10248  if (Subtarget.hasAltivec()) {
10249  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
10250  Intrinsic::ppc_altivec_lvsl;
10251  IntrLD = Intrinsic::ppc_altivec_lvx;
10252  IntrPerm = Intrinsic::ppc_altivec_vperm;
10253  PermCntlTy = MVT::v16i8;
10254  PermTy = MVT::v4i32;
10255  LDTy = MVT::v4i32;
10256  } else {
10257  Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
10258  Intrinsic::ppc_qpx_qvlpcls;
10259  IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
10260  Intrinsic::ppc_qpx_qvlfs;
10261  IntrPerm = Intrinsic::ppc_qpx_qvfperm;
10262  PermCntlTy = MVT::v4f64;
10263  PermTy = MVT::v4f64;
10264  LDTy = MemVT.getSimpleVT();
10265  }
10266 
10267  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
10268 
10269  // Create the new MMO for the new base load. It is like the original MMO,
10270  // but represents an area in memory almost twice the vector size centered
10271  // on the original address. If the address is unaligned, we might start
10272  // reading up to (sizeof(vector)-1) bytes below the address of the
10273  // original unaligned load.
10274  MachineFunction &MF = DAG.getMachineFunction();
10275  MachineMemOperand *BaseMMO =
10276  MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
10277  2*MemVT.getStoreSize()-1);
10278 
10279  // Create the new base load.
10280  SDValue LDXIntID =
10281  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
10282  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
10283  SDValue BaseLoad =
10284  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
10285  DAG.getVTList(PermTy, MVT::Other),
10286  BaseLoadOps, LDTy, BaseMMO);
10287 
10288  // Note that the value of IncOffset (which is provided to the next
10289  // load's pointer info offset value, and thus used to calculate the
10290  // alignment), and the value of IncValue (which is actually used to
10291  // increment the pointer value) are different! This is because we
10292  // require the next load to appear to be aligned, even though it
10293  // is actually offset from the base pointer by a lesser amount.
10294  int IncOffset = VT.getSizeInBits() / 8;
10295  int IncValue = IncOffset;
10296 
10297  // Walk (both up and down) the chain looking for another load at the real
10298  // (aligned) offset (the alignment of the other load does not matter in
10299  // this case). If found, then do not use the offset reduction trick, as
10300  // that will prevent the loads from being later combined (as they would
10301  // otherwise be duplicates).
10302  if (!findConsecutiveLoad(LD, DAG))
10303  --IncValue;
10304 
10305  SDValue Increment =
10306  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
10307  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
10308 
10309  MachineMemOperand *ExtraMMO =
10310  MF.getMachineMemOperand(LD->getMemOperand(),
10311  1, 2*MemVT.getStoreSize()-1);
10312  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
10313  SDValue ExtraLoad =
10314  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
10315  DAG.getVTList(PermTy, MVT::Other),
10316  ExtraLoadOps, LDTy, ExtraMMO);
10317 
10318  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
10319  BaseLoad.getValue(1), ExtraLoad.getValue(1));
10320 
10321  // Because vperm has a big-endian bias, we must reverse the order
10322  // of the input vectors and complement the permute control vector
10323  // when generating little endian code. We have already handled the
10324  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
10325  // and ExtraLoad here.
10326  SDValue Perm;
10327  if (isLittleEndian)
10328  Perm = BuildIntrinsicOp(IntrPerm,
10329  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
10330  else
10331  Perm = BuildIntrinsicOp(IntrPerm,
10332  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
10333 
10334  if (VT != PermTy)
10335  Perm = Subtarget.hasAltivec() ?
10336  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
10337  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
10338  DAG.getTargetConstant(1, dl, MVT::i64));
10339  // second argument is 1 because this rounding
10340  // is always exact.
10341 
10342  // The output of the permutation is our loaded result, the TokenFactor is
10343  // our new chain.
10344  DCI.CombineTo(N, Perm, TF);
10345  return SDValue(N, 0);
10346  }
10347  }
10348  break;
10349  case ISD::INTRINSIC_WO_CHAIN: {
10350  bool isLittleEndian = Subtarget.isLittleEndian();
10351  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10352  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
10353  : Intrinsic::ppc_altivec_lvsl);
10354  if ((IID == Intr ||
10355  IID == Intrinsic::ppc_qpx_qvlpcld ||
10356  IID == Intrinsic::ppc_qpx_qvlpcls) &&
10357  N->getOperand(1)->getOpcode() == ISD::ADD) {
10358  SDValue Add = N->getOperand(1);
10359 
10360  int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
10361  5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
10362 
10363  if (DAG.MaskedValueIsZero(
10364  Add->getOperand(1),
10365  APInt::getAllOnesValue(Bits /* alignment */)
10366  .zext(
10367  Add.getValueType().getScalarType().getSizeInBits()))) {
10368  SDNode *BasePtr = Add->getOperand(0).getNode();
10369  for (SDNode::use_iterator UI = BasePtr->use_begin(),
10370  UE = BasePtr->use_end();
10371  UI != UE; ++UI) {
10372  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
10373  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
10374  // We've found another LVSL/LVSR, and this address is an aligned
10375  // multiple of that one. The results will be the same, so use the
10376  // one we've just found instead.
10377 
10378  return SDValue(*UI, 0);
10379  }
10380  }
10381  }
10382 
10383  if (isa<ConstantSDNode>(Add->getOperand(1))) {
10384  SDNode *BasePtr = Add->getOperand(0).getNode();
10385  for (SDNode::use_iterator UI = BasePtr->use_begin(),
10386  UE = BasePtr->use_end(); UI != UE; ++UI) {
10387  if (UI->getOpcode() == ISD::ADD &&
10388  isa<ConstantSDNode>(UI->getOperand(1)) &&
10389  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
10390  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
10391  (1ULL << Bits) == 0) {
10392  SDNode *OtherAdd = *UI;
10393  for (SDNode::use_iterator VI = OtherAdd->use_begin(),
10394  VE = OtherAdd->use_end(); VI != VE; ++VI) {
10395  if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
10396  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
10397  return SDValue(*VI, 0);
10398  }
10399  }
10400  }
10401  }
10402  }
10403  }
10404  }
10405 
10406  break;
10407  case ISD::INTRINSIC_W_CHAIN: {
10408  // For little endian, VSX loads require generating lxvd2x/xxswapd.
10409  if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
10410  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10411  default:
10412  break;
10413  case Intrinsic::ppc_vsx_lxvw4x:
10414  case Intrinsic::ppc_vsx_lxvd2x:
10415  return expandVSXLoadForLE(N, DCI);
10416  }
10417  }
10418  break;
10419  }
10420  case ISD::INTRINSIC_VOID: {
10421  // For little endian, VSX stores require generating xxswapd/stxvd2x.
10422  if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
10423  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10424  default:
10425  break;
10426  case Intrinsic::ppc_vsx_stxvw4x:
10427  case Intrinsic::ppc_vsx_stxvd2x:
10428  return expandVSXStoreForLE(N, DCI);
10429  }
10430  }
10431  break;
10432  }
10433  case ISD::BSWAP:
10434  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
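  // For example, (i32 bswap (i32 load p)) becomes a single byte-reversed
  // load, (i32 lwbrx p); likewise lhbrx for i16 and ldbrx for i64.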
10435  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
10436  N->getOperand(0).hasOneUse() &&
10437  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
10438  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10439  N->getValueType(0) == MVT::i64))) {
10440  SDValue Load = N->getOperand(0);
10441  LoadSDNode *LD = cast<LoadSDNode>(Load);
10442  // Create the byte-swapping load.
10443  SDValue Ops[] = {
10444  LD->getChain(), // Chain
10445  LD->getBasePtr(), // Ptr
10446  DAG.getValueType(N->getValueType(0)) // VT
10447  };
10448  SDValue BSLoad =
10449  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
10450  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
10451  MVT::i64 : MVT::i32, MVT::Other),
10452  Ops, LD->getMemoryVT(), LD->getMemOperand());
10453 
10454  // If this is an i16 load, insert the truncate.
10455  SDValue ResVal = BSLoad;
10456  if (N->getValueType(0) == MVT::i16)
10457  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
10458 
10459  // First, combine the bswap away. This makes the value produced by the
10460  // load dead.
10461  DCI.CombineTo(N, ResVal);
10462 
10463  // Next, combine the load away, we give it a bogus result value but a real
10464  // chain result. The result value is dead because the bswap is dead.
10465  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
10466 
10467  // Return N so it doesn't get rechecked!
10468  return SDValue(N, 0);
10469  }
10470 
10471  break;
10472  case PPCISD::VCMP: {
10473  // If a VCMPo node already exists with exactly the same operands as this
10474  // node, use its result instead of this node (VCMPo computes both a CR6 and
10475  // a normal output).
10476  //
10477  if (!N->getOperand(0).hasOneUse() &&
10478  !N->getOperand(1).hasOneUse() &&
10479  !N->getOperand(2).hasOneUse()) {
10480 
10481  // Scan all of the users of the LHS, looking for VCMPo's that match.
10482  SDNode *VCMPoNode = nullptr;
10483 
10484  SDNode *LHSN = N->getOperand(0).getNode();
10485  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
10486  UI != E; ++UI)
10487  if (UI->getOpcode() == PPCISD::VCMPo &&
10488  UI->getOperand(1) == N->getOperand(1) &&
10489  UI->getOperand(2) == N->getOperand(2) &&
10490  UI->getOperand(0) == N->getOperand(0)) {
10491  VCMPoNode = *UI;
10492  break;
10493  }
10494 
10495  // If there is no VCMPo node, or if the flag value has a single use, don't
10496  // transform this.
10497  if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
10498  break;
10499 
10500  // Look at the (necessarily single) use of the flag value. If it has a
10501  // chain, this transformation is more complex. Note that multiple things
10502  // could use the value result, which we should ignore.
10503  SDNode *FlagUser = nullptr;
10504  for (SDNode::use_iterator UI = VCMPoNode->use_begin();
10505  FlagUser == nullptr; ++UI) {
10506  assert(UI != VCMPoNode->use_end() && "Didn't find user!");
10507  SDNode *User = *UI;
10508  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
10509  if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
10510  FlagUser = User;
10511  break;
10512  }
10513  }
10514  }
10515 
10516  // If the user is an MFOCRF instruction, we know this is safe.
10517  // Otherwise we give up for right now.
10518  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
10519  return SDValue(VCMPoNode, 0);
10520  }
10521  break;
10522  }
10523  case ISD::BRCOND: {
10524  SDValue Cond = N->getOperand(1);
10525  SDValue Target = N->getOperand(2);
10526 
10527  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
10528  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
10529  Intrinsic::ppc_is_decremented_ctr_nonzero) {
10530 
10531  // We now need to make the intrinsic dead (it cannot be instruction
10532  // selected).
10533  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
10534  assert(Cond.getNode()->hasOneUse() &&
10535  "Counter decrement has more than one use");
10536 
10537  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
10538  N->getOperand(0), Target);
10539  }
10540  }
10541  break;
10542  case ISD::BR_CC: {
10543  // If this is a branch on an altivec predicate comparison, lower this so
10544  // that we don't have to do an MFOCRF: instead, branch directly on CR6. This
10545  // lowering is done pre-legalize, because the legalizer lowers the predicate
10546  // compare down to code that is difficult to reassemble.
10547  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
10548  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
10549 
10550  // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
10551  // value. If so, look through the AND to get to the intrinsic.
10552  if (LHS.getOpcode() == ISD::AND &&
10553  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
10554  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
10555  Intrinsic::ppc_is_decremented_ctr_nonzero &&
10556  isa<ConstantSDNode>(LHS.getOperand(1)) &&
10557  !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
10558  isZero())
10559  LHS = LHS.getOperand(0);
10560 
10561  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
10562  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
10563  Intrinsic::ppc_is_decremented_ctr_nonzero &&
10564  isa<ConstantSDNode>(RHS)) {
10565  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
10566  "Counter decrement comparison is not EQ or NE");
10567 
10568  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
10569  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
10570  (CC == ISD::SETNE && !Val);
10571 
10572  // We now need to make the intrinsic dead (it cannot be instruction
10573  // selected).
10574  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
10575  assert(LHS.getNode()->hasOneUse() &&
10576  "Counter decrement has more than one use");
10577 
10578  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
10579  N->getOperand(0), N->getOperand(4));
10580  }
10581 
10582  int CompareOpc;
10583  bool isDot;
10584 
10585  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
10586  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
10587  getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
10588  assert(isDot && "Can't compare against a vector result!");
10589 
10590  // If this is a comparison against something other than 0/1, then we know
10591  // that the condition is never/always true.
10592  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
10593  if (Val != 0 && Val != 1) {
10594  if (CC == ISD::SETEQ) // Cond never true, remove branch.
10595  return N->getOperand(0);
10596  // Always !=, turn it into an unconditional branch.
10597  return DAG.getNode(ISD::BR, dl, MVT::Other,
10598  N->getOperand(0), N->getOperand(4));
10599  }
10600 
10601  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
10602 
10603  // Create the PPCISD altivec 'dot' comparison node.
10604  SDValue Ops[] = {
10605  LHS.getOperand(2), // LHS of compare
10606  LHS.getOperand(3), // RHS of compare
10607  DAG.getConstant(CompareOpc, dl, MVT::i32)
10608  };
10609  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
10610  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10611 
10612  // Unpack the result based on how the target uses it.
10613  PPC::Predicate CompOpc;
10614  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
10615  default: // Can't happen, don't crash on invalid number though.
10616  case 0: // Branch on the value of the EQ bit of CR6.
10617  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
10618  break;
10619  case 1: // Branch on the inverted value of the EQ bit of CR6.
10620  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
10621  break;
10622  case 2: // Branch on the value of the LT bit of CR6.
10623  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
10624  break;
10625  case 3: // Branch on the inverted value of the LT bit of CR6.
10626  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
10627  break;
10628  }
10629 
10630  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
10631  DAG.getConstant(CompOpc, dl, MVT::i32),
10632  DAG.getRegister(PPC::CR6, MVT::i32),
10633  N->getOperand(4), CompNode.getValue(1));
10634  }
10635  break;
10636  }
10637  }
10638 
10639  return SDValue();
10640 }
10641 
10642 SDValue
10643 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10644  SelectionDAG &DAG,
10645  std::vector<SDNode *> *Created) const {
10646  // fold (sdiv X, pow2)
10647  EVT VT = N->getValueType(0);
10648  if (VT == MVT::i64 && !Subtarget.isPPC64())
10649  return SDValue();
10650  if ((VT != MVT::i32 && VT != MVT::i64) ||
10651  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
10652  return SDValue();
10653 
10654  SDLoc DL(N);
10655  SDValue N0 = N->getOperand(0);
10656 
10657  bool IsNegPow2 = (-Divisor).isPowerOf2();
10658  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
10659  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
10660 
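  // PPCISD::SRA_ADDZE pairs srawi/sradi with addze: the shift sets the carry
  // bit when the input is negative and nonzero bits are shifted out, and
  // addze adds that carry back in, rounding the division toward zero. For
  // example, -7 sdiv 4: sra(-7, 2) = -2 with carry 1, and addze gives -1.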
10661  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
10662  if (Created)
10663  Created->push_back(Op.getNode());
10664 
10665  if (IsNegPow2) {
10666  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10667  if (Created)
10668  Created->push_back(Op.getNode());
10669  }
10670 
10671  return Op;
10672 }
10673 
10674 //===----------------------------------------------------------------------===//
10675 // Inline Assembly Support
10676 //===----------------------------------------------------------------------===//
10677 
10678 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
10679  APInt &KnownZero,
10680  APInt &KnownOne,
10681  const SelectionDAG &DAG,
10682  unsigned Depth) const {
10683  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
10684  switch (Op.getOpcode()) {
10685  default: break;
10686  case PPCISD::LBRX: {
10687  // lhbrx is known to have the top bits cleared out.
10688  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
10689  KnownZero = 0xFFFF0000;
10690  break;
10691  }
10692  case ISD::INTRINSIC_WO_CHAIN: {
10693  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
10694  default: break;
10695  case Intrinsic::ppc_altivec_vcmpbfp_p:
10696  case Intrinsic::ppc_altivec_vcmpeqfp_p:
10697  case Intrinsic::ppc_altivec_vcmpequb_p:
10698  case Intrinsic::ppc_altivec_vcmpequh_p:
10699  case Intrinsic::ppc_altivec_vcmpequw_p:
10700  case Intrinsic::ppc_altivec_vcmpequd_p:
10701  case Intrinsic::ppc_altivec_vcmpgefp_p:
10702  case Intrinsic::ppc_altivec_vcmpgtfp_p:
10703  case Intrinsic::ppc_altivec_vcmpgtsb_p:
10704  case Intrinsic::ppc_altivec_vcmpgtsh_p:
10705  case Intrinsic::ppc_altivec_vcmpgtsw_p:
10706  case Intrinsic::ppc_altivec_vcmpgtsd_p:
10707  case Intrinsic::ppc_altivec_vcmpgtub_p:
10708  case Intrinsic::ppc_altivec_vcmpgtuh_p:
10709  case Intrinsic::ppc_altivec_vcmpgtuw_p:
10710  case Intrinsic::ppc_altivec_vcmpgtud_p:
10711  KnownZero = ~1U; // All bits but the low one are known to be zero.
10712  break;
10713  }
10714  }
10715  }
10716 }
10717 
10718 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
10719  switch (Subtarget.getDarwinDirective()) {
10720  default: break;
10721  case PPC::DIR_970:
10722  case PPC::DIR_PWR4:
10723  case PPC::DIR_PWR5:
10724  case PPC::DIR_PWR5X:
10725  case PPC::DIR_PWR6:
10726  case PPC::DIR_PWR6X:
10727  case PPC::DIR_PWR7:
10728  case PPC::DIR_PWR8: {
10729  if (!ML)
10730  break;
10731 
10732  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10733 
10734  // For small loops (between 5 and 8 instructions), align to a 32-byte
10735  // boundary so that the entire loop fits in one instruction-cache line.
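  // The value returned below is the log2 of the requested alignment in
  // bytes: returning 5 requests 1 << 5 == 32-byte alignment.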
10736  uint64_t LoopSize = 0;
10737  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
10738  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
10739  LoopSize += TII->GetInstSizeInBytes(J);
10740 
10741  if (LoopSize > 16 && LoopSize <= 32)
10742  return 5;
10743 
10744  break;
10745  }
10746  }
10747 
10748  return TargetLowering::getPrefLoopAlignment(ML);
10749 }
10750 
10751 /// getConstraintType - Given a constraint, return the type of
10752 /// constraint it is for this target.
10753 PPCTargetLowering::ConstraintType
10754 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
10755  if (Constraint.size() == 1) {
10756  switch (Constraint[0]) {
10757  default: break;
10758  case 'b':
10759  case 'r':
10760  case 'f':
10761  case 'v':
10762  case 'y':
10763  return C_RegisterClass;
10764  case 'Z':
10765  // FIXME: While Z does indicate a memory constraint, it specifically
10766  // indicates an r+r address (used in conjunction with the 'y' modifier
10767  // in the replacement string). Currently, we're forcing the base
10768  // register to be r0 in the asm printer (which is interpreted as zero)
10769  // and forming the complete address in the second register. This is
10770  // suboptimal.
10771  return C_Memory;
10772  }
10773  } else if (Constraint == "wc") { // individual CR bits.
10774  return C_RegisterClass;
10775  } else if (Constraint == "wa" || Constraint == "wd" ||
10776  Constraint == "wf" || Constraint == "ws") {
10777  return C_RegisterClass; // VSX registers.
10778  }
10779  return TargetLowering::getConstraintType(Constraint);
10780 }
10781 
10782 /// Examine constraint type and operand type and determine a weight value.
10783 /// This object must already have been set up with the operand type
10784 /// and the current alternative constraint selected.
10785 TargetLowering::ConstraintWeight
10786 PPCTargetLowering::getSingleConstraintMatchWeight(
10787  AsmOperandInfo &info, const char *constraint) const {
10788  ConstraintWeight weight = CW_Invalid;
10789  Value *CallOperandVal = info.CallOperandVal;
10790  // If we don't have a value, we can't do a match,
10791  // but allow it at the lowest weight.
10792  if (!CallOperandVal)
10793  return CW_Default;
10794  Type *type = CallOperandVal->getType();
10795 
10796  // Look at the constraint type.
10797  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
10798  return CW_Register; // an individual CR bit.
10799  else if ((StringRef(constraint) == "wa" ||
10800  StringRef(constraint) == "wd" ||
10801  StringRef(constraint) == "wf") &&
10802  type->isVectorTy())
10803  return CW_Register;
10804  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
10805  return CW_Register;
10806 
10807  switch (*constraint) {
10808  default:
10809  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
10810  break;
10811  case 'b':
10812  if (type->isIntegerTy())
10813  weight = CW_Register;
10814  break;
10815  case 'f':
10816  if (type->isFloatTy())
10817  weight = CW_Register;
10818  break;
10819  case 'd':
10820  if (type->isDoubleTy())
10821  weight = CW_Register;
10822  break;
10823  case 'v':
10824  if (type->isVectorTy())
10825  weight = CW_Register;
10826  break;
10827  case 'y':
10828  weight = CW_Register;
10829  break;
10830  case 'Z':
10831  weight = CW_Memory;
10832  break;
10833  }
10834  return weight;
10835 }
10836 
10837 std::pair<unsigned, const TargetRegisterClass *>
10838 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
10839  StringRef Constraint,
10840  MVT VT) const {
10841  if (Constraint.size() == 1) {
10842  // GCC RS6000 Constraint Letters
10843  switch (Constraint[0]) {
10844  case 'b': // R1-R31
10845  if (VT == MVT::i64 && Subtarget.isPPC64())
10846  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
10847  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
10848  case 'r': // R0-R31
10849  if (VT == MVT::i64 && Subtarget.isPPC64())
10850  return std::make_pair(0U, &PPC::G8RCRegClass);
10851  return std::make_pair(0U, &PPC::GPRCRegClass);
10852  case 'f':
10853  if (VT == MVT::f32 || VT == MVT::i32)
10854  return std::make_pair(0U, &PPC::F4RCRegClass);
10855  if (VT == MVT::f64 || VT == MVT::i64)
10856  return std::make_pair(0U, &PPC::F8RCRegClass);
10857  if (VT == MVT::v4f64 && Subtarget.hasQPX())
10858  return std::make_pair(0U, &PPC::QFRCRegClass);
10859  if (VT == MVT::v4f32 && Subtarget.hasQPX())
10860  return std::make_pair(0U, &PPC::QSRCRegClass);
10861  break;
10862  case 'v':
10863  if (VT == MVT::v4f64 && Subtarget.hasQPX())
10864  return std::make_pair(0U, &PPC::QFRCRegClass);
10865  if (VT == MVT::v4f32 && Subtarget.hasQPX())
10866  return std::make_pair(0U, &PPC::QSRCRegClass);
10867  return std::make_pair(0U, &PPC::VRRCRegClass);
10868  case 'y': // crrc
10869  return std::make_pair(0U, &PPC::CRRCRegClass);
10870  }
10871  } else if (Constraint == "wc") { // an individual CR bit.
10872  return std::make_pair(0U, &PPC::CRBITRCRegClass);
10873  } else if (Constraint == "wa" || Constraint == "wd" ||
10874  Constraint == "wf") {
10875  return std::make_pair(0U, &PPC::VSRCRegClass);
10876  } else if (Constraint == "ws") {
10877  if (VT == MVT::f32)
10878  return std::make_pair(0U, &PPC::VSSRCRegClass);
10879  else
10880  return std::make_pair(0U, &PPC::VSFRCRegClass);
10881  }
10882 
10883  std::pair<unsigned, const TargetRegisterClass *> R =
10884  TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10885 
10886  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
10887  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
10888  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
10889  // register.
10890  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
10891  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
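  // For example, an i64 operand constrained to "{r4}" on PPC64 resolves to
  // the 32-bit R4 below and is then upgraded to its 64-bit super-register X4.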
10892  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
10893  PPC::GPRCRegClass.contains(R.first))
10894  return std::make_pair(TRI->getMatchingSuperReg(R.first,
10895  PPC::sub_32, &PPC::G8RCRegClass),
10896  &PPC::G8RCRegClass);
10897 
10898  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
10899  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
10900  R.first = PPC::CR0;
10901  R.second = &PPC::CRRCRegClass;
10902  }
10903 
10904  return R;
10905 }
10906 
10907 
10908 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
10909 /// vector. If it is invalid, don't add anything to Ops.
10910 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
10911  std::string &Constraint,
10912  std::vector<SDValue>&Ops,
10913  SelectionDAG &DAG) const {
10914  SDValue Result;
10915 
10916  // Only support length 1 constraints.
10917  if (Constraint.length() > 1) return;
10918 
10919  char Letter = Constraint[0];
10920  switch (Letter) {
10921  default: break;
10922  case 'I':
10923  case 'J':
10924  case 'K':
10925  case 'L':
10926  case 'M':
10927  case 'N':
10928  case 'O':
10929  case 'P': {
10930  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
10931  if (!CST) return; // Must be an immediate to match.
10932  SDLoc dl(Op);
10933  int64_t Value = CST->getSExtValue();
10934  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
10935  // numbers are printed as such.
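  // As an illustrative use (hypothetical names): "I" accepts
  //   asm("addi %0,%1,%2" : "=r"(Res) : "r"(A), "I"(16));
  // but rejects an immediate like 70000, which does not fit in 16 signed bits.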
10936  switch (Letter) {
10937  default: llvm_unreachable("Unknown constraint letter!");
10938  case 'I': // "I" is a signed 16-bit constant.
10939  if (isInt<16>(Value))
10940  Result = DAG.getTargetConstant(Value, dl, TCVT);
10941  break;
10942  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
10943  if (isShiftedUInt<16, 16>(Value))
10944  Result = DAG.getTargetConstant(Value, dl, TCVT);
10945  break;
10946  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
10947  if (isShiftedInt<16, 16>(Value))
10948  Result = DAG.getTargetConstant(Value, dl, TCVT);
10949  break;
10950  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
10951  if (isUInt<16>(Value))
10952  Result = DAG.getTargetConstant(Value, dl, TCVT);
10953  break;
10954  case 'M': // "M" is a constant that is greater than 31.
10955  if (Value > 31)
10956  Result = DAG.getTargetConstant(Value, dl, TCVT);
10957  break;
10958  case 'N': // "N" is a positive constant that is an exact power of two.
10959  if (Value > 0 && isPowerOf2_64(Value))
10960  Result = DAG.getTargetConstant(Value, dl, TCVT);
10961  break;
10962  case 'O': // "O" is the constant zero.
10963  if (Value == 0)
10964  Result = DAG.getTargetConstant(Value, dl, TCVT);
10965  break;
10966  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
10967  if (isInt<16>(-Value))
10968  Result = DAG.getTargetConstant(Value, dl, TCVT);
10969  break;
10970  }
10971  break;
10972  }
10973  }
10974 
10975  if (Result.getNode()) {
10976  Ops.push_back(Result);
10977  return;
10978  }
10979 
10980  // Handle standard constraint letters.
10981  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10982 }
10983 
10984 // isLegalAddressingMode - Return true if the addressing mode represented
10985 // by AM is legal for this target, for a load/store of the specified type.
10986 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
10987  const AddrMode &AM, Type *Ty,
10988  unsigned AS) const {
10989  // PPC does not allow r+i addressing modes for vectors!
10990  if (Ty->isVectorTy() && AM.BaseOffs != 0)
10991  return false;
10992 
10993  // PPC allows a sign-extended 16-bit immediate field.
10994  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
10995  return false;
10996 
10997  // No global is ever allowed as a base.
10998  if (AM.BaseGV)
10999  return false;
11000 
11001  // PPC only supports r+r,
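  // Illustratively: "r+i" corresponds to D-form accesses (e.g. lwz r3, 8(r4))
  // and "r+r" to X-form (e.g. lwzx r3, r4, r5); the ISA has no scaled-index
  // or "r+r+i" form.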
11002  switch (AM.Scale) {
11003  case 0: // "r+i" or just "i", depending on HasBaseReg.
11004  break;
11005  case 1:
11006  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
11007  return false;
11008  // Otherwise we have r+r or r+i.
11009  break;
11010  case 2:
11011  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
11012  return false;
11013  // Allow 2*r as r+r.
11014  break;
11015  default:
11016  // No other scales are supported.
11017  return false;
11018  }
11019 
11020  return true;
11021 }
11022 
11023 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
11024  SelectionDAG &DAG) const {
11025  MachineFunction &MF = DAG.getMachineFunction();
11026  MachineFrameInfo *MFI = MF.getFrameInfo();
11027  MFI->setReturnAddressIsTaken(true);
11028 
11029  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
11030  return SDValue();
11031 
11032  SDLoc dl(Op);
11033  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11034 
11035  // Make sure the function does not optimize away the store of the RA to
11036  // the stack.
11037  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
11038  FuncInfo->setLRStoreRequired();
11039  bool isPPC64 = Subtarget.isPPC64();
11040  auto PtrVT = getPointerTy(MF.getDataLayout());
11041 
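  // For Depth > 0, walk the back-chain: each ABI frame saves LR at a fixed
  // offset (getReturnSaveOffset()) from its parent frame, so the return
  // address is a load from FrameAddr + Offset.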
11042  if (Depth > 0) {
11043  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
11044  SDValue Offset =
11045  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
11046  isPPC64 ? MVT::i64 : MVT::i32);
11047  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
11048  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
11049  MachinePointerInfo(), false, false, false, 0);
11050  }
11051 
11052  // Just load the return address off the stack.
11053  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
11054  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
11055  MachinePointerInfo(), false, false, false, 0);
11056 }
11057 
11058 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
11059  SelectionDAG &DAG) const {
11060  SDLoc dl(Op);
11061  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11062 
11063  MachineFunction &MF = DAG.getMachineFunction();
11064  MachineFrameInfo *MFI = MF.getFrameInfo();
11065  MFI->setFrameAddressIsTaken(true);
11066 
11067  EVT PtrVT = getPointerTy(MF.getDataLayout());
11068  bool isPPC64 = PtrVT == MVT::i64;
11069 
11070  // Naked functions never have a frame pointer, and so we use r1. For all
11071  // other functions, this decision must be deferred until PEI.
11072  unsigned FrameReg;
11073  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
11074  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
11075  else
11076  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
11077 
11078  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
11079  PtrVT);
11080  while (Depth--)
11081  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
11082  FrameAddr, MachinePointerInfo(), false, false,
11083  false, 0);
11084  return FrameAddr;
11085 }
11086 
11087 // FIXME? Maybe this could be a TableGen attribute on some registers and
11088 // this table could be generated automatically from RegInfo.
11089 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
11090  SelectionDAG &DAG) const {
11091  bool isPPC64 = Subtarget.isPPC64();
11092  bool isDarwinABI = Subtarget.isDarwinABI();
11093 
11094  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
11095  (!isPPC64 && VT != MVT::i32))
11096  report_fatal_error("Invalid register global variable type");
11097 
11098  bool is64Bit = isPPC64 && VT == MVT::i64;
11099  unsigned Reg = StringSwitch<unsigned>(RegName)
11100  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
11101  .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
11102  .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
11103  (is64Bit ? PPC::X13 : PPC::R13))
11104  .Default(0);
11105 
11106  if (Reg)
11107  return Reg;
11108  report_fatal_error("Invalid register name global variable");
11109 }
11110 
11111 bool
11112 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
11113  // The PowerPC target isn't yet aware of offsets.
11114  return false;
11115 }
11116 
11117 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11118  const CallInst &I,
11119  unsigned Intrinsic) const {
11120 
11121  switch (Intrinsic) {
11122  case Intrinsic::ppc_qpx_qvlfd:
11123  case Intrinsic::ppc_qpx_qvlfs:
11124  case Intrinsic::ppc_qpx_qvlfcd:
11125  case Intrinsic::ppc_qpx_qvlfcs:
11126  case Intrinsic::ppc_qpx_qvlfiwa:
11127  case Intrinsic::ppc_qpx_qvlfiwz:
11128  case Intrinsic::ppc_altivec_lvx:
11129  case Intrinsic::ppc_altivec_lvxl:
11130  case Intrinsic::ppc_altivec_lvebx:
11131  case Intrinsic::ppc_altivec_lvehx:
11132  case Intrinsic::ppc_altivec_lvewx:
11133  case Intrinsic::ppc_vsx_lxvd2x:
11134  case Intrinsic::ppc_vsx_lxvw4x: {
11135  EVT VT;
11136  switch (Intrinsic) {
11137  case Intrinsic::ppc_altivec_lvebx:
11138  VT = MVT::i8;
11139  break;
11140  case Intrinsic::ppc_altivec_lvehx:
11141  VT = MVT::i16;
11142  break;
11143  case Intrinsic::ppc_altivec_lvewx:
11144  VT = MVT::i32;
11145  break;
11146  case Intrinsic::ppc_vsx_lxvd2x:
11147  VT = MVT::v2f64;
11148  break;
11149  case Intrinsic::ppc_qpx_qvlfd:
11150  VT = MVT::v4f64;
11151  break;
11152  case Intrinsic::ppc_qpx_qvlfs:
11153  VT = MVT::v4f32;
11154  break;
11155  case Intrinsic::ppc_qpx_qvlfcd:
11156  VT = MVT::v2f64;
11157  break;
11158  case Intrinsic::ppc_qpx_qvlfcs:
11159  VT = MVT::v2f32;
11160  break;
11161  default:
11162  VT = MVT::v4i32;
11163  break;
11164  }
11165 
11166  Info.opc = ISD::INTRINSIC_W_CHAIN;
11167  Info.memVT = VT;
11168  Info.ptrVal = I.getArgOperand(0);
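  // These loads ignore the low-order address bits (lvx-style alignment
  // truncation), so conservatively report the 2*size-1 byte window that is
  // guaranteed to cover every byte the instruction can actually touch.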
11169  Info.offset = -VT.getStoreSize()+1;
11170  Info.size = 2*VT.getStoreSize()-1;
11171  Info.align = 1;
11172  Info.vol = false;
11173  Info.readMem = true;
11174  Info.writeMem = false;
11175  return true;
11176  }
11177  case Intrinsic::ppc_qpx_qvlfda:
11178  case Intrinsic::ppc_qpx_qvlfsa:
11179  case Intrinsic::ppc_qpx_qvlfcda:
11180  case Intrinsic::ppc_qpx_qvlfcsa:
11181  case Intrinsic::ppc_qpx_qvlfiwaa:
11182  case Intrinsic::ppc_qpx_qvlfiwza: {
11183  EVT VT;
11184  switch (Intrinsic) {
11185  case Intrinsic::ppc_qpx_qvlfda:
11186  VT = MVT::v4f64;
11187  break;
11188  case Intrinsic::ppc_qpx_qvlfsa:
11189  VT = MVT::v4f32;
11190  break;
11191  case Intrinsic::ppc_qpx_qvlfcda:
11192  VT = MVT::v2f64;
11193  break;
11194  case Intrinsic::ppc_qpx_qvlfcsa:
11195  VT = MVT::v2f32;
11196  break;
11197  default:
11198  VT = MVT::v4i32;
11199  break;
11200  }
11201 
11202  Info.opc = ISD::INTRINSIC_W_CHAIN;
11203  Info.memVT = VT;
11204  Info.ptrVal = I.getArgOperand(0);
11205  Info.offset = 0;
11206  Info.size = VT.getStoreSize();
11207  Info.align = 1;
11208  Info.vol = false;
11209  Info.readMem = true;
11210  Info.writeMem = false;
11211  return true;
11212  }
11213  case Intrinsic::ppc_qpx_qvstfd:
11214  case Intrinsic::ppc_qpx_qvstfs:
11215  case Intrinsic::ppc_qpx_qvstfcd:
11216  case Intrinsic::ppc_qpx_qvstfcs:
11217  case Intrinsic::ppc_qpx_qvstfiw:
11218  case Intrinsic::ppc_altivec_stvx:
11219  case Intrinsic::ppc_altivec_stvxl:
11220  case Intrinsic::ppc_altivec_stvebx:
11221  case Intrinsic::ppc_altivec_stvehx:
11222  case Intrinsic::ppc_altivec_stvewx:
11223  case Intrinsic::ppc_vsx_stxvd2x:
11224  case Intrinsic::ppc_vsx_stxvw4x: {
11225  EVT VT;
11226  switch (Intrinsic) {
11227  case Intrinsic::ppc_altivec_stvebx:
11228  VT = MVT::i8;
11229  break;
11230  case Intrinsic::ppc_altivec_stvehx:
11231  VT = MVT::i16;
11232  break;
11233  case Intrinsic::ppc_altivec_stvewx:
11234  VT = MVT::i32;
11235  break;
11236  case Intrinsic::ppc_vsx_stxvd2x:
11237  VT = MVT::v2f64;
11238  break;
11239  case Intrinsic::ppc_qpx_qvstfd:
11240  VT = MVT::v4f64;
11241  break;
11242  case Intrinsic::ppc_qpx_qvstfs:
11243  VT = MVT::v4f32;
11244  break;
11245  case Intrinsic::ppc_qpx_qvstfcd:
11246  VT = MVT::v2f64;
11247  break;
11248  case Intrinsic::ppc_qpx_qvstfcs:
11249  VT = MVT::v2f32;
11250  break;
11251  default:
11252  VT = MVT::v4i32;
11253  break;
11254  }
11255 
11256  Info.opc = ISD::INTRINSIC_VOID;
11257  Info.memVT = VT;
11258  Info.ptrVal = I.getArgOperand(1);
11259  Info.offset = -VT.getStoreSize()+1;
11260  Info.size = 2*VT.getStoreSize()-1;
11261  Info.align = 1;
11262  Info.vol = false;
11263  Info.readMem = false;
11264  Info.writeMem = true;
11265  return true;
11266  }
11267  case Intrinsic::ppc_qpx_qvstfda:
11268  case Intrinsic::ppc_qpx_qvstfsa:
11269  case Intrinsic::ppc_qpx_qvstfcda:
11270  case Intrinsic::ppc_qpx_qvstfcsa:
11271  case Intrinsic::ppc_qpx_qvstfiwa: {
11272  EVT VT;
11273  switch (Intrinsic) {
11274  case Intrinsic::ppc_qpx_qvstfda:
11275  VT = MVT::v4f64;
11276  break;
11277  case Intrinsic::ppc_qpx_qvstfsa:
11278  VT = MVT::v4f32;
11279  break;
11280  case Intrinsic::ppc_qpx_qvstfcda:
11281  VT = MVT::v2f64;
11282  break;
11283  case Intrinsic::ppc_qpx_qvstfcsa:
11284  VT = MVT::v2f32;
11285  break;
11286  default:
11287  VT = MVT::v4i32;
11288  break;
11289  }
11290 
11291  Info.opc = ISD::INTRINSIC_VOID;
11292  Info.memVT = VT;
11293  Info.ptrVal = I.getArgOperand(1);
11294  Info.offset = 0;
11295  Info.size = VT.getStoreSize();
11296  Info.align = 1;
11297  Info.vol = false;
11298  Info.readMem = false;
11299  Info.writeMem = true;
11300  return true;
11301  }
11302  default:
11303  break;
11304  }
11305 
11306  return false;
11307 }
11308 
11309 /// getOptimalMemOpType - Returns the target specific optimal type for load
11310 /// and store operations as a result of memset, memcpy, and memmove
11311 /// lowering. If DstAlign is zero, it is safe because the destination
11312 /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero,
11313 /// there is no need to check it against an alignment requirement,
11314 /// probably because the source does not need to be loaded. If 'IsMemset' is
11315 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
11316 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
11317 /// source is constant so it does not need to be loaded.
11318 /// It returns EVT::Other if the type should be determined using generic
11319 /// target-independent logic.
11320 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
11321  unsigned DstAlign, unsigned SrcAlign,
11322  bool IsMemset, bool ZeroMemset,
11323  bool MemcpyStrSrc,
11324  MachineFunction &MF) const {
11325  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
11326  const Function *F = MF.getFunction();
11327  // When expanding a memset, require at least two QPX instructions to cover
11328  // the cost of loading the value to be stored from the constant pool.
11329  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
11330  (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
11331  !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
11332  return MVT::v4f64;
11333  }
11334 
11335  // We should use Altivec/VSX loads and stores when available. For unaligned
11336  // addresses, unaligned VSX loads are only fast starting with the P8.
11337  if (Subtarget.hasAltivec() && Size >= 16 &&
11338  (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
11339  ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
11340  return MVT::v4i32;
11341  }
11342 
11343  if (Subtarget.isPPC64()) {
11344  return MVT::i64;
11345  }
11346 
11347  return MVT::i32;
11348 }
11349 
11350 /// \brief Returns true if it is beneficial to convert a load of a constant
11351 /// to just the constant itself.
11352 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
11353  Type *Ty) const {
11354  assert(Ty->isIntegerTy());
11355 
11356  unsigned BitSize = Ty->getPrimitiveSizeInBits();
11357  if (BitSize == 0 || BitSize > 64)
11358  return false;
11359  return true;
11360 }
11361 
11362 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
11363  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11364  return false;
11365  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
11366  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
11367  return NumBits1 == 64 && NumBits2 == 32;
11368 }
11369 
11370 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
11371  if (!VT1.isInteger() || !VT2.isInteger())
11372  return false;
11373  unsigned NumBits1 = VT1.getSizeInBits();
11374  unsigned NumBits2 = VT2.getSizeInBits();
11375  return NumBits1 == 64 && NumBits2 == 32;
11376 }
11377 
11378 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11379  // Generally speaking, zexts are not free, but they are free when they can be
11380  // folded with other operations.
11381  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
11382  EVT MemVT = LD->getMemoryVT();
11383  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
11384  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
11385  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
11386  LD->getExtensionType() == ISD::ZEXTLOAD))
11387  return true;
11388  }
11389 
11390  // FIXME: Add other cases...
11391  // - 32-bit shifts with a zext to i64
11392  // - zext after ctlz, bswap, etc.
11393  // - zext after and by a constant mask
11394 
11395  return TargetLowering::isZExtFree(Val, VT2);
11396 }
11397 
11398 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
11399  assert(VT.isFloatingPoint());
11400  return true;
11401 }
11402 
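// PPC compare/add immediates are 16-bit fields (e.g. cmpwi/cmplwi, addi), so
// both signed and unsigned 16-bit values can be encoded directly.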
11403 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
11404  return isInt<16>(Imm) || isUInt<16>(Imm);
11405 }
11406 
11407 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
11408  return isInt<16>(Imm) || isUInt<16>(Imm);
11409 }
11410 
11411 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
11412  unsigned,
11413  unsigned,
11414  bool *Fast) const {
11415  if (DisablePPCUnaligned)
11416  return false;
11417 
11418  // PowerPC supports unaligned memory access for simple non-vector types.
11419  // Although accessing unaligned addresses is not as efficient as accessing
11420  // aligned addresses, it is generally more efficient than manual expansion,
11421  // and generally only traps to software emulation when crossing page
11422  // boundaries.
11423 
11424  if (!VT.isSimple())
11425  return false;
11426 
11427  if (VT.getSimpleVT().isVector()) {
11428  if (Subtarget.hasVSX()) {
11429  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
11430  VT != MVT::v4f32 && VT != MVT::v4i32)
11431  return false;
11432  } else {
11433  return false;
11434  }
11435  }
11436 
11437  if (VT == MVT::ppcf128)
11438  return false;
11439 
11440  if (Fast)
11441  *Fast = true;
11442 
11443  return true;
11444 }
11445 
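// PPC's fmadd/fmadds fuse the multiply and add into one instruction, so FMA
// formation is profitable for both f32 and f64.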
11446 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
11447  VT = VT.getScalarType();
11448 
11449  if (!VT.isSimple())
11450  return false;
11451 
11452  switch (VT.getSimpleVT().SimpleTy) {
11453  case MVT::f32:
11454  case MVT::f64:
11455  return true;
11456  default:
11457  break;
11458  }
11459 
11460  return false;
11461 }
11462 
11463 const MCPhysReg *
11464 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
11465  // LR is a callee-save register, but we must treat it as clobbered by any call
11466  // site. Hence we include LR in the scratch registers, which are in turn added
11467  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
11468  // to CTR, which is used by any indirect call.
11469  static const MCPhysReg ScratchRegs[] = {
11470  PPC::X12, PPC::LR8, PPC::CTR8, 0
11471  };
11472 
11473  return ScratchRegs;
11474 }
11475 
11476 bool
11477 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
11478  EVT VT, unsigned DefinedValues) const {
11479  if (VT == MVT::v2i64)
11480  return false;
11481 
11482  if (Subtarget.hasQPX()) {
11483  if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
11484  return true;
11485  }
11486 
11487  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
11488 }
11489 
11490 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
11491  if (DisableILPPref || Subtarget.enableMachineScheduler())
11492  return TargetLowering::getSchedulingPreference(N);
11493 
11494  return Sched::ILP;
11495 }
11496 
11497 // Create a fast isel object.
11498 FastISel *
11499 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
11500  const TargetLibraryInfo *LibInfo) const {
11501  return PPC::createFastISel(FuncInfo, LibInfo);
11502 }
bool hasType(MVT vt) const
hasType - return true if this TargetRegisterClass has the ValueType vt.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:641
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
cl::opt< bool > ANDIGlueBug
X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
void setFrameAddressIsTaken(bool T)
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:477
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:450
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool hasLDBRX() const
Definition: PPCSubtarget.h:234
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
The memory access reads data.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
MachineBasicBlock * EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode) const
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Return with a flag operand, matched by 'blr'.
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
The memory access writes data.
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
void setVarArgsNumGPR(unsigned Num)
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:195
bool isTargetELF() const
Definition: PPCSubtarget.h:265
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:175
unsigned getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const override
Return the register ID of the name passed in.
#define R4(n)
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:453
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:522
QVFPERM = This corresponds to the QPX qvfperm instruction.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, SDLoc DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:646
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
bool hasPOPCNTD() const
Definition: PPCSubtarget.h:230
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
SDValue getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:292
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, bool isDarwinABI, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
LocInfo getLocInfo() const
GPRC = address of GLOBAL_OFFSET_TABLE.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
bool isLittleEndian() const
Definition: PPCSubtarget.h:207
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const
const TargetMachine & getTargetMachine() const
bool isAtLeastAcquire(AtomicOrdering Ord)
Returns true if the ordering is at least as strong as acquire (i.e.
Definition: Instructions.h:56
bool isDarwinABI() const
Definition: PPCSubtarget.h:268
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, SDLoc dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
Create a MERGE_VALUES node from the given operands.
bool hasISEL() const
Definition: PPCSubtarget.h:229
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:285
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, bool isPPC64, bool isDarwinABI, SDLoc dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:84
A Stackmap instruction captures the location of live variables at its position in the instruction str...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:286
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:585
CallInst - This class represents a function call, abstracting a target machine's calling convention...
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:276
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:531
const GlobalValue * getGlobal() const
QBRC, CHAIN = QVLFSb CHAIN, Ptr The 4xf32 load used for v4i1 constants.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
bool hasLazyResolverStub(const GlobalValue *GV) const
hasLazyResolverStub - Return true if accesses to the specified global have to go through a dyld lazy ...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:752
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
void setFramePointerSaveIndex(int Idx)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
unsigned getSizeInBits() const
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isDoubleTy() const
isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:146
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:122
unsigned getByValSize() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
unsigned getNumOperands() const
Return the number of values used by this operation.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
bool isDarwin() const
isDarwin - True if this is any darwin platform.
Definition: PPCSubtarget.h:261
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
virtual bool isZExtFree(Type *, Type *) const
Return true if any actual instruction that defines a value of type Ty1 implicitly zero-extends the va...
A debug info location.
Definition: DebugLoc.h:34
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB)
transferSuccessorsAndUpdatePHIs - Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks which refer to fromMBB to refer to this.
const SDValue & getOperand(unsigned Num) const
F(f)
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
unsigned getVarArgsNumGPR() const
static bool isLocalCall(const SDValue &Callee)
CALL - A direct function call.
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
#define R2(n)
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
StringSwitch & Case(const char(&S)[N], const T &Value)
Definition: StringSwitch.h:55
unsigned getValNo() const
const SDValue & getBasePtr() const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
void setVarArgsNumFPR(unsigned Num)
bool hasAltivec() const
Definition: PPCSubtarget.h:221
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Naked function.
Definition: Attributes.h:81
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:189
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool isUnsignedIntSetCC(CondCode Code)
isUnsignedIntSetCC - Return true if this is a setcc instruction that performs an unsigned comparison ...
Definition: ISDOpcodes.h:843
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:254
bool isRegLoc() const
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SDValue getExternalSymbol(const char *Sym, EVT VT)
bool hasQPX() const
Definition: PPCSubtarget.h:223
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:172
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
static MachinePointerInfo getFixedStack(int FI, int64_t offset=0)
getFixedStack - Return a MachinePointerInfo record that refers to the the specified FrameIndex...
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation...
bool isFPExtFree(EVT VT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:109
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:371
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
bool isMacOSX() const
isMacOSX - Is this a Mac OS X triple.
Definition: Triple.h:394
lazy value info
BlockAddress - The address of a basic block.
Definition: Constants.h:802
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
void setVarArgsStackOffset(int Offset)
MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the reference is actually to the "FOO...
Definition: PPC.h:65
MachineMemOperand - A description of a memory reference used in the backend.
std::string getEVTString() const
getEVTString - This function returns value type as a string, e.g.
Definition: ValueTypes.cpp:106
static std::string getRecipOp(const char *Base, EVT VT)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:228
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
Shift and rotation operations.
Definition: ISDOpcodes.h:332
bool hasFPRND() const
Definition: PPCSubtarget.h:219
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
Base class for LoadSDNode and StoreSDNode.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, SDLoc dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
unsigned getMinReservedArea() const
static void advance(T &it, size_t Val)
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, unsigned MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:181
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1057
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
const TargetRegisterClass * getRegClass(unsigned Reg) const
getRegClass - Return the register class of the specified virtual register.
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:458
Reg
All possible values of the reg field in the ModR/M byte.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
static cl::opt< bool > DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden)
MachinePointerInfo getWithOffset(int64_t O) const
SimpleValueType SimpleTy
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
Direct move from a GPR to a VSX register (algebraic)
X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:353
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
QVALIGNI = This corresponds to the QPX qvaligni instruction.
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
#define G(x, y, z)
Definition: MD5.cpp:52
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:400
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Definition: Instructions.h:38
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:581
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:442
unsigned getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static const unsigned PerfectShuffleTable[6561+1]
bool isInConsecutiveRegs() const
load Combine Adjacent Loads
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:637
unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isMacOSXVersionLT - Comparison function for checking OS X version compatibility, which handles suppor...
Definition: Triple.h:379
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:30
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getAltivecCompareInfo - Given an intrinsic, return false if it is not an altivec comparison.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
ValTy * getCalledValue() const
getCalledValue - Return the pointer to function that is being called.
Definition: CallSite.h:91
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:473
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
G8RC = ADDIS_TLSGD_HA X2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
ArrayType - Class to represent array types.
Definition: DerivedTypes.h:336
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
getMatchingSuperReg - Return a super-register of the specified register Reg so its sub-register of in...
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
Return an ISD::VECTOR_SHUFFLE node.
MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a symbol with hidden visibility...
Definition: PPC.h:78
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
SmallVector< ISD::OutputArg, 32 > Outs
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:243
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:217
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:866
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
MachineBasicBlock * emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments, on Darwin.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
getOptimalMemOpType - Returns the target specific optimal type for load and store operations as a res...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool isInConsecutiveRegsLast() const
int64_t getImm() const
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:624
SDValue getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
bool isSignedIntSetCC(CondCode Code)
isSignedIntSetCC - Return true if this is a setcc instruction that performs a signed comparison when ...
Definition: ISDOpcodes.h:837
bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
uint32_t FloatToBits(float Float)
FloatToBits - This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:541
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
TargetInstrInfo - Interface to description of machine instruction set.
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
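A small usage sketch (illustrative): building the 0xFFFF0000 mask that selects the upper half of a 32-bit value, the kind of APInt mask passed to MaskedValueIsZero:

  #include "llvm/ADT/APInt.h"

  llvm::APInt highHalfMask() {
    // 32-bit APInt with the top 16 bits set: 0xFFFF0000.
    return llvm::APInt::getHighBitsSet(32, 16);
  }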
bool isELFv2ABI() const
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:341
SDNode * getNode() const
get the SDNode which holds the desired result
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bundle_iterator< MachineInstr, instr_iterator > iterator
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
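A typical StringSwitch pattern, sketched with illustrative register names (the Default entry further down this index supplies the fallback when no Case matches):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  int regNumberFor(llvm::StringRef Name) {
    return llvm::StringSwitch<int>(Name)
        .Case("r0", 0)
        .Case("r1", 1)
        .Default(-1); // returned when no Case matches
  }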
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
Instruction * emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:157
const MachineInstrBuilder & setMemRefs(MachineInstr::mmo_iterator b, MachineInstr::mmo_iterator e) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Patchable call instruction - this instruction represents a call to a constant address, followed by a series of NOPs.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:533
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, SDLoc dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls...
const SDValue & getBasePtr() const
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1...
Definition: MathExtras.h:109
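For example (a sketch): countTrailingZeros turns a power-of-two value into its shift amount, since the number of zero bits below the single set bit is exactly log2:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  void ctzExample() {
    // 16 == 0b10000: four 0 bits precede the first 1.
    assert(llvm::countTrailingZeros(16u) == 4);
  }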
FSEL - Traditional three-operand fsel node.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:258
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isNonTemporal() const
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
Definition: Type.h:226
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MVT getLocVT() const
This is an important base class in LLVM.
Definition: Constant.h:41
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
virtual unsigned getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:73
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:607
G8RC = ADDIS_DTPREL_HA X3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
bool isVector() const
isVector - Return true if this is a vector value type.
Direct move from a VSX register to a GPR.
static bool is64Bit(const char *name)
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:267
const Constant * getConstVal() const
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
unsigned getVarArgsNumFPR() const
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
bool isFloatTy() const
isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:143
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:233
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the specified isSplatShuffleMask...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
STFIWX - The STFIWX instruction.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target...
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:160
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and register class for the register.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:225
This class provides iterator support for SDUse operands that use a specific SDNode.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:547
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:635
static bool isIntS16Immediate(SDNode *N, short &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setVarArgsFrameIndex(int Index)
unsigned getOpcode() const
static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, bool isTailCall, bool IsPatchPoint, bool hasNest, SmallVectorImpl< std::pair< unsigned, SDValue > > &RegsToPass, SmallVectorImpl< SDValue > &Ops, std::vector< EVT > &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget)
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:644
static const MCPhysReg QFPR[]
QFPR - The set of QPX registers that should be allocated for arguments.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
bool isEnabled(const StringRef &Key) const
Return true if the reciprocal operation has been enabled by default or from the command-line.
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
static mvt_range vector_valuetypes()
bool hasFSQRT() const
Definition: PPCSubtarget.h:211
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:190
arg_iterator arg_begin()
Definition: Function.h:472
G8RC = ADDIS_TLSLD_HA X2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, SDLoc dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:69
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
QVESPLATI = This corresponds to the QPX qvesplati instruction.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:598
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
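A hedged sketch of the builder idiom (not code from this file; the opcode and registers are parameters precisely because they are illustrative): BuildMI returns a MachineInstrBuilder, so operands chain fluently:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/IR/DebugLoc.h"
  #include "llvm/Target/TargetInstrInfo.h"

  // Emit "DstReg = <AddImmOpcode> SrcReg, 16" before MI.
  void emitAddImm(llvm::MachineBasicBlock &MBB,
                  llvm::MachineBasicBlock::iterator MI, llvm::DebugLoc DL,
                  const llvm::TargetInstrInfo &TII, unsigned AddImmOpcode,
                  unsigned DstReg, unsigned SrcReg) {
    llvm::BuildMI(MBB, MI, DL, TII.get(AddImmOpcode), DstReg)
        .addReg(SrcReg)
        .addImm(16);
  }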
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:468
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1...
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static UndefValue * get(Type *T)
get() - Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1473
std::vector< ArgListEntry > ArgListTy
void setMinReservedArea(unsigned size)
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, SDLoc dl)
BuildSplatI - Build a canonical splati of Val with an element size of SplatSize.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasFPCVT() const
Definition: PPCSubtarget.h:220
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="")
Definition: IRBuilder.h:1467
This structure contains all information that is necessary for lowering calls.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:269
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:386
void setUseUnderscoreLongJmp(bool Val)
Indicate whether this target prefers to use _longjmp to implement llvm.longjmp or the version without...
Instruction * emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
const BlockAddress * getBlockAddress() const
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:246
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool hasLFIWAX() const
Definition: PPCSubtarget.h:218
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
unsigned getByValAlign() const
const SDValue & getOffset() const
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register -- 64-bit SVR4 ABI only...
ArrayRef< int > getMask() const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:694
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:484
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:478
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
const uint32_t * getNoPreservedMask() const
QBFLT = Access the underlying QPX floating-point boolean representation.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:674
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:174
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:170
bool hasP8Altivec() const
Definition: PPCSubtarget.h:226
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
CCState - This class holds information needed while lowering arguments and return values...
X3 = GET_TLSLD_ADDR X3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
bool hasDirectMove() const
Definition: PPCSubtarget.h:247
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:471
void setNode(SDNode *N)
set the SDNode
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector< SDNode * > *Created) const override
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
void setExceptionPointerRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception address on entry to...
bool isInvariant() const
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:465
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:163
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:548
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
CHAIN = SC CHAIN, Imm128 - System call.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node. ...
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
X3 = GET_TLS_ADDR X3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset...
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Represents one node in the SelectionDAG.
static Constant * get(Type *Ty, double V)
get() - This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in the specified type.
Definition: Constants.cpp:652
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const
static MachinePointerInfo getStack(int64_t Offset)
getStack - stack pointer relative access.
VPERM - The PPC VPERM Instruction.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
R Default(const T &Value) const
Definition: StringSwitch.h:111
static mvt_range integer_valuetypes()
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:468
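Behavior sketch: Log2_32 floors, so pair it with a power-of-two check when an exact shift amount is required:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  void log2Example() {
    assert(llvm::Log2_32(256) == 8); // exact power of two
    assert(llvm::Log2_32(255) == 7); // non-power-of-two is floored
  }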
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
G8RC = ADDIS_GOT_TPREL_HA X2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:73
void setExceptionSelectorRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception typeid on entry to ...
QVGPCI = This corresponds to the QPX qvgpci instruction.
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getCallPreservedMask - Return a mask of call-preserved registers for the given calling convention on ...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is a legal add immediate, that is, the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
op_iterator op_begin() const
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static use_iterator use_end()
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64-bit edition)...
Definition: MathExtras.h:360
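The bit trick behind such predicates, as a free-standing sketch (equivalent in spirit to the library version): a power of two has exactly one set bit, so subtracting 1 and ANDing must give zero:

  #include <cstdint>

  bool isPow2(uint64_t Value) {
    // Value - 1 flips the lowest set bit and all bits below it, so the
    // AND is zero exactly when Value has a single set bit.
    return Value != 0 && (Value & (Value - 1)) == 0;
  }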
bool isMemLoc() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
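The idiom this enables, sketched with SelectionDAG types (an illustrative helper, not from this file): dyn_cast yields null on a type mismatch, so the test and the cast fuse into one conditional:

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Return operand 1 as a sign-extended constant, or -1 if it is not a
  // compile-time constant.
  int64_t constantOperandOrMinusOne(llvm::SDNode *N) {
    if (llvm::ConstantSDNode *C =
            llvm::dyn_cast<llvm::ConstantSDNode>(N->getOperand(1)))
      return C->getSExtValue();
    return -1; // dyn_cast returned nullptr: not a ConstantSDNode
  }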
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegReg - Given the specified address, check to see if it can be represented as an inde...
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
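A hedged C++ sketch of what that two-instruction idiom computes (semantics only, not the emitted assembly; assumes arithmetic right shift on signed int): srawi's carry is set when a negative value shifts out nonzero bits, and addze adds it back, turning the shift's round-toward-negative-infinity into the round-toward-zero that signed division requires:

  #include <cstdint>

  // Signed division by 2^K, rounded toward zero (K in [1, 31]).
  int32_t sdivByPow2(int32_t X, unsigned K) {
    int32_t Shifted = X >> K; // arithmetic shift rounds toward -infinity
    // srawi sets CA iff X is negative and nonzero bits were shifted out:
    int32_t Carry = (X < 0 && (X & ((1u << K) - 1)) != 0) ? 1 : 0;
    return Shifted + Carry;   // addze folds the carry back in
  }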
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
bool hasSTFIWX() const
Definition: PPCSubtarget.h:217
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:481
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:250
bool enableMachineScheduler() const override
static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, SDValue GA)
GPRC = address of GLOBAL_OFFSET_TABLE.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
block_iterator block_end() const
Definition: LoopInfo.h:142
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
Representation of each machine instruction.
Definition: MachineInstr.h:51
static MachinePointerInfo getGOT()
getGOT - Return a MachinePointerInfo record that refers to a GOT entry.
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:603
uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:552
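What it computes, sketched: for power-of-two alignments, the common alignment of A and B is the lowest set bit of A | B. For example, an offset of 12 from a 16-byte-aligned base is only 4-byte aligned:

  #include <cstdint>

  uint64_t minAlign(uint64_t A, uint64_t B) {
    // Isolate the lowest set bit of A | B; ~(A | B) + 1 is its two's
    // complement negation. minAlign(16, 12) == 4.
    return (A | B) & (~(A | B) + 1);
  }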
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack...
Represents a use of a SDNode.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:516
SmallVector< SDValue, 32 > OutVals
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry...
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Reciprocal estimate instructions (unary FP ops).
bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:142
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:418
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
void clearBit(unsigned bitPosition)
Set a given bit to 0.
Definition: APInt.cpp:597
SDValue getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
static MachineOperand CreateImm(int64_t Val)
ArrayRef< SDUse > ops() const
int32_t SignExtend32(uint32_t x)
SignExtend32 - Sign extend B-bit number x to 32-bit int.
Definition: MathExtras.h:622
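The standard shift-pair sketch of what SignExtend32<B> does (assuming arithmetic right shift on signed types): the left shift moves bit B-1 into the sign position, and the arithmetic right shift replicates it across the high bits:

  #include <cstdint>

  // Sign-extend the low B bits of x to a full 32-bit int (0 < B <= 32).
  template <unsigned B> int32_t signExtend32(uint32_t x) {
    return int32_t(x << (32 - B)) >> (32 - B);
  }
  // e.g. signExtend32<16>(0xFFFF) == -1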
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Direct move from a GPR to a VSX register (zero)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
The CMPB instruction (takes two operands of i32 or i64).
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
bool hasVSX() const
Definition: PPCSubtarget.h:224
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OpSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction. ...
op_iterator op_end() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const SDValue & getOffset() const
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
static void setUsesTOCBasePtr(MachineFunction &MF)
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
TC_RETURN - A tail call return.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:512
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
bool isAtLeastRelease(AtomicOrdering Ord)
Returns true if the ordering is at least as strong as release (i.e. release, acq_rel, or seq_cst).
Definition: Instructions.h:64
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:103
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca=nullptr)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
static bool isFunctionGlobalAddress(SDValue Callee)
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:199
EVT getValueType() const
Return the ValueType of the referenced return value.
bool hasLocalLinkage() const
Definition: GlobalValue.h:280
bool hasP8Vector() const
Definition: PPCSubtarget.h:225
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
unsigned getReg() const
getReg - Returns the register number.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:105
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and register class for the register.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const
GetInstSize - Return the number of bytes of code the specified instruction may occupy.
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
unsigned getAlignment() const
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
static bool GetLabelAccessInfo(const TargetMachine &TM, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
GetLabelAccessInfo - Return true if we should reference labels using a PICBase, set the HiOpFlags and...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
SDValue getRegister(unsigned Reg, EVT VT)
void setUseUnderscoreSetJmp(bool Val)
Indicate whether this target prefers to use _setjmp to implement llvm.setjmp or the version without _...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2...
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
void setInsertFencesForAtomic(bool fence)
Set if the DAG builder should automatically insert fences and reduce the order of atomic memory opera...
SDValue getValueType(EVT)
Disable implicit floating point insts.
Definition: Attributes.h:87
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:653
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:298
These nodes represent the 32-bit PPC shifts that operate on 6-bit shift amounts.
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
virtual void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
Insert a select instruction into MBB before I that will copy TrueReg to DstReg when Cond is true...
unsigned getDarwinDirective() const
getDarwinDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:152
BasicBlockListType::iterator iterator
uint64_t getSize() const
getSize - Return the size in bytes of the memory reference.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
Primary interface to the complete machine description for the target machine.
block_iterator block_begin() const
Definition: LoopInfo.h:141
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:214
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:996
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
bool hasFRES() const
Definition: PPCSubtarget.h:213
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
unsigned getRefinementSteps(const StringRef &Key) const
Return the number of iterations necessary to refine the result of a machine instruction for the g...
bool hasFCPSGN() const
Definition: PPCSubtarget.h:210
SDValue getConstantFP(double Val, SDLoc DL, EVT VT, bool isTarget=false)
bool hasFRE() const
Definition: PPCSubtarget.h:212
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:726
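Usage sketch (illustrative operands): the helper wraps getNode(ISD::SETCC, ...) so a caller passes the ISD::CondCode directly:

  #include "llvm/CodeGen/SelectionDAG.h"

  // Build "LHS u< RHS" as an i1 condition value.
  llvm::SDValue buildULT(llvm::SelectionDAG &DAG, llvm::SDLoc dl,
                         llvm::SDValue LHS, llvm::SDValue RHS) {
    return DAG.getSetCC(dl, llvm::MVT::i1, LHS, RHS, llvm::ISD::SETULT);
  }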
unsigned getLocMemOffset() const
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:666
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getAlignment() const
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:215
RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the altivec VCMP*o instructions.
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:662
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase...
Definition: PPC.h:69
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
uint64_t getZExtValue() const
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:761
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:340
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
int isQVALIGNIShuffleMask(SDNode *N)
If this is a qvaligni shuffle mask, return the shift amount, otherwise return -1. ...
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:527