ARMISelLowering.cpp (LLVM 3.7.0)
1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMCallingConv.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMPerfectShuffle.h"
20 #include "ARMSubtarget.h"
21 #include "ARMTargetMachine.h"
22 #include "ARMTargetObjectFile.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/IR/CallingConv.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/MC/MCSectionMachO.h"
49 #include "llvm/Support/Debug.h"
54 #include <utility>
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "arm-isel"
58 
59 STATISTIC(NumTailCalls, "Number of tail calls");
60 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
61 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
62 
63 static cl::opt<bool>
64 ARMInterworking("arm-interworking", cl::Hidden,
65  cl::desc("Enable / disable ARM interworking (for debugging only)"),
66  cl::init(true));
67 
68 namespace {
69  class ARMCCState : public CCState {
70  public:
71  ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
73  ParmContext PC)
74  : CCState(CC, isVarArg, MF, locs, C) {
75  assert(((PC == Call) || (PC == Prologue)) &&
76  "ARMCCState users must specify whether their context is call "
77  "or prologue generation.");
78  CallOrPrologue = PC;
79  }
80  };
81 }
82 
83 // The APCS parameter registers.
84 static const MCPhysReg GPRArgRegs[] = {
85  ARM::R0, ARM::R1, ARM::R2, ARM::R3
86 };
87 
88 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
89  MVT PromotedBitwiseVT) {
90  if (VT != PromotedLdStVT) {
92  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
93 
95  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
96  }
97 
98  MVT ElemTy = VT.getVectorElementType();
99  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
103  if (ElemTy == MVT::i32) {
108  } else {
113  }
122  if (VT.isInteger()) {
126  }
127 
128  // Promote all bit-wise operations.
129  if (VT.isInteger() && VT != PromotedBitwiseVT) {
131  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
133  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
135  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
136  }
137 
138  // Neon does not support vector divide/remainder operations.
145 }
146 
147 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
148  addRegisterClass(VT, &ARM::DPRRegClass);
149  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
150 }
151 
152 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
153  addRegisterClass(VT, &ARM::DPairRegClass);
154  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
155 }
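// For illustration (not part of the original file): registering a 64-bit NEON
// type routes it through addTypeForNEON with f64 as the promoted load/store
// type and v2i32 as the promoted bitwise type, so for example
//
//   addDRTypeForNEON(MVT::v8i8);
//
// makes v8i8 loads/stores reuse the f64 patterns and v8i8 AND/OR/XOR reuse
// the v2i32 patterns; the 128-bit variants promote to v2f64 and v4i32 instead.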
156 
157 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
158  const ARMSubtarget &STI)
159  : TargetLowering(TM), Subtarget(&STI) {
160  RegInfo = Subtarget->getRegisterInfo();
161  Itins = Subtarget->getInstrItineraryData();
162 
164 
165  if (Subtarget->isTargetMachO()) {
166  // Uses VFP for Thumb libfuncs if available.
167  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
168  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
169  // Single-precision floating-point arithmetic.
170  setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
171  setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
172  setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
173  setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
174 
175  // Double-precision floating-point arithmetic.
176  setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
177  setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
178  setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
179  setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
180 
181  // Single-precision comparisons.
182  setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
183  setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
184  setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
185  setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
186  setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
187  setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
188  setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
189  setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
190 
199 
200  // Double-precision comparisons.
201  setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
202  setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
203  setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
204  setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
205  setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
206  setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
207  setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
208  setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
209 
218 
219  // Floating-point to integer conversions.
220  // i64 conversions are done via library routines even when generating VFP
221  // instructions, so use the same ones.
222  setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
223  setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
224  setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
225  setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
226 
227  // Conversions between floating types.
228  setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
229  setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
230 
231  // Integer to floating-point conversions.
232  // i64 conversions are done via library routines even when generating VFP
233  // instructions, so use the same ones.
234  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
235  // e.g., __floatunsidf vs. __floatunssidfvfp.
236  setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
237  setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
238  setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
239  setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
240  }
241  }
242 
243  // These libcalls are not available in 32-bit.
247 
248  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
249  !Subtarget->isTargetWindows()) {
250  static const struct {
251  const RTLIB::Libcall Op;
252  const char * const Name;
253  const CallingConv::ID CC;
254  const ISD::CondCode Cond;
255  } LibraryCalls[] = {
256  // Double-precision floating-point arithmetic helper functions
257  // RTABI chapter 4.1.2, Table 2
262 
263  // Double-precision floating-point comparison helper functions
264  // RTABI chapter 4.1.2, Table 3
265  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
266  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
267  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
268  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
269  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
270  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
271  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
272  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
273 
274  // Single-precision floating-point arithmetic helper functions
275  // RTABI chapter 4.1.2, Table 4
280 
281  // Single-precision floating-point comparison helper functions
282  // RTABI chapter 4.1.2, Table 5
283  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
284  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
285  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
286  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
287  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
288  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
289  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
290  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
291 
292  // Floating-point to integer conversions.
293  // RTABI chapter 4.1.2, Table 6
302 
303  // Conversions between floating types.
304  // RTABI chapter 4.1.2, Table 7
308 
309  // Integer to floating-point conversions.
310  // RTABI chapter 4.1.2, Table 8
319 
320  // Long long helper functions
321  // RTABI chapter 4.2, Table 9
326 
327  // Integer division functions
328  // RTABI chapter 4.3.1
337 
338  // Memory operations
339  // RTABI chapter 4.3.4
343  };
344 
345  for (const auto &LC : LibraryCalls) {
346  setLibcallName(LC.Op, LC.Name);
347  setLibcallCallingConv(LC.Op, LC.CC);
348  if (LC.Cond != ISD::SETCC_INVALID)
349  setCmpLibcallCC(LC.Op, LC.Cond);
350  }
351  }
352 
353  if (Subtarget->isTargetWindows()) {
354  static const struct {
355  const RTLIB::Libcall Op;
356  const char * const Name;
357  const CallingConv::ID CC;
358  } LibraryCalls[] = {
367  };
368 
369  for (const auto &LC : LibraryCalls) {
370  setLibcallName(LC.Op, LC.Name);
371  setLibcallCallingConv(LC.Op, LC.CC);
372  }
373  }
374 
375  // Use divmod compiler-rt calls for iOS 5.0 and later.
376  if (Subtarget->getTargetTriple().isiOS() &&
377  !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
378  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
379  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
380  }
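// For illustration (not part of the original file): with the divmod libcall
// names set above, a quotient/remainder pair over the same operands, e.g.
//
//   %q = sdiv i32 %a, %b
//   %r = srem i32 %a, %b
//
// can be lowered to a single __divmodsi4 call that returns both results
// instead of separate division and modulo libcalls.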
381 
382  // The half <-> float conversion functions are always soft-float, but are
383  // needed for some targets which use a hard-float calling convention by
384  // default.
385  if (Subtarget->isAAPCS_ABI()) {
389  } else {
393  }
394 
395  if (Subtarget->isThumb1Only())
396  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
397  else
398  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
399  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
400  !Subtarget->isThumb1Only()) {
401  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
402  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
403  }
404 
405  for (MVT VT : MVT::vector_valuetypes()) {
406  for (MVT InnerVT : MVT::vector_valuetypes()) {
407  setTruncStoreAction(VT, InnerVT, Expand);
408  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
409  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
410  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
411  }
412 
417 
419  }
420 
423 
426 
427  if (Subtarget->hasNEON()) {
428  addDRTypeForNEON(MVT::v2f32);
429  addDRTypeForNEON(MVT::v8i8);
430  addDRTypeForNEON(MVT::v4i16);
431  addDRTypeForNEON(MVT::v2i32);
432  addDRTypeForNEON(MVT::v1i64);
433 
434  addQRTypeForNEON(MVT::v4f32);
435  addQRTypeForNEON(MVT::v2f64);
436  addQRTypeForNEON(MVT::v16i8);
437  addQRTypeForNEON(MVT::v8i16);
438  addQRTypeForNEON(MVT::v4i32);
439  addQRTypeForNEON(MVT::v2i64);
440 
441  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
442  // neither Neon nor VFP support any arithmetic operations on it.
443  // The same is true of v4f32, but keep in mind that vadd, vsub and vmul are
444  // natively supported for v4f32.
448  // FIXME: Code duplication: FDIV and FREM are expanded always, see
449  // ARMTargetLowering::addTypeForNEON method for details.
452  // FIXME: Create unittest.
453  // In other words, find a case where "copysign" appears in the DAG with
454  // vector operands.
456  // FIXME: Code duplication: SETCC has custom operation action, see
457  // ARMTargetLowering::addTypeForNEON method for details.
459  // FIXME: Create unittest for FNEG and for FABS.
472  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
479 
495 
496  // Mark v2f32 intrinsics.
512 
513  // Neon does not support some operations on v1i64 and v2i64 types.
515  // Custom handling for some quad-vector types to detect VMULL.
519  // Custom handling for some vector types to avoid expensive expansions
526  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
527  // a destination type that is wider than the source, nor does
528  // it have a FP_TO_[SU]INT instruction with a narrower destination than
529  // source.
534 
537 
538  // NEON does not have single instruction CTPOP for vectors with element
539  // types wider than 8 bits. However, custom lowering can leverage the
540  // v8i8/v16i8 vcnt instruction.
545 
546  // NEON does not have single instruction CTTZ for vectors.
551 
556 
561 
566 
567  // NEON only has FMA instructions as of VFP4.
568  if (!Subtarget->hasVFP4()) {
571  }
572 
591 
592  // It is legal to extload from v4i8 to v4i16 or v4i32.
594  MVT::v2i32}) {
595  for (MVT VT : MVT::integer_vector_valuetypes()) {
599  }
600  }
601  }
602 
603  // ARM and Thumb2 support UMLAL/SMLAL.
604  if (!Subtarget->isThumb1Only())
606 
607  if (Subtarget->isFPOnlySP()) {
608  // When targeting a floating-point unit with only single-precision
609  // operations, f64 is legal for the few double-precision instructions which
610  // are present. However, no double-precision operations other than moves,
611  // loads and stores are provided by the hardware.
645  }
646 
648 
649  // ARM does not have floating-point extending loads.
650  for (MVT VT : MVT::fp_valuetypes()) {
653  }
654 
655  // ... or truncating stores
659 
660  // ARM does not have i1 sign extending load.
661  for (MVT VT : MVT::integer_valuetypes())
663 
664  // ARM supports all 4 flavors of integer indexed load / store.
665  if (!Subtarget->isThumb1Only()) {
666  for (unsigned im = (unsigned)ISD::PRE_INC;
676  }
677  }
678 
683 
684  // i64 operation support.
687  if (Subtarget->isThumb1Only()) {
690  }
691  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
692  || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
694 
700 
701  if (!Subtarget->isThumb1Only()) {
702  // FIXME: We should do this for Thumb1 as well.
707  }
708 
709  // ARM does not have ROTL.
713  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
715 
716  // These just redirect to CTTZ and CTLZ on ARM.
719 
721 
722  // Only ARMv6 has BSWAP.
723  if (!Subtarget->hasV6Ops())
725 
726  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
727  !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
728  // These are expanded into libcalls if the cpu doesn't have HW divider.
731  }
732 
733  // FIXME: Also set divmod for SREM on EABI
736  // Register based DivRem for AEABI (RTABI 4.2)
737  if (Subtarget->isTargetAEABI()) {
738  setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
739  setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
740  setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
741  setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
742  setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
743  setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
744  setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
745  setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
746 
755 
758  } else {
761  }
762 
768 
770 
771  // Use the default implementation.
778 
779  if (!Subtarget->isTargetMachO()) {
780  // Non-MachO platforms may return values in these registers via the
781  // personality function.
784  }
785 
786  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
788  else
790 
791  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
792  // the default expansion. If we are targeting a single threaded system,
793  // then set them all for expand so we can lower them later into their
794  // non-atomic form.
797  else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
798  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
799  // to ldrex/strex loops already.
801 
802  // On v8, we have particularly efficient implementations of atomic fences
803  // if they can be combined with nearby atomic loads and stores.
804  if (!Subtarget->hasV8Ops()) {
805  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
807  }
808  } else {
809  // If there's anything we can use as a barrier, go through custom lowering
810  // for ATOMIC_FENCE.
812  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
813 
814  // Set them all for expansion, which will force libcalls.
827  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
828  // Unordered/Monotonic case.
831  }
832 
834 
835  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
836  if (!Subtarget->hasV6Ops()) {
839  }
841 
842  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
843  !Subtarget->isThumb1Only()) {
844  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
845  // iff target supports vfp2.
848  }
849 
850  // We want to custom lower some of our intrinsics.
852  if (Subtarget->isTargetDarwin()) {
855  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
856  }
857 
867 
873 
874  // We don't support sin/cos/fmod/copysign/pow
883  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
884  !Subtarget->isThumb1Only()) {
887  }
890 
891  if (!Subtarget->hasVFP4()) {
894  }
895 
896  // Various VFP goodness
897  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
898  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
899  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
902  }
903 
904  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
905  if (!Subtarget->hasFP16()) {
908  }
909  }
910 
911  // Combine sin / cos into one node or libcall if possible.
912  if (Subtarget->hasSinCos()) {
913  setLibcallName(RTLIB::SINCOS_F32, "sincosf");
915  if (Subtarget->getTargetTriple().isiOS()) {
916  // For iOS, we don't want the normal expansion of a libcall to
917  // sincos. We want to issue a libcall to __sincos_stret.
920  }
921  }
922 
923  // FP-ARMv8 implements a lot of rounding-like FP operations.
924  if (Subtarget->hasFPARMv8()) {
931  if (!Subtarget->isFPOnlySP()) {
938  }
939  }
940  // We have target-specific dag combine patterns for the following nodes:
941  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
948 
949  if (Subtarget->hasV6Ops())
951 
953 
954  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
955  !Subtarget->hasVFP2())
957  else
959 
960  //// temporary - rewrite interface to use type
961  MaxStoresPerMemset = 8;
962  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
963  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
964  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
965  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
966  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
967 
968  // On ARM arguments smaller than 4 bytes are extended, so all arguments
969  // are at least 4 bytes aligned.
971 
972  // Prefer likely predicted branches to selects on out-of-order cores.
973  PredictableSelectIsExpensive = Subtarget->isLikeA9();
974 
975  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
976 }
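// For illustration (not part of the original file): the memory-op thresholds
// set above let small copies be expanded inline rather than calling libc.
// With MaxStoresPerMemcpy = 4, a 16-byte, word-aligned
//
//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 16, i32 4, i1 false)
//
// can become four word-sized load/store pairs instead of a call to memcpy.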
977 
978 bool ARMTargetLowering::useSoftFloat() const {
979  return Subtarget->useSoftFloat();
980 }
981 
982 // FIXME: It might make sense to define the representative register class as the
983 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
984 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
985 // SPR's representative would be DPR_VFP2. This should work well if register
986 // pressure tracking were modified such that a register use would increment the
987 // pressure of the register class's representative and all of its super
988 // classes' representatives transitively. We have not implemented this because
989 // of the difficulty prior to coalescing of modeling operand register classes
990 // due to the common occurrence of cross class copies and subregister insertions
991 // and extractions.
992 std::pair<const TargetRegisterClass *, uint8_t>
993 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
994  MVT VT) const {
995  const TargetRegisterClass *RRC = nullptr;
996  uint8_t Cost = 1;
997  switch (VT.SimpleTy) {
998  default:
1000  // Use DPR as the representative register class for all floating-point
1001  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1002  // the cost is 1 for both f32 and f64.
1003  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1004  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1005  RRC = &ARM::DPRRegClass;
1006  // When NEON is used for SP, only half of the register file is available
1007  // because operations that define both SP and DP results will be constrained
1008  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1009  // coalescing by double-counting the SP regs. See the FIXME above.
1010  if (Subtarget->useNEONForSinglePrecisionFP())
1011  Cost = 2;
1012  break;
1013  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1014  case MVT::v4f32: case MVT::v2f64:
1015  RRC = &ARM::DPRRegClass;
1016  Cost = 2;
1017  break;
1018  case MVT::v4i64:
1019  RRC = &ARM::DPRRegClass;
1020  Cost = 4;
1021  break;
1022  case MVT::v8i64:
1023  RRC = &ARM::DPRRegClass;
1024  Cost = 8;
1025  break;
1026  }
1027  return std::make_pair(RRC, Cost);
1028 }
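// For illustration (not part of the original file): with these mappings the
// register-pressure tracker charges a v4f32 as two DPR units (a Q register
// covers two D registers), a v4i64 as four and a v8i64 as eight, i.e. the
// number of consecutive D registers each value ultimately occupies.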
1029 
1030 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1031  switch ((ARMISD::NodeType)Opcode) {
1032  case ARMISD::FIRST_NUMBER: break;
1033  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1034  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1035  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1036  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1037  case ARMISD::CALL: return "ARMISD::CALL";
1038  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1039  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1040  case ARMISD::tCALL: return "ARMISD::tCALL";
1041  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1042  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1043  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1044  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1045  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1046  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1047  case ARMISD::CMP: return "ARMISD::CMP";
1048  case ARMISD::CMN: return "ARMISD::CMN";
1049  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1050  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1051  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1052  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1053  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1054 
1055  case ARMISD::CMOV: return "ARMISD::CMOV";
1056 
1057  case ARMISD::RBIT: return "ARMISD::RBIT";
1058 
1059  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1060  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1061  case ARMISD::RRX: return "ARMISD::RRX";
1062 
1063  case ARMISD::ADDC: return "ARMISD::ADDC";
1064  case ARMISD::ADDE: return "ARMISD::ADDE";
1065  case ARMISD::SUBC: return "ARMISD::SUBC";
1066  case ARMISD::SUBE: return "ARMISD::SUBE";
1067 
1068  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1069  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1070 
1071  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1072  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
1073 
1074  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1075 
1076  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1077 
1078  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1079 
1080  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1081 
1082  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1083 
1084  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1085 
1086  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1087  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1088  case ARMISD::VCGE: return "ARMISD::VCGE";
1089  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1090  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1091  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1092  case ARMISD::VCGT: return "ARMISD::VCGT";
1093  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1094  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1095  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1096  case ARMISD::VTST: return "ARMISD::VTST";
1097 
1098  case ARMISD::VSHL: return "ARMISD::VSHL";
1099  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1100  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1101  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1102  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1103  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1104  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1105  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1106  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1107  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1108  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1109  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1110  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1111  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1112  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1113  case ARMISD::VSLI: return "ARMISD::VSLI";
1114  case ARMISD::VSRI: return "ARMISD::VSRI";
1115  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1116  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1117  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1118  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1119  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1120  case ARMISD::VDUP: return "ARMISD::VDUP";
1121  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1122  case ARMISD::VEXT: return "ARMISD::VEXT";
1123  case ARMISD::VREV64: return "ARMISD::VREV64";
1124  case ARMISD::VREV32: return "ARMISD::VREV32";
1125  case ARMISD::VREV16: return "ARMISD::VREV16";
1126  case ARMISD::VZIP: return "ARMISD::VZIP";
1127  case ARMISD::VUZP: return "ARMISD::VUZP";
1128  case ARMISD::VTRN: return "ARMISD::VTRN";
1129  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1130  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1131  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1132  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1133  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1134  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1135  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1136  case ARMISD::FMAX: return "ARMISD::FMAX";
1137  case ARMISD::FMIN: return "ARMISD::FMIN";
1138  case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
1139  case ARMISD::VMINNM: return "ARMISD::VMINNM";
1140  case ARMISD::BFI: return "ARMISD::BFI";
1141  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1142  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1143  case ARMISD::VBSL: return "ARMISD::VBSL";
1144  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1145  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1146  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1147  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1148  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1149  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1150  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1151  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1152  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1153  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1154  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1155  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1156  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1157  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1158  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1159  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1160  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1161  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1162  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1163  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1164  }
1165  return nullptr;
1166 }
1167 
1168 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1169  EVT VT) const {
1170  if (!VT.isVector())
1171  return getPointerTy(DL);
1172  return VT.changeVectorElementTypeToInteger();
1173 }
1174 
1175 /// getRegClassFor - Return the register class that should be used for the
1176 /// specified value type.
1177 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1178  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1179  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1180  // load / store 4 to 8 consecutive D registers.
1181  if (Subtarget->hasNEON()) {
1182  if (VT == MVT::v4i64)
1183  return &ARM::QQPRRegClass;
1184  if (VT == MVT::v8i64)
1185  return &ARM::QQQQPRRegClass;
1186  }
1187  return TargetLowering::getRegClassFor(VT);
1188 }
1189 
1190 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1191 // source/dest is aligned and the copy size is large enough. We therefore want
1192 // to align such objects passed to memory intrinsics.
1193 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned MinSize,
1194  unsigned &PrefAlign) const {
1195  if (!isa<MemIntrinsic>(CI))
1196  return false;
1197  MinSize = 8;
1198  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1199  // cycle faster than 4-byte aligned LDM.
1200  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1201  return true;
1202 }
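// For illustration (not part of the original file): on an ARMv6+ non-M-class
// target, a memory intrinsic such as
//
//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 64, i32 4, i1 false)
//
// reports MinSize = 8 and PrefAlign = 8, asking the caller to raise the
// alignment of sufficiently large underlying objects to 8 bytes so the copy
// can be emitted as 8-byte-aligned LDM/STM sequences.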
1203 
1204 // Create a fast isel object.
1205 FastISel *
1206 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1207  const TargetLibraryInfo *libInfo) const {
1208  return ARM::createFastISel(funcInfo, libInfo);
1209 }
1210 
1211 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1212  unsigned NumVals = N->getNumValues();
1213  if (!NumVals)
1214  return Sched::RegPressure;
1215 
1216  for (unsigned i = 0; i != NumVals; ++i) {
1217  EVT VT = N->getValueType(i);
1218  if (VT == MVT::Glue || VT == MVT::Other)
1219  continue;
1220  if (VT.isFloatingPoint() || VT.isVector())
1221  return Sched::ILP;
1222  }
1223 
1224  if (!N->isMachineOpcode())
1225  return Sched::RegPressure;
1226 
1227  // Loads are scheduled for latency even if the instruction itinerary
1228  // is not available.
1229  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1230  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1231 
1232  if (MCID.getNumDefs() == 0)
1233  return Sched::RegPressure;
1234  if (!Itins->isEmpty() &&
1235  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1236  return Sched::ILP;
1237 
1238  return Sched::RegPressure;
1239 }
1240 
1241 //===----------------------------------------------------------------------===//
1242 // Lowering Code
1243 //===----------------------------------------------------------------------===//
1244 
1245 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1246 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1247  switch (CC) {
1248  default: llvm_unreachable("Unknown condition code!");
1249  case ISD::SETNE: return ARMCC::NE;
1250  case ISD::SETEQ: return ARMCC::EQ;
1251  case ISD::SETGT: return ARMCC::GT;
1252  case ISD::SETGE: return ARMCC::GE;
1253  case ISD::SETLT: return ARMCC::LT;
1254  case ISD::SETLE: return ARMCC::LE;
1255  case ISD::SETUGT: return ARMCC::HI;
1256  case ISD::SETUGE: return ARMCC::HS;
1257  case ISD::SETULT: return ARMCC::LO;
1258  case ISD::SETULE: return ARMCC::LS;
1259  }
1260 }
1261 
1262 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1263 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1264  ARMCC::CondCodes &CondCode2) {
1265  CondCode2 = ARMCC::AL;
1266  switch (CC) {
1267  default: llvm_unreachable("Unknown FP condition!");
1268  case ISD::SETEQ:
1269  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1270  case ISD::SETGT:
1271  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1272  case ISD::SETGE:
1273  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1274  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1275  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1276  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1277  case ISD::SETO: CondCode = ARMCC::VC; break;
1278  case ISD::SETUO: CondCode = ARMCC::VS; break;
1279  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1280  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1281  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1282  case ISD::SETLT:
1283  case ISD::SETULT: CondCode = ARMCC::LT; break;
1284  case ISD::SETLE:
1285  case ISD::SETULE: CondCode = ARMCC::LE; break;
1286  case ISD::SETNE:
1287  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1288  }
1289 }
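// For illustration (not part of the original file): predicates that VFP
// cannot test with one condition come back as a pair. SETUEQ ("equal or
// unordered"), for example, yields {EQ, VS}, so the caller emits two
// conditional operations, one on EQ and one on VS (the V flag is set by
// FMSTAT when the compare operands were unordered).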
1290 
1291 //===----------------------------------------------------------------------===//
1292 // Calling Convention Implementation
1293 //===----------------------------------------------------------------------===//
1294 
1295 #include "ARMGenCallingConv.inc"
1296 
1297 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1298 /// account presence of floating point hardware and calling convention
1299 /// limitations, such as support for variadic functions.
1300 CallingConv::ID
1301 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1302  bool isVarArg) const {
1303  switch (CC) {
1304  default:
1305  llvm_unreachable("Unsupported calling convention");
1307  case CallingConv::ARM_APCS:
1308  case CallingConv::GHC:
1309  return CC;
1312  case CallingConv::C:
1313  if (!Subtarget->isAAPCS_ABI())
1314  return CallingConv::ARM_APCS;
1315  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1317  !isVarArg)
1319  else
1320  return CallingConv::ARM_AAPCS;
1321  case CallingConv::Fast:
1322  if (!Subtarget->isAAPCS_ABI()) {
1323  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1324  return CallingConv::Fast;
1325  return CallingConv::ARM_APCS;
1326  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1328  else
1329  return CallingConv::ARM_AAPCS;
1330  }
1331 }
1332 
1333 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1334 /// CallingConvention.
1335 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1336  bool Return,
1337  bool isVarArg) const {
1338  switch (getEffectiveCallingConv(CC, isVarArg)) {
1339  default:
1340  llvm_unreachable("Unsupported calling convention");
1341  case CallingConv::ARM_APCS:
1342  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1344  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1346  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1347  case CallingConv::Fast:
1348  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1349  case CallingConv::GHC:
1350  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1351  }
1352 }
1353 
1354 /// LowerCallResult - Lower the result values of a call into the
1355 /// appropriate copies out of appropriate physical registers.
1356 SDValue
1357 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1358  CallingConv::ID CallConv, bool isVarArg,
1359  const SmallVectorImpl<ISD::InputArg> &Ins,
1360  SDLoc dl, SelectionDAG &DAG,
1361  SmallVectorImpl<SDValue> &InVals,
1362  bool isThisReturn, SDValue ThisVal) const {
1363 
1364  // Assign locations to each value returned by this call.
1365  SmallVector<CCValAssign, 16> RVLocs;
1366  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1367  *DAG.getContext(), Call);
1368  CCInfo.AnalyzeCallResult(Ins,
1369  CCAssignFnForNode(CallConv, /* Return*/ true,
1370  isVarArg));
1371 
1372  // Copy all of the result registers out of their specified physreg.
1373  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1374  CCValAssign VA = RVLocs[i];
1375 
1376  // Pass 'this' value directly from the argument to return value, to avoid
1377  // reg unit interference
1378  if (i == 0 && isThisReturn) {
1379  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1380  "unexpected return calling convention register assignment");
1381  InVals.push_back(ThisVal);
1382  continue;
1383  }
1384 
1385  SDValue Val;
1386  if (VA.needsCustom()) {
1387  // Handle f64 or half of a v2f64.
1388  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1389  InFlag);
1390  Chain = Lo.getValue(1);
1391  InFlag = Lo.getValue(2);
1392  VA = RVLocs[++i]; // skip ahead to next loc
1393  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1394  InFlag);
1395  Chain = Hi.getValue(1);
1396  InFlag = Hi.getValue(2);
1397  if (!Subtarget->isLittle())
1398  std::swap (Lo, Hi);
1399  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1400 
1401  if (VA.getLocVT() == MVT::v2f64) {
1402  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1403  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1404  DAG.getConstant(0, dl, MVT::i32));
1405 
1406  VA = RVLocs[++i]; // skip ahead to next loc
1407  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1408  Chain = Lo.getValue(1);
1409  InFlag = Lo.getValue(2);
1410  VA = RVLocs[++i]; // skip ahead to next loc
1411  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1412  Chain = Hi.getValue(1);
1413  InFlag = Hi.getValue(2);
1414  if (!Subtarget->isLittle())
1415  std::swap (Lo, Hi);
1416  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1417  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1418  DAG.getConstant(1, dl, MVT::i32));
1419  }
1420  } else {
1421  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1422  InFlag);
1423  Chain = Val.getValue(1);
1424  InFlag = Val.getValue(2);
1425  }
1426 
1427  switch (VA.getLocInfo()) {
1428  default: llvm_unreachable("Unknown loc info!");
1429  case CCValAssign::Full: break;
1430  case CCValAssign::BCvt:
1431  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1432  break;
1433  }
1434 
1435  InVals.push_back(Val);
1436  }
1437 
1438  return Chain;
1439 }
1440 
1441 /// LowerMemOpCallTo - Store the argument to the stack.
1442 SDValue
1443 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1444  SDValue StackPtr, SDValue Arg,
1445  SDLoc dl, SelectionDAG &DAG,
1446  const CCValAssign &VA,
1447  ISD::ArgFlagsTy Flags) const {
1448  unsigned LocMemOffset = VA.getLocMemOffset();
1449  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1450  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1451  StackPtr, PtrOff);
1452  return DAG.getStore(Chain, dl, Arg, PtrOff,
1453  MachinePointerInfo::getStack(LocMemOffset),
1454  false, false, 0);
1455 }
1456 
1457 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
1458  SDValue Chain, SDValue &Arg,
1459  RegsToPassVector &RegsToPass,
1460  CCValAssign &VA, CCValAssign &NextVA,
1461  SDValue &StackPtr,
1462  SmallVectorImpl<SDValue> &MemOpChains,
1463  ISD::ArgFlagsTy Flags) const {
1464 
1465  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1466  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1467  unsigned id = Subtarget->isLittle() ? 0 : 1;
1468  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1469 
1470  if (NextVA.isRegLoc())
1471  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1472  else {
1473  assert(NextVA.isMemLoc());
1474  if (!StackPtr.getNode())
1475  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1476  getPointerTy(DAG.getDataLayout()));
1477 
1478  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1479  dl, DAG, NextVA,
1480  Flags));
1481  }
1482 }
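// For illustration (not part of the original file): under AAPCS soft-float
// argument passing, an f64 assigned to the register pair {r2, r3} is split
// here by VMOVRRD into two i32 halves; big-endian subtargets swap the halves
// (id = 1), and when only one GPR is left the other half goes to the stack
// slot described by NextVA.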
1483 
1484 /// LowerCall - Lowering a call into a callseq_start <-
1485 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1486 /// nodes.
1487 SDValue
1488 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1489  SmallVectorImpl<SDValue> &InVals) const {
1490  SelectionDAG &DAG = CLI.DAG;
1491  SDLoc &dl = CLI.DL;
1492  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1493  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1494  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1495  SDValue Chain = CLI.Chain;
1496  SDValue Callee = CLI.Callee;
1497  bool &isTailCall = CLI.IsTailCall;
1498  CallingConv::ID CallConv = CLI.CallConv;
1499  bool doesNotRet = CLI.DoesNotReturn;
1500  bool isVarArg = CLI.IsVarArg;
1501 
1502  MachineFunction &MF = DAG.getMachineFunction();
1503  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1504  bool isThisReturn = false;
1505  bool isSibCall = false;
1506  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1507 
1508  // Disable tail calls if they're not supported.
1509  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1510  isTailCall = false;
1511 
1512  if (isTailCall) {
1513  // Check if it's really possible to do a tail call.
1514  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1515  isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1516  Outs, OutVals, Ins, DAG);
1517  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1518  report_fatal_error("failed to perform tail call elimination on a call "
1519  "site marked musttail");
1520  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1521  // detected sibcalls.
1522  if (isTailCall) {
1523  ++NumTailCalls;
1524  isSibCall = true;
1525  }
1526  }
1527 
1528  // Analyze operands of the call, assigning locations to each operand.
1529  SmallVector<CCValAssign, 16> ArgLocs;
1530  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1531  *DAG.getContext(), Call);
1532  CCInfo.AnalyzeCallOperands(Outs,
1533  CCAssignFnForNode(CallConv, /* Return*/ false,
1534  isVarArg));
1535 
1536  // Get a count of how many bytes are to be pushed on the stack.
1537  unsigned NumBytes = CCInfo.getNextStackOffset();
1538 
1539  // For tail calls, memory operands are available in our caller's stack.
1540  if (isSibCall)
1541  NumBytes = 0;
1542 
1543  // Adjust the stack pointer for the new arguments...
1544  // These operations are automatically eliminated by the prolog/epilog pass
1545  if (!isSibCall)
1546  Chain = DAG.getCALLSEQ_START(Chain,
1547  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
1548 
1549  SDValue StackPtr =
1550  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1551 
1552  RegsToPassVector RegsToPass;
1553  SmallVector<SDValue, 8> MemOpChains;
1554 
1555  // Walk the register/memloc assignments, inserting copies/loads. In the case
1556  // of tail call optimization, arguments are handled later.
1557  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1558  i != e;
1559  ++i, ++realArgIdx) {
1560  CCValAssign &VA = ArgLocs[i];
1561  SDValue Arg = OutVals[realArgIdx];
1562  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1563  bool isByVal = Flags.isByVal();
1564 
1565  // Promote the value if needed.
1566  switch (VA.getLocInfo()) {
1567  default: llvm_unreachable("Unknown loc info!");
1568  case CCValAssign::Full: break;
1569  case CCValAssign::SExt:
1570  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1571  break;
1572  case CCValAssign::ZExt:
1573  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1574  break;
1575  case CCValAssign::AExt:
1576  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1577  break;
1578  case CCValAssign::BCvt:
1579  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1580  break;
1581  }
1582 
1583  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1584  if (VA.needsCustom()) {
1585  if (VA.getLocVT() == MVT::v2f64) {
1586  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1587  DAG.getConstant(0, dl, MVT::i32));
1588  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1589  DAG.getConstant(1, dl, MVT::i32));
1590 
1591  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1592  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1593 
1594  VA = ArgLocs[++i]; // skip ahead to next loc
1595  if (VA.isRegLoc()) {
1596  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1597  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1598  } else {
1599  assert(VA.isMemLoc());
1600 
1601  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1602  dl, DAG, VA, Flags));
1603  }
1604  } else {
1605  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1606  StackPtr, MemOpChains, Flags);
1607  }
1608  } else if (VA.isRegLoc()) {
1609  if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
1610  assert(VA.getLocVT() == MVT::i32 &&
1611  "unexpected calling convention register assignment");
1612  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1613  "unexpected use of 'returned'");
1614  isThisReturn = true;
1615  }
1616  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1617  } else if (isByVal) {
1618  assert(VA.isMemLoc());
1619  unsigned offset = 0;
1620 
1621  // True if this byval aggregate will be split between registers
1622  // and memory.
1623  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1624  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1625 
1626  if (CurByValIdx < ByValArgsCount) {
1627 
1628  unsigned RegBegin, RegEnd;
1629  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1630 
1631  EVT PtrVT =
1633  unsigned int i, j;
1634  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1635  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1636  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1637  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1639  false, false, false,
1640  DAG.InferPtrAlignment(AddArg));
1641  MemOpChains.push_back(Load.getValue(1));
1642  RegsToPass.push_back(std::make_pair(j, Load));
1643  }
1644 
1645  // If the parameter size exceeds the register area, the "offset" value
1646  // helps us calculate the stack slot for the remaining part properly.
1647  offset = RegEnd - RegBegin;
1648 
1649  CCInfo.nextInRegsParam();
1650  }
1651 
1652  if (Flags.getByValSize() > 4*offset) {
1653  auto PtrVT = getPointerTy(DAG.getDataLayout());
1654  unsigned LocMemOffset = VA.getLocMemOffset();
1655  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1656  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1657  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1658  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1659  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1660  MVT::i32);
1661  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1662  MVT::i32);
1663 
1664  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1665  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1666  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1667  Ops));
1668  }
1669  } else if (!isSibCall) {
1670  assert(VA.isMemLoc());
1671 
1672  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1673  dl, DAG, VA, Flags));
1674  }
1675  }
1676 
1677  if (!MemOpChains.empty())
1678  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1679 
1680  // Build a sequence of copy-to-reg nodes chained together with token chain
1681  // and flag operands which copy the outgoing args into the appropriate regs.
1682  SDValue InFlag;
1683  // Tail call byval lowering might overwrite argument registers so in case of
1684  // tail call optimization the copies to registers are lowered later.
1685  if (!isTailCall)
1686  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1687  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1688  RegsToPass[i].second, InFlag);
1689  InFlag = Chain.getValue(1);
1690  }
1691 
1692  // For tail calls lower the arguments to the 'real' stack slot.
1693  if (isTailCall) {
1694  // Force all the incoming stack arguments to be loaded from the stack
1695  // before any new outgoing arguments are stored to the stack, because the
1696  // outgoing stack slots may alias the incoming argument stack slots, and
1697  // the alias isn't otherwise explicit. This is slightly more conservative
1698  // than necessary, because it means that each store effectively depends
1699  // on every argument instead of just those arguments it would clobber.
1700 
1701  // Do not flag preceding copytoreg stuff together with the following stuff.
1702  InFlag = SDValue();
1703  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1704  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1705  RegsToPass[i].second, InFlag);
1706  InFlag = Chain.getValue(1);
1707  }
1708  InFlag = SDValue();
1709  }
1710 
1711  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1712  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1713  // node so that legalize doesn't hack it.
1714  bool isDirect = false;
1715  bool isARMFunc = false;
1716  bool isLocalARMFunc = false;
1717  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1718  auto PtrVt = getPointerTy(DAG.getDataLayout());
1719 
1720  if (Subtarget->genLongCalls()) {
1721  assert((Subtarget->isTargetWindows() ||
1723  "long-calls with non-static relocation model!");
1724  // Handle a global address or an external symbol. If it's not one of
1725  // those, the target's already in a register, so we don't need to do
1726  // anything extra.
1727  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1728  const GlobalValue *GV = G->getGlobal();
1729  // Create a constant pool entry for the callee address
1730  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1731  ARMConstantPoolValue *CPV =
1732  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1733 
1734  // Get the address of the callee into a register
1735  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1736  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1737  Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
1738  MachinePointerInfo::getConstantPool(), false, false,
1739  false, 0);
1740  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1741  const char *Sym = S->getSymbol();
1742 
1743  // Create a constant pool entry for the callee address
1744  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1745  ARMConstantPoolValue *CPV =
1747  ARMPCLabelIndex, 0);
1748  // Get the address of the callee into a register
1749  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1750  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1751  Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
1752  MachinePointerInfo::getConstantPool(), false, false,
1753  false, 0);
1754  }
1755  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1756  const GlobalValue *GV = G->getGlobal();
1757  isDirect = true;
1758  bool isDef = GV->isStrongDefinitionForLinker();
1759  bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
1761  isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1762  // ARM call to a local ARM function is predicable.
1763  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
1764  // tBX takes a register source operand.
1765  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1766  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
1767  Callee = DAG.getNode(
1768  ARMISD::WrapperPIC, dl, PtrVt,
1769  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
1770  Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
1771  MachinePointerInfo::getGOT(), false, false, true, 0);
1772  } else if (Subtarget->isTargetCOFF()) {
1773  assert(Subtarget->isTargetWindows() &&
1774  "Windows is the only supported COFF target");
1775  unsigned TargetFlags = GV->hasDLLImportStorageClass()
1778  Callee =
1779  DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
1780  if (GV->hasDLLImportStorageClass())
1781  Callee =
1782  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
1783  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
1784  MachinePointerInfo::getGOT(), false, false, false, 0);
1785  } else {
1786  // On ELF targets for PIC code, direct calls should go through the PLT
1787  unsigned OpFlags = 0;
1788  if (Subtarget->isTargetELF() &&
1790  OpFlags = ARMII::MO_PLT;
1791  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
1792  }
1793  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1794  isDirect = true;
1795  bool isStub = Subtarget->isTargetMachO() &&
1797  isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1798  // tBX takes a register source operand.
1799  const char *Sym = S->getSymbol();
1800  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1801  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1802  ARMConstantPoolValue *CPV =
1804  ARMPCLabelIndex, 4);
1805  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1806  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1807  Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
1808  MachinePointerInfo::getConstantPool(), false, false,
1809  false, 0);
1810  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
1811  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
1812  } else {
1813  unsigned OpFlags = 0;
1814  // On ELF targets for PIC code, direct calls should go through the PLT
1815  if (Subtarget->isTargetELF() &&
1817  OpFlags = ARMII::MO_PLT;
1818  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
1819  }
1820  }
1821 
1822  // FIXME: handle tail calls differently.
1823  unsigned CallOpc;
1824  bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
1825  if (Subtarget->isThumb()) {
1826  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1827  CallOpc = ARMISD::CALL_NOLINK;
1828  else
1829  CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1830  } else {
1831  if (!isDirect && !Subtarget->hasV5TOps())
1832  CallOpc = ARMISD::CALL_NOLINK;
1833  else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
1834  // Emit regular call when code size is the priority
1835  !HasMinSizeAttr)
1836  // "mov lr, pc; b _foo" to avoid confusing the RSP
1837  CallOpc = ARMISD::CALL_NOLINK;
1838  else
1839  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
1840  }
1841 
1842  std::vector<SDValue> Ops;
1843  Ops.push_back(Chain);
1844  Ops.push_back(Callee);
1845 
1846  // Add argument registers to the end of the list so that they are known live
1847  // into the call.
1848  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1849  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1850  RegsToPass[i].second.getValueType()));
1851 
1852  // Add a register mask operand representing the call-preserved registers.
1853  if (!isTailCall) {
1854  const uint32_t *Mask;
1855  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
1856  if (isThisReturn) {
1857  // For 'this' returns, use the R0-preserving mask if applicable
1858  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
1859  if (!Mask) {
1860  // Set isThisReturn to false if the calling convention is not one that
1861  // allows 'returned' to be modeled in this way, so LowerCallResult does
1862  // not try to pass 'this' straight through
1863  isThisReturn = false;
1864  Mask = ARI->getCallPreservedMask(MF, CallConv);
1865  }
1866  } else
1867  Mask = ARI->getCallPreservedMask(MF, CallConv);
1868 
1869  assert(Mask && "Missing call preserved mask for calling convention");
1870  Ops.push_back(DAG.getRegisterMask(Mask));
1871  }
1872 
1873  if (InFlag.getNode())
1874  Ops.push_back(InFlag);
1875 
1876  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1877  if (isTailCall) {
1878  MF.getFrameInfo()->setHasTailCall();
1879  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
1880  }
1881 
1882  // Returns a chain and a flag for retval copy to use.
1883  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
1884  InFlag = Chain.getValue(1);
1885 
1886  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
1887  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
1888  if (!Ins.empty())
1889  InFlag = Chain.getValue(1);
1890 
1891  // Handle result values, copying them out of physregs into vregs that we
1892  // return.
1893  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
1894  InVals, isThisReturn,
1895  isThisReturn ? OutVals[0] : SDValue());
1896 }
1897 
1898 /// HandleByVal - Every parameter *after* a byval parameter is passed
1899 /// on the stack. Remember the next parameter register to allocate,
1900 /// and then confiscate the rest of the parameter registers to ensure
1901 /// this.
1902 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
1903  unsigned Align) const {
1904  assert((State->getCallOrPrologue() == Prologue ||
1905  State->getCallOrPrologue() == Call) &&
1906  "unhandled ParmContext");
1907 
1908  // Byval (as with any stack) slots are always at least 4 byte aligned.
1909  Align = std::max(Align, 4U);
1910 
1911  unsigned Reg = State->AllocateReg(GPRArgRegs);
1912  if (!Reg)
1913  return;
1914 
1915  unsigned AlignInRegs = Align / 4;
1916  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
1917  for (unsigned i = 0; i < Waste; ++i)
1918  Reg = State->AllocateReg(GPRArgRegs);
1919 
1920  if (!Reg)
1921  return;
1922 
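  // Excess is the number of bytes that still fit in the remaining argument
  // registers [Reg, r4).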
1923  unsigned Excess = 4 * (ARM::R4 - Reg);
1924 
1925  // Special case when NSAA != SP and the parameter is larger than all the
1926  // remaining GPR argument registers. In that case we cannot split the
1927  // parameter; we must send it entirely to the stack. We also must set the
1928  // NCRN to R4, so all remaining registers are wasted.
1929  const unsigned NSAAOffset = State->getNextStackOffset();
1930  if (NSAAOffset != 0 && Size > Excess) {
1931  while (State->AllocateReg(GPRArgRegs))
1932  ;
1933  return;
1934  }
1935 
1936  // The first register for the byval parameter is the first register that
1937  // wasn't allocated before this call, i.e. "reg".
1938  // If the parameter is small enough to fit in the range [reg, r4), then the
1939  // end (one past the last) register is reg + param-size-in-regs; otherwise
1940  // the parameter is split between registers and the stack, and the end
1941  // register is r4.
1942  unsigned ByValRegBegin = Reg;
1943  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
1944  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
1945  // Note: the first register was already allocated at the beginning of this
1946  // function; allocate the remaining registers we need.
1947  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
1948  State->AllocateReg(GPRArgRegs);
1949  // A byval parameter that is split between registers and memory needs its
1950  // size truncated here.
1951  // In the case where the entire structure fits in registers, we set the
1952  // size in memory to zero.
1953  Size = std::max<int>(Size - Excess, 0);
1954 }
1955 
1956 /// MatchingStackOffset - Return true if the given stack call argument is
1957 /// already available in the same position (relatively) of the caller's
1958 /// incoming argument stack.
1959 static
1960 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1961  MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1962  const TargetInstrInfo *TII) {
1963  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1964  int FI = INT_MAX;
1965  if (Arg.getOpcode() == ISD::CopyFromReg) {
1966  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1967  if (!TargetRegisterInfo::isVirtualRegister(VR))
1968  return false;
1969  MachineInstr *Def = MRI->getVRegDef(VR);
1970  if (!Def)
1971  return false;
1972  if (!Flags.isByVal()) {
1973  if (!TII->isLoadFromStackSlot(Def, FI))
1974  return false;
1975  } else {
1976  return false;
1977  }
1978  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1979  if (Flags.isByVal())
1980  // ByVal argument is passed in as a pointer but it's now being
1981  // dereferenced. e.g.
1982  // define @foo(%struct.X* %A) {
1983  // tail call @bar(%struct.X* byval %A)
1984  // }
1985  return false;
1986  SDValue Ptr = Ld->getBasePtr();
1987  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1988  if (!FINode)
1989  return false;
1990  FI = FINode->getIndex();
1991  } else
1992  return false;
1993 
1994  assert(FI != INT_MAX);
1995  if (!MFI->isFixedObjectIndex(FI))
1996  return false;
1997  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1998 }
1999 
2000 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2001 /// for tail call optimization. Targets which want to do tail call
2002 /// optimization should implement this function.
2003 bool
2004 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2005  CallingConv::ID CalleeCC,
2006  bool isVarArg,
2007  bool isCalleeStructRet,
2008  bool isCallerStructRet,
2009  const SmallVectorImpl<ISD::OutputArg> &Outs,
2010  const SmallVectorImpl<SDValue> &OutVals,
2011  const SmallVectorImpl<ISD::InputArg> &Ins,
2012  SelectionDAG& DAG) const {
2013  const Function *CallerF = DAG.getMachineFunction().getFunction();
2014  CallingConv::ID CallerCC = CallerF->getCallingConv();
2015  bool CCMatch = CallerCC == CalleeCC;
2016 
2017  // Look for obvious safe cases to perform tail call optimization that do not
2018  // require ABI changes. This is what gcc calls sibcall.
2019 
2020  // Do not sibcall optimize vararg calls unless the call site is not passing
2021  // any arguments.
2022  if (isVarArg && !Outs.empty())
2023  return false;
2024 
2025  // Exception-handling functions need a special set of instructions to indicate
2026  // a return to the hardware. Tail-calling another function would probably
2027  // break this.
2028  if (CallerF->hasFnAttribute("interrupt"))
2029  return false;
2030 
2031  // Also avoid sibcall optimization if either caller or callee uses struct
2032  // return semantics.
2033  if (isCalleeStructRet || isCallerStructRet)
2034  return false;
2035 
2036  // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
2037  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
2038  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
2039  // support in the assembler and linker to be used. This would need to be
2040  // fixed to fully support tail calls in Thumb1.
2041  //
2042  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
2043  // LR. This means if we need to reload LR, it takes an extra instruction,
2044  // which outweighs the value of the tail call; but here we don't know yet
2045  // whether LR is going to be used. Probably the right approach is to
2046  // generate the tail call here and turn it back into CALL/RET in
2047  // emitEpilogue if LR is used.
2048 
2049  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
2050  // but we need to make sure there are enough registers; the only valid
2051  // registers are the 4 used for parameters. We don't currently do this
2052  // case.
2053  if (Subtarget->isThumb1Only())
2054  return false;
2055 
2056  // Externally-defined functions with weak linkage should not be
2057  // tail-called on ARM when the OS does not support dynamic
2058  // pre-emption of symbols, as the AAELF spec requires normal calls
2059  // to undefined weak functions to be replaced with a NOP or jump to the
2060  // next instruction. The behaviour of branch instructions in this
2061  // situation (as used for tail calls) is implementation-defined, so we
2062  // cannot rely on the linker replacing the tail call with a return.
2063  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2064  const GlobalValue *GV = G->getGlobal();
2065  const Triple &TT = getTargetMachine().getTargetTriple();
2066  if (GV->hasExternalWeakLinkage() &&
2067  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2068  return false;
2069  }
2070 
2071  // If the calling conventions do not match, then we'd better make sure the
2072  // results are returned in the same way as what the caller expects.
2073  if (!CCMatch) {
2074  SmallVector<CCValAssign, 16> RVLocs1;
2075  ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
2076  *DAG.getContext(), Call);
2077  CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
2078 
2079  SmallVector<CCValAssign, 16> RVLocs2;
2080  ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
2081  *DAG.getContext(), Call);
2082  CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
2083 
2084  if (RVLocs1.size() != RVLocs2.size())
2085  return false;
2086  for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
2087  if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
2088  return false;
2089  if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
2090  return false;
2091  if (RVLocs1[i].isRegLoc()) {
2092  if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
2093  return false;
2094  } else {
2095  if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
2096  return false;
2097  }
2098  }
2099  }
2100 
2101  // If Caller's vararg or byval argument has been split between registers and
2102  // stack, do not perform tail call, since part of the argument is in caller's
2103  // local frame.
2104  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
2105  getInfo<ARMFunctionInfo>();
2106  if (AFI_Caller->getArgRegsSaveSize())
2107  return false;
2108 
2109  // If the callee takes no arguments then go on to check the results of the
2110  // call.
2111  if (!Outs.empty()) {
2112  // Check if stack adjustment is needed. For now, do not do this if any
2113  // argument is passed on the stack.
2114  SmallVector<CCValAssign, 16> ArgLocs;
2115  ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2116  *DAG.getContext(), Call);
2117  CCInfo.AnalyzeCallOperands(Outs,
2118  CCAssignFnForNode(CalleeCC, false, isVarArg));
2119  if (CCInfo.getNextStackOffset()) {
2120  MachineFunction &MF = DAG.getMachineFunction();
2121 
2122  // Check if the arguments are already laid out in the right way as
2123  // the caller's fixed stack objects.
2124  MachineFrameInfo *MFI = MF.getFrameInfo();
2125  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2126  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2127  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2128  i != e;
2129  ++i, ++realArgIdx) {
2130  CCValAssign &VA = ArgLocs[i];
2131  EVT RegVT = VA.getLocVT();
2132  SDValue Arg = OutVals[realArgIdx];
2133  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2134  if (VA.getLocInfo() == CCValAssign::Indirect)
2135  return false;
2136  if (VA.needsCustom()) {
2137  // f64 and vector types are split into multiple registers or
2138  // register/stack-slot combinations. The types will not match
2139  // the registers; give up on memory f64 refs until we figure
2140  // out what to do about this.
2141  if (!VA.isRegLoc())
2142  return false;
2143  if (!ArgLocs[++i].isRegLoc())
2144  return false;
2145  if (RegVT == MVT::v2f64) {
2146  if (!ArgLocs[++i].isRegLoc())
2147  return false;
2148  if (!ArgLocs[++i].isRegLoc())
2149  return false;
2150  }
2151  } else if (!VA.isRegLoc()) {
2152  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2153  MFI, MRI, TII))
2154  return false;
2155  }
2156  }
2157  }
2158  }
2159 
2160  return true;
2161 }
2162 
2163 bool
2164 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2165  MachineFunction &MF, bool isVarArg,
2166  const SmallVectorImpl<ISD::OutputArg> &Outs,
2167  LLVMContext &Context) const {
2168  SmallVector<CCValAssign, 16> RVLocs;
2169  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2170  return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
2171  isVarArg));
2172 }
2173 
2174 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2175  SDLoc DL, SelectionDAG &DAG) {
2176  const MachineFunction &MF = DAG.getMachineFunction();
2177  const Function *F = MF.getFunction();
2178 
2179  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2180 
2181  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2182  // version of the "preferred return address". These offsets affect the return
2183  // instruction if this is a return from PL1 without hypervisor extensions.
2184  // IRQ/FIQ: +4 "subs pc, lr, #4"
2185  // SWI: 0 "subs pc, lr, #0"
2186  // ABORT: +4 "subs pc, lr, #4"
2187  // UNDEF: +4/+2 "subs pc, lr, #0"
2188  // UNDEF varies depending on where the exception came from ARM or Thumb
2189  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2190 
2191  int64_t LROffset;
2192  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2193  IntKind == "ABORT")
2194  LROffset = 4;
2195  else if (IntKind == "SWI" || IntKind == "UNDEF")
2196  LROffset = 0;
2197  else
2198  report_fatal_error("Unsupported interrupt attribute. If present, value "
2199  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2200 
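  // The LR adjustment is inserted right after the chain operand, so it
  // becomes the #N immediate of the "subs pc, lr, #N" return emitted for
  // ARMISD::INTRET_FLAG.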
2201  RetOps.insert(RetOps.begin() + 1,
2202  DAG.getConstant(LROffset, DL, MVT::i32, false));
2203 
2204  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2205 }
2206 
2207 SDValue
2208 ARMTargetLowering::LowerReturn(SDValue Chain,
2209  CallingConv::ID CallConv, bool isVarArg,
2210  const SmallVectorImpl<ISD::OutputArg> &Outs,
2211  const SmallVectorImpl<SDValue> &OutVals,
2212  SDLoc dl, SelectionDAG &DAG) const {
2213 
2214  // CCValAssign - represent the assignment of the return value to a location.
2215  SmallVector<CCValAssign, 16> RVLocs;
2216 
2217  // CCState - Info about the registers and stack slots.
2218  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2219  *DAG.getContext(), Call);
2220 
2221  // Analyze outgoing return values.
2222  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
2223  isVarArg));
2224 
2225  SDValue Flag;
2226  SmallVector<SDValue, 4> RetOps;
2227  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2228  bool isLittleEndian = Subtarget->isLittle();
2229 
2230  MachineFunction &MF = DAG.getMachineFunction();
2231  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2232  AFI->setReturnRegsCount(RVLocs.size());
2233 
2234  // Copy the result values into the output registers.
2235  for (unsigned i = 0, realRVLocIdx = 0;
2236  i != RVLocs.size();
2237  ++i, ++realRVLocIdx) {
2238  CCValAssign &VA = RVLocs[i];
2239  assert(VA.isRegLoc() && "Can only return in registers!");
2240 
2241  SDValue Arg = OutVals[realRVLocIdx];
2242 
2243  switch (VA.getLocInfo()) {
2244  default: llvm_unreachable("Unknown loc info!");
2245  case CCValAssign::Full: break;
2246  case CCValAssign::BCvt:
2247  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2248  break;
2249  }
2250 
2251  if (VA.needsCustom()) {
2252  if (VA.getLocVT() == MVT::v2f64) {
2253  // Extract the first half and return it in two registers.
2254  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2255  DAG.getConstant(0, dl, MVT::i32));
2256  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2257  DAG.getVTList(MVT::i32, MVT::i32), Half);
2258 
2259  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2260  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2261  Flag);
2262  Flag = Chain.getValue(1);
2263  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2264  VA = RVLocs[++i]; // skip ahead to next loc
2265  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2266  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2267  Flag);
2268  Flag = Chain.getValue(1);
2269  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2270  VA = RVLocs[++i]; // skip ahead to next loc
2271 
2272  // Extract the 2nd half and fall through to handle it as an f64 value.
2273  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2274  DAG.getConstant(1, dl, MVT::i32));
2275  }
2276  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2277  // available.
2278  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2279  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2280  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2281  fmrrd.getValue(isLittleEndian ? 0 : 1),
2282  Flag);
2283  Flag = Chain.getValue(1);
2284  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2285  VA = RVLocs[++i]; // skip ahead to next loc
2286  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2287  fmrrd.getValue(isLittleEndian ? 1 : 0),
2288  Flag);
2289  } else
2290  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2291 
2292  // Guarantee that all emitted copies are
2293  // stuck together, avoiding something bad.
2294  Flag = Chain.getValue(1);
2295  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2296  }
2297 
2298  // Update chain and glue.
2299  RetOps[0] = Chain;
2300  if (Flag.getNode())
2301  RetOps.push_back(Flag);
2302 
2303  // CPUs which aren't M-class use a special sequence to return from
2304  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2305  // though we use "subs pc, lr, #N").
2306  //
2307  // M-class CPUs actually use a normal return sequence with a special
2308  // (hardware-provided) value in LR, so the normal code path works.
2309  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2310  !Subtarget->isMClass()) {
2311  if (Subtarget->isThumb1Only())
2312  report_fatal_error("interrupt attribute is not supported in Thumb1");
2313  return LowerInterruptReturn(RetOps, dl, DAG);
2314  }
2315 
2316  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2317 }
2318 
2319 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2320  if (N->getNumValues() != 1)
2321  return false;
2322  if (!N->hasNUsesOfValue(1, 0))
2323  return false;
2324 
2325  SDValue TCChain = Chain;
2326  SDNode *Copy = *N->use_begin();
2327  if (Copy->getOpcode() == ISD::CopyToReg) {
2328  // If the copy has a glue operand, we conservatively assume it isn't safe to
2329  // perform a tail call.
2330  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2331  return false;
2332  TCChain = Copy->getOperand(0);
2333  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2334  SDNode *VMov = Copy;
2335  // f64 returned in a pair of GPRs.
2336  SmallPtrSet<SDNode*, 2> Copies;
2337  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2338  UI != UE; ++UI) {
2339  if (UI->getOpcode() != ISD::CopyToReg)
2340  return false;
2341  Copies.insert(*UI);
2342  }
2343  if (Copies.size() > 2)
2344  return false;
2345 
2346  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2347  UI != UE; ++UI) {
2348  SDValue UseChain = UI->getOperand(0);
2349  if (Copies.count(UseChain.getNode()))
2350  // Second CopyToReg
2351  Copy = *UI;
2352  else {
2353  // We are at the top of this chain.
2354  // If the copy has a glue operand, we conservatively assume it
2355  // isn't safe to perform a tail call.
2356  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2357  return false;
2358  // First CopyToReg
2359  TCChain = UseChain;
2360  }
2361  }
2362  } else if (Copy->getOpcode() == ISD::BITCAST) {
2363  // f32 returned in a single GPR.
2364  if (!Copy->hasOneUse())
2365  return false;
2366  Copy = *Copy->use_begin();
2367  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2368  return false;
2369  // If the copy has a glue operand, we conservatively assume it isn't safe to
2370  // perform a tail call.
2371  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2372  return false;
2373  TCChain = Copy->getOperand(0);
2374  } else {
2375  return false;
2376  }
2377 
2378  bool HasRet = false;
2379  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2380  UI != UE; ++UI) {
2381  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2382  UI->getOpcode() != ARMISD::INTRET_FLAG)
2383  return false;
2384  HasRet = true;
2385  }
2386 
2387  if (!HasRet)
2388  return false;
2389 
2390  Chain = TCChain;
2391  return true;
2392 }
2393 
2394 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2395  if (!Subtarget->supportsTailCall())
2396  return false;
2397 
2398  auto Attr =
2399  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2400  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2401  return false;
2402 
2403  return !Subtarget->isThumb1Only();
2404 }
2405 
2406 // To write a 64-bit value we need to split it into two 32-bit values first,
2407 // and pass the low and high parts through.
2408 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2409  SDLoc DL(Op);
2410  SDValue WriteValue = Op->getOperand(2);
2411 
2412  // This function is only supposed to be called for i64 type argument.
2413  assert(WriteValue.getValueType() == MVT::i64
2414  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2415 
2416  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2417  DAG.getConstant(0, DL, MVT::i32));
2418  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2419  DAG.getConstant(1, DL, MVT::i32));
2420  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2421  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2422 }
2423 
2424 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2425 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2426 // one of the above mentioned nodes. It has to be wrapped because otherwise
2427 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2428 // be used to form addressing mode. These wrapped nodes will be selected
2429 // into MOVi.
2430 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2431  EVT PtrVT = Op.getValueType();
2432  // FIXME there is no actual debug info here
2433  SDLoc dl(Op);
2434  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2435  SDValue Res;
2436  if (CP->isMachineConstantPoolEntry())
2437  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2438  CP->getAlignment());
2439  else
2440  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2441  CP->getAlignment());
2442  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2443 }
2444 
2445 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2446  return MachineJumpTableInfo::EK_Inline;
2447 }
2448 
2449 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2450  SelectionDAG &DAG) const {
2451  MachineFunction &MF = DAG.getMachineFunction();
2452  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2453  unsigned ARMPCLabelIndex = 0;
2454  SDLoc DL(Op);
2455  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2456  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2457  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2458  SDValue CPAddr;
2459  if (RelocM == Reloc::Static) {
2460  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2461  } else {
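  // The PC reads 8 bytes past the current instruction in ARM mode and 4 in
  // Thumb mode, so the PIC label offset is biased accordingly.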
2462  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2463  ARMPCLabelIndex = AFI->createPICLabelUId();
2464  ARMConstantPoolValue *CPV =
2465  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2466  ARMCP::CPBlockAddress, PCAdj);
2467  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2468  }
2469  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2470  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
2471  MachinePointerInfo::getConstantPool(),
2472  false, false, false, 0);
2473  if (RelocM == Reloc::Static)
2474  return Result;
2475  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2476  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2477 }
2478 
2479 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2480 SDValue
2481 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2482  SelectionDAG &DAG) const {
2483  SDLoc dl(GA);
2484  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2485  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2486  MachineFunction &MF = DAG.getMachineFunction();
2487  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2488  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2489  ARMConstantPoolValue *CPV =
2490  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2491  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2492  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2493  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2494  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
2495  MachinePointerInfo::getConstantPool(),
2496  false, false, false, 0);
2497  SDValue Chain = Argument.getValue(1);
2498 
2499  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2500  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2501 
2502  // call __tls_get_addr.
2503  ArgListTy Args;
2504  ArgListEntry Entry;
2505  Entry.Node = Argument;
2506  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2507  Args.push_back(Entry);
2508 
2509  // FIXME: is there useful debug info available here?
2510  TargetLowering::CallLoweringInfo CLI(DAG);
2511  CLI.setDebugLoc(dl).setChain(Chain)
2512  .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2513  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
2514  0);
2515 
2516  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2517  return CallResult.first;
2518 }
2519 
2520 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2521 // "local exec" model.
2522 SDValue
2523 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2524  SelectionDAG &DAG,
2525  TLSModel::Model model) const {
2526  const GlobalValue *GV = GA->getGlobal();
2527  SDLoc dl(GA);
2528  SDValue Offset;
2529  SDValue Chain = DAG.getEntryNode();
2530  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2531  // Get the Thread Pointer
2532  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2533 
2534  if (model == TLSModel::InitialExec) {
2535  MachineFunction &MF = DAG.getMachineFunction();
2536  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2537  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2538  // Initial exec model.
2539  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2540  ARMConstantPoolValue *CPV =
2541  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2542  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2543  true);
2544  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2545  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2546  Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2547  MachinePointerInfo::getConstantPool(),
2548  false, false, false, 0);
2549  Chain = Offset.getValue(1);
2550 
2551  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2552  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2553 
2554  Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2555  MachinePointerInfo::getGOT(),
2556  false, false, false, 0);
2557  } else {
2558  // local exec model
2559  assert(model == TLSModel::LocalExec);
2560  ARMConstantPoolValue *CPV =
2561  ARMConstantPoolConstant::Create(GV, ARMCP::CPValue, 0, ARMCP::TPOFF);
2562  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2563  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2564  Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2565  MachinePointerInfo::getConstantPool(),
2566  false, false, false, 0);
2567  }
2568 
2569  // The address of the thread local variable is the add of the thread
2570  // pointer with the offset of the variable.
2571  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2572 }
2573 
2574 SDValue
2575 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2576  // TODO: implement the "local dynamic" model
2577  assert(Subtarget->isTargetELF() &&
2578  "TLS not implemented for non-ELF targets");
2579  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2580 
2581  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2582 
2583  switch (model) {
2584  case TLSModel::GeneralDynamic:
2585  case TLSModel::LocalDynamic:
2586  return LowerToTLSGeneralDynamicModel(GA, DAG);
2587  case TLSModel::InitialExec:
2588  case TLSModel::LocalExec:
2589  return LowerToTLSExecModels(GA, DAG, model);
2590  }
2591  llvm_unreachable("bogus TLS model");
2592 }
2593 
2594 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2595  SelectionDAG &DAG) const {
2596  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2597  SDLoc dl(Op);
2598  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2599  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
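  // Locally-binding globals can be addressed GOT-relative (GOTOFF), which
  // avoids the extra load through a GOT slot needed for other globals.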
2600  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2601  ARMConstantPoolValue *CPV =
2602  ARMConstantPoolConstant::Create(GV,
2603  UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2604  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2605  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2606  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
2607  CPAddr,
2608  MachinePointerInfo::getConstantPool(),
2609  false, false, false, 0);
2610  SDValue Chain = Result.getValue(1);
2611  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2612  Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
2613  if (!UseGOTOFF)
2614  Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2615  MachinePointerInfo::getGOT(),
2616  false, false, false, 0);
2617  return Result;
2618  }
2619 
2620  // If we have T2 ops, we can materialize the address directly via movt/movw
2621  // pair. This is always cheaper.
2622  if (Subtarget->useMovt(DAG.getMachineFunction())) {
2623  ++NumMovwMovt;
2624  // FIXME: Once remat is capable of dealing with instructions with register
2625  // operands, expand this into two nodes.
2626  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2627  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2628  } else {
2629  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2630  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2631  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2632  MachinePointerInfo::getConstantPool(),
2633  false, false, false, 0);
2634  }
2635 }
2636 
2637 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
2638  SelectionDAG &DAG) const {
2639  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2640  SDLoc dl(Op);
2641  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2642  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2643 
2644  if (Subtarget->useMovt(DAG.getMachineFunction()))
2645  ++NumMovwMovt;
2646 
2647  // FIXME: Once remat is capable of dealing with instructions with register
2648  // operands, expand this into multiple nodes
2649  unsigned Wrapper =
2650  RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
2651 
2652  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
2653  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
2654 
2655  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2656  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
2657  MachinePointerInfo::getGOT(), false, false, false, 0);
2658  return Result;
2659 }
2660 
2661 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
2662  SelectionDAG &DAG) const {
2663  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
2664  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
2665  "Windows on ARM expects to use movw/movt");
2666 
2667  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2668  const ARMII::TOF TargetFlags =
2669  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
2670  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2671  SDValue Result;
2672  SDLoc DL(Op);
2673 
2674  ++NumMovwMovt;
2675 
2676  // FIXME: Once remat is capable of dealing with instructions with register
2677  // operands, expand this into two nodes.
2678  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
2679  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
2680  TargetFlags));
2681  if (GV->hasDLLImportStorageClass())
2682  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2683  MachinePointerInfo::getGOT(), false, false, false, 0);
2684  return Result;
2685 }
2686 
2687 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
2688  SelectionDAG &DAG) const {
2689  assert(Subtarget->isTargetELF() &&
2690  "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
2691  MachineFunction &MF = DAG.getMachineFunction();
2692  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2693  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2694  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2695  SDLoc dl(Op);
2696  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2697  ARMConstantPoolValue *CPV =
2698  ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
2699  ARMPCLabelIndex, PCAdj);
2700  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2701  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2702  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2703  MachinePointerInfo::getConstantPool(),
2704  false, false, false, 0);
2705  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2706  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2707 }
2708 
2709 SDValue
2710 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
2711  SDLoc dl(Op);
2712  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
2713  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
2714  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
2715  Op.getOperand(1), Val);
2716 }
2717 
2718 SDValue
2719 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
2720  SDLoc dl(Op);
2721  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
2722  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
2723 }
2724 
2725 SDValue
2726 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2727  const ARMSubtarget *Subtarget) const {
2728  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2729  SDLoc dl(Op);
2730  switch (IntNo) {
2731  default: return SDValue(); // Don't custom lower most intrinsics.
2732  case Intrinsic::arm_rbit: {
2733  assert(Op.getOperand(1).getValueType() == MVT::i32 &&
2734  "RBIT intrinsic must have i32 type!");
2735  return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
2736  }
2737  case Intrinsic::arm_thread_pointer: {
2738  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2739  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2740  }
2741  case Intrinsic::eh_sjlj_lsda: {
2742  MachineFunction &MF = DAG.getMachineFunction();
2743  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2744  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2745  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2746  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2747  SDValue CPAddr;
2748  unsigned PCAdj = (RelocM != Reloc::PIC_)
2749  ? 0 : (Subtarget->isThumb() ? 4 : 8);
2750  ARMConstantPoolValue *CPV =
2751  ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2752  ARMCP::CPLSDA, PCAdj);
2753  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2754  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2755  SDValue Result =
2756  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2757  MachinePointerInfo::getConstantPool(),
2758  false, false, false, 0);
2759 
2760  if (RelocM == Reloc::PIC_) {
2761  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2762  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2763  }
2764  return Result;
2765  }
2766  case Intrinsic::arm_neon_vmulls:
2767  case Intrinsic::arm_neon_vmullu: {
2768  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2769  ? ARMISD::VMULLs : ARMISD::VMULLu;
2770  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
2771  Op.getOperand(1), Op.getOperand(2));
2772  }
2773  }
2774 }
2775 
2776 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2777  const ARMSubtarget *Subtarget) {
2778  // FIXME: handle "fence singlethread" more efficiently.
2779  SDLoc dl(Op);
2780  if (!Subtarget->hasDataBarrier()) {
2781  // Some ARMv6 cpus can support data barriers with an mcr instruction.
2782  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2783  // here.
2784  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2785  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
2786  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2787  DAG.getConstant(0, dl, MVT::i32));
2788  }
2789 
2790  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
2791  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
2792  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
2793  if (Subtarget->isMClass()) {
2794  // Only a full system barrier exists in the M-class architectures.
2795  Domain = ARM_MB::SY;
2796  } else if (Subtarget->isSwift() && Ord == Release) {
2797  // Swift happens to implement ISHST barriers in a way that's compatible with
2798  // Release semantics but weaker than ISH so we'd be fools not to use
2799  // it. Beware: other processors probably don't!
2800  Domain = ARM_MB::ISHST;
2801  }
2802 
2803  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
2804  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
2805  DAG.getConstant(Domain, dl, MVT::i32));
2806 }
2807 
2808 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2809  const ARMSubtarget *Subtarget) {
2810  // ARM before v5TE and Thumb1 do not have preload instructions.
2811  if (!(Subtarget->isThumb2() ||
2812  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2813  // Just preserve the chain.
2814  return Op.getOperand(0);
2815 
2816  SDLoc dl(Op);
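  // Operand 2 of ISD::PREFETCH is the read/write hint (1 for writes), so
  // flip the low bit to get an "is read" flag.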
2817  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2818  if (!isRead &&
2819  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2820  // ARMv7 with MP extension has PLDW.
2821  return Op.getOperand(0);
2822 
2823  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2824  if (Subtarget->isThumb()) {
2825  // Invert the bits.
2826  isRead = ~isRead & 1;
2827  isData = ~isData & 1;
2828  }
2829 
2830  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2831  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
2832  DAG.getConstant(isData, dl, MVT::i32));
2833 }
2834 
2835 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2836  MachineFunction &MF = DAG.getMachineFunction();
2837  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2838 
2839  // vastart just stores the address of the VarArgsFrameIndex slot into the
2840  // memory location argument.
2841  SDLoc dl(Op);
2842  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2843  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2844  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2845  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2846  MachinePointerInfo(SV), false, false, 0);
2847 }
2848 
2849 SDValue
2850 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2851  SDValue &Root, SelectionDAG &DAG,
2852  SDLoc dl) const {
2853  MachineFunction &MF = DAG.getMachineFunction();
2854  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2855 
2856  const TargetRegisterClass *RC;
2857  if (AFI->isThumb1OnlyFunction())
2858  RC = &ARM::tGPRRegClass;
2859  else
2860  RC = &ARM::GPRRegClass;
2861 
2862  // Transform the arguments stored in physical registers into virtual ones.
2863  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2864  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2865 
2866  SDValue ArgValue2;
2867  if (NextVA.isMemLoc()) {
2868  MachineFrameInfo *MFI = MF.getFrameInfo();
2869  int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2870 
2871  // Create load node to retrieve arguments from the stack.
2872  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2873  ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2874  MachinePointerInfo::getFixedStack(FI),
2875  false, false, false, 0);
2876  } else {
2877  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2878  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2879  }
2880  if (!Subtarget->isLittle())
2881  std::swap (ArgValue, ArgValue2);
2882  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2883 }
2884 
2885 // The remaining GPRs hold either the beginning of variable-argument
2886 // data, or the beginning of an aggregate passed by value (usually
2887 // byval). Either way, we allocate stack slots adjacent to the data
2888 // provided by our caller, and store the unallocated registers there.
2889 // If this is a variadic function, the va_list pointer will begin with
2890 // these values; otherwise, this reassembles a (byval) structure that
2891 // was split between registers and memory.
2892 // Return: the frame index the registers were stored into.
2893 int
2894 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
2895  SDLoc dl, SDValue &Chain,
2896  const Value *OrigArg,
2897  unsigned InRegsParamRecordIdx,
2898  int ArgOffset,
2899  unsigned ArgSize) const {
2900  // Currently, two use cases are possible:
2901  // Case #1. Non-var-args function, and we meet the first byval parameter.
2902  // Set up the first unallocated register as the first byval register and
2903  // eat all remaining registers
2904  // (these two actions are performed by the HandleByVal method).
2905  // Then, here, we initialize the stack frame with
2906  // "store-reg" instructions.
2907  // Case #2. Var-args function that doesn't contain byval parameters.
2908  // The same: eat all remaining unallocated registers and
2909  // initialize the stack frame.
2910 
2911  MachineFunction &MF = DAG.getMachineFunction();
2912  MachineFrameInfo *MFI = MF.getFrameInfo();
2913  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2914  unsigned RBegin, REnd;
2915  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2916  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2917  } else {
2918  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
2919  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
2920  REnd = ARM::R4;
2921  }
2922 
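  // If any registers are being saved, place the save area immediately below
  // the incoming stack arguments: 4 bytes for each register in [RBegin, r4).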
2923  if (REnd != RBegin)
2924  ArgOffset = -4 * (ARM::R4 - RBegin);
2925 
2926  auto PtrVT = getPointerTy(DAG.getDataLayout());
2927  int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
2928  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
2929 
2930  SmallVector<SDValue, 4> MemOps;
2931  const TargetRegisterClass *RC =
2932  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
2933 
2934  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
2935  unsigned VReg = MF.addLiveIn(Reg, RC);
2936  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2937  SDValue Store =
2938  DAG.getStore(Val.getValue(1), dl, Val, FIN,
2939  MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
2940  MemOps.push_back(Store);
2941  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
2942  }
2943 
2944  if (!MemOps.empty())
2945  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2946  return FrameIndex;
2947 }
2948 
2949 // Set up the stack frame that the va_list pointer will start from.
2950 void
2951 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2952  SDLoc dl, SDValue &Chain,
2953  unsigned ArgOffset,
2954  unsigned TotalArgRegsSaveSize,
2955  bool ForceMutable) const {
2956  MachineFunction &MF = DAG.getMachineFunction();
2957  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2958 
2959  // Try to store any remaining integer argument regs
2960  // to their spots on the stack so that they may be loaded by dereferencing
2961  // the result of va_next.
2962  // If there are no regs to be stored, just point the address after the last
2963  // argument passed via the stack.
2964  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
2965  CCInfo.getInRegsParamsCount(),
2966  CCInfo.getNextStackOffset(), 4);
2967  AFI->setVarArgsFrameIndex(FrameIndex);
2968 }
2969 
2970 SDValue
2971 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2972  CallingConv::ID CallConv, bool isVarArg,
2973  const SmallVectorImpl<ISD::InputArg>
2974  &Ins,
2975  SDLoc dl, SelectionDAG &DAG,
2976  SmallVectorImpl<SDValue> &InVals)
2977  const {
2978  MachineFunction &MF = DAG.getMachineFunction();
2979  MachineFrameInfo *MFI = MF.getFrameInfo();
2980 
2981  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2982 
2983  // Assign locations to all of the incoming arguments.
2984  SmallVector<CCValAssign, 16> ArgLocs;
2985  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2986  *DAG.getContext(), Prologue);
2987  CCInfo.AnalyzeFormalArguments(Ins,
2988  CCAssignFnForNode(CallConv, /* Return*/ false,
2989  isVarArg));
2990 
2991  SmallVector<SDValue, 16> ArgValues;
2992  SDValue ArgValue;
2993  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2994  unsigned CurArgIdx = 0;
2995 
2996  // Initially ArgRegsSaveSize is zero.
2997  // Then we increase this value each time we meet byval parameter.
2998  // We also increase this value in case of varargs function.
2999  AFI->setArgRegsSaveSize(0);
3000 
3001  // Calculate the amount of stack space that we need to allocate to store
3002  // byval and variadic arguments that are passed in registers.
3003  // We need to know this before we allocate the first byval or variadic
3004  // argument, as they will be allocated a stack slot below the CFA (Canonical
3005  // Frame Address, the stack pointer at entry to the function).
3006  unsigned ArgRegBegin = ARM::R4;
3007  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3008  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3009  break;
3010 
3011  CCValAssign &VA = ArgLocs[i];
3012  unsigned Index = VA.getValNo();
3013  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3014  if (!Flags.isByVal())
3015  continue;
3016 
3017  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3018  unsigned RBegin, REnd;
3019  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3020  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3021 
3022  CCInfo.nextInRegsParam();
3023  }
3024  CCInfo.rewindByValRegsInfo();
3025 
3026  int lastInsIndex = -1;
3027  if (isVarArg && MFI->hasVAStart()) {
3028  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3029  if (RegIdx != array_lengthof(GPRArgRegs))
3030  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3031  }
3032 
3033  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3034  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3035  auto PtrVT = getPointerTy(DAG.getDataLayout());
3036 
3037  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3038  CCValAssign &VA = ArgLocs[i];
3039  if (Ins[VA.getValNo()].isOrigArg()) {
3040  std::advance(CurOrigArg,
3041  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3042  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3043  }
3044  // Arguments stored in registers.
3045  if (VA.isRegLoc()) {
3046  EVT RegVT = VA.getLocVT();
3047 
3048  if (VA.needsCustom()) {
3049  // f64 and vector types are split up into multiple registers or
3050  // combinations of registers and stack slots.
3051  if (VA.getLocVT() == MVT::v2f64) {
3052  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3053  Chain, DAG, dl);
3054  VA = ArgLocs[++i]; // skip ahead to next loc
3055  SDValue ArgValue2;
3056  if (VA.isMemLoc()) {
3057  int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
3058  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3059  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3060  MachinePointerInfo::getFixedStack(FI),
3061  false, false, false, 0);
3062  } else {
3063  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3064  Chain, DAG, dl);
3065  }
3066  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3067  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3068  ArgValue, ArgValue1,
3069  DAG.getIntPtrConstant(0, dl));
3070  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3071  ArgValue, ArgValue2,
3072  DAG.getIntPtrConstant(1, dl));
3073  } else
3074  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3075 
3076  } else {
3077  const TargetRegisterClass *RC;
3078 
3079  if (RegVT == MVT::f32)
3080  RC = &ARM::SPRRegClass;
3081  else if (RegVT == MVT::f64)
3082  RC = &ARM::DPRRegClass;
3083  else if (RegVT == MVT::v2f64)
3084  RC = &ARM::QPRRegClass;
3085  else if (RegVT == MVT::i32)
3086  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3087  : &ARM::GPRRegClass;
3088  else
3089  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3090 
3091  // Transform the arguments in physical registers into virtual ones.
3092  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3093  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3094  }
3095 
3096  // If this is an 8 or 16-bit value, it is really passed promoted
3097  // to 32 bits. Insert an assert[sz]ext to capture this, then
3098  // truncate to the right size.
3099  switch (VA.getLocInfo()) {
3100  default: llvm_unreachable("Unknown loc info!");
3101  case CCValAssign::Full: break;
3102  case CCValAssign::BCvt:
3103  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3104  break;
3105  case CCValAssign::SExt:
3106  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3107  DAG.getValueType(VA.getValVT()));
3108  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3109  break;
3110  case CCValAssign::ZExt:
3111  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3112  DAG.getValueType(VA.getValVT()));
3113  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3114  break;
3115  }
3116 
3117  InVals.push_back(ArgValue);
3118 
3119  } else { // VA.isRegLoc()
3120 
3121  // sanity check
3122  assert(VA.isMemLoc());
3123  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3124 
3125  int index = VA.getValNo();
3126 
3127  // Some Ins[] entries become multiple ArgLoc[] entries.
3128  // Process them only once.
3129  if (index != lastInsIndex)
3130  {
3131  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3132  // FIXME: For now, all byval parameter objects are marked mutable.
3133  // This can be changed with more analysis.
3134  // In case of tail call optimization mark all arguments mutable.
3135  // Since they could be overwritten by lowering of arguments in case of
3136  // a tail call.
3137  if (Flags.isByVal()) {
3138  assert(Ins[index].isOrigArg() &&
3139  "Byval arguments cannot be implicit");
3140  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3141 
3142  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
3143  CurByValIndex, VA.getLocMemOffset(),
3144  Flags.getByValSize());
3145  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3146  CCInfo.nextInRegsParam();
3147  } else {
3148  unsigned FIOffset = VA.getLocMemOffset();
3149  int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3150  FIOffset, true);
3151 
3152  // Create load nodes to retrieve arguments from the stack.
3153  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3154  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3155  MachinePointerInfo::getFixedStack(FI),
3156  false, false, false, 0));
3157  }
3158  lastInsIndex = index;
3159  }
3160  }
3161  }
3162 
3163  // varargs
3164  if (isVarArg && MFI->hasVAStart())
3165  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3166  CCInfo.getNextStackOffset(),
3167  TotalArgRegsSaveSize);
3168 
3169  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3170 
3171  return Chain;
3172 }
3173 
3174 /// isFloatingPointZero - Return true if this is +0.0.
3175 static bool isFloatingPointZero(SDValue Op) {
3176  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3177  return CFP->getValueAPF().isPosZero();
3178  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3179  // Maybe this has already been legalized into the constant pool?
3180  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3181  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3182  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3183  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3184  return CFP->getValueAPF().isPosZero();
3185  }
3186  } else if (Op->getOpcode() == ISD::BITCAST &&
3187  Op->getValueType(0) == MVT::f64) {
3188  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3189  // created by LowerConstantFP().
3190  SDValue BitcastOp = Op->getOperand(0);
3191  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
3192  SDValue MoveOp = BitcastOp->getOperand(0);
3193  if (MoveOp->getOpcode() == ISD::TargetConstant &&
3194  cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
3195  return true;
3196  }
3197  }
3198  }
3199  return false;
3200 }
3201 
3202 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3203 /// the given operands.
3204 SDValue
3205 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3206  SDValue &ARMcc, SelectionDAG &DAG,
3207  SDLoc dl) const {
3208  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3209  unsigned C = RHSC->getZExtValue();
3210  if (!isLegalICmpImmediate(C)) {
3211  // Constant does not fit, try adjusting it by one?
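  // e.g. (x < 0x101) uses an immediate that cannot be encoded, but the
  // equivalent (x <= 0x100) can be.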
3212  switch (CC) {
3213  default: break;
3214  case ISD::SETLT:
3215  case ISD::SETGE:
3216  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3217  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3218  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3219  }
3220  break;
3221  case ISD::SETULT:
3222  case ISD::SETUGE:
3223  if (C != 0 && isLegalICmpImmediate(C-1)) {
3224  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3225  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3226  }
3227  break;
3228  case ISD::SETLE:
3229  case ISD::SETGT:
3230  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3231  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3232  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3233  }
3234  break;
3235  case ISD::SETULE:
3236  case ISD::SETUGT:
3237  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3238  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3239  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3240  }
3241  break;
3242  }
3243  }
3244  }
3245 
3246  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3247  ARMISD::NodeType CompareType;
3248  switch (CondCode) {
3249  default:
3250  CompareType = ARMISD::CMP;
3251  break;
3252  case ARMCC::EQ:
3253  case ARMCC::NE:
3254  // Uses only Z Flag
3255  CompareType = ARMISD::CMPZ;
3256  break;
3257  }
3258  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3259  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3260 }
3261 
3262 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3263 SDValue
3264 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
3265  SDLoc dl) const {
3266  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3267  SDValue Cmp;
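  // When the RHS is +0.0, the one-operand CMPFPw0 node is used, so the zero
  // never needs to be materialized in a register.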
3268  if (!isFloatingPointZero(RHS))
3269  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
3270  else
3271  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
3272  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3273 }
3274 
3275 /// duplicateCmp - Glue values can have only one use, so this function
3276 /// duplicates a comparison node.
3277 SDValue
3278 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3279  unsigned Opc = Cmp.getOpcode();
3280  SDLoc DL(Cmp);
3281  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3282  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3283 
3284  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3285  Cmp = Cmp.getOperand(0);
3286  Opc = Cmp.getOpcode();
3287  if (Opc == ARMISD::CMPFP)
3288  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3289  else {
3290  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3291  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
3292  }
3293  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3294 }
3295 
3296 std::pair<SDValue, SDValue>
3297 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3298  SDValue &ARMcc) const {
3299  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3300 
3301  SDValue Value, OverflowCmp;
3302  SDValue LHS = Op.getOperand(0);
3303  SDValue RHS = Op.getOperand(1);
3304  SDLoc dl(Op);
3305 
3306  // FIXME: We are currently always generating CMPs because we don't support
3307  // generating CMN through the backend. This is not as good as the natural
3308  // CMP case because it causes a register dependency and cannot be folded
3309  // later.
3310 
3311  switch (Op.getOpcode()) {
3312  default:
3313  llvm_unreachable("Unknown overflow instruction!");
3314  case ISD::SADDO:
3315  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3316  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3317  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3318  break;
3319  case ISD::UADDO:
3320  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3321  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3322  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3323  break;
3324  case ISD::SSUBO:
3325  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3326  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3327  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3328  break;
3329  case ISD::USUBO:
3330  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3331  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3332  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3333  break;
3334  } // switch (...)
3335 
3336  return std::make_pair(Value, OverflowCmp);
3337 }
3338 
3339 
3340 SDValue
3341 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3342  // Let legalize expand this if it isn't a legal type yet.
3343  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3344  return SDValue();
3345 
3346  SDValue Value, OverflowCmp;
3347  SDValue ARMcc;
3348  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3349  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3350  SDLoc dl(Op);
3351  // We use 0 and 1 as false and true values.
3352  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3353  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3354  EVT VT = Op.getValueType();
3355 
3356  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3357  ARMcc, CCR, OverflowCmp);
3358 
3359  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3360  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3361 }
3362 
3363 
3364 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3365  SDValue Cond = Op.getOperand(0);
3366  SDValue SelectTrue = Op.getOperand(1);
3367  SDValue SelectFalse = Op.getOperand(2);
3368  SDLoc dl(Op);
3369  unsigned Opc = Cond.getOpcode();
3370 
3371  if (Cond.getResNo() == 1 &&
3372  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3373  Opc == ISD::USUBO)) {
3374  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3375  return SDValue();
3376 
3377  SDValue Value, OverflowCmp;
3378  SDValue ARMcc;
3379  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3380  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3381  EVT VT = Op.getValueType();
3382 
3383  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3384  OverflowCmp, DAG);
3385  }
3386 
3387  // Convert:
3388  //
3389  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3390  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3391  //
3392  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3393  const ConstantSDNode *CMOVTrue =
3394  dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3395  const ConstantSDNode *CMOVFalse =
3396  dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3397 
3398  if (CMOVTrue && CMOVFalse) {
3399  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3400  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3401 
3402  SDValue True;
3403  SDValue False;
3404  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3405  True = SelectTrue;
3406  False = SelectFalse;
3407  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3408  True = SelectFalse;
3409  False = SelectTrue;
3410  }
3411 
3412  if (True.getNode() && False.getNode()) {
3413  EVT VT = Op.getValueType();
3414  SDValue ARMcc = Cond.getOperand(2);
3415  SDValue CCR = Cond.getOperand(3);
3416  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3417  assert(True.getValueType() == VT);
3418  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
3419  }
3420  }
3421  }
3422 
3423  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3424  // undefined bits before doing a full-word comparison with zero.
3425  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
3426  DAG.getConstant(1, dl, Cond.getValueType()));
3427 
3428  return DAG.getSelectCC(dl, Cond,
3429  DAG.getConstant(0, dl, Cond.getValueType()),
3430  SelectTrue, SelectFalse, ISD::SETNE);
3431 }
3432 
3433 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
3434  bool &swpCmpOps, bool &swpVselOps) {
3435  // Start by selecting the GE condition code for opcodes that return true for
3436  // 'equality'
3437  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
3438  CC == ISD::SETULE)
3439  CondCode = ARMCC::GE;
3440 
3441  // and GT for opcodes that return false for 'equality'.
3442  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
3443  CC == ISD::SETULT)
3444  CondCode = ARMCC::GT;
3445 
3446  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
3447  // to swap the compare operands.
3448  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
3449  CC == ISD::SETULT)
3450  swpCmpOps = true;
3451 
3452  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
3453  // If we have an unordered opcode, we need to swap the operands to the VSEL
3454  // instruction (effectively negating the condition).
3455  //
3456  // This also has the effect of swapping which one of 'less' or 'greater'
3457  // returns true, so we also swap the compare operands. It also switches
3458  // whether we return true for 'equality', so we compensate by picking the
3459  // opposite condition code to our original choice.
3460  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
3461  CC == ISD::SETUGT) {
3462  swpCmpOps = !swpCmpOps;
3463  swpVselOps = !swpVselOps;
3464  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
3465  }
3466 
3467  // 'ordered' is 'anything but unordered', so use the VS condition code and
3468  // swap the VSEL operands.
3469  if (CC == ISD::SETO) {
3470  CondCode = ARMCC::VS;
3471  swpVselOps = true;
3472  }
3473 
3474  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
3475  // code and swap the VSEL operands.
3476  if (CC == ISD::SETUNE) {
3477  CondCode = ARMCC::EQ;
3478  swpVselOps = true;
3479  }
3480 }
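// Worked example (illustrative only): for CC == ISD::SETULT the steps above
// first pick CondCode = GT ('less' with no equality), then set swpCmpOps = true
// because the opcode contains 'less', and finally the unordered adjustment
// toggles both flags and flips GT to GE. Net effect: CondCode = GE,
// swpCmpOps = false, swpVselOps = true, i.e. compare as written but swap the
// VSEL operands.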
3481 
3482 SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
3483  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
3484  SDValue Cmp, SelectionDAG &DAG) const {
3485  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
3486  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3487  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
3488  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3489  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
3490 
3491  SDValue TrueLow = TrueVal.getValue(0);
3492  SDValue TrueHigh = TrueVal.getValue(1);
3493  SDValue FalseLow = FalseVal.getValue(0);
3494  SDValue FalseHigh = FalseVal.getValue(1);
3495 
3496  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
3497  ARMcc, CCR, Cmp);
3498  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
3499  ARMcc, CCR, duplicateCmp(Cmp, DAG));
3500 
3501  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
3502  } else {
3503  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
3504  Cmp);
3505  }
3506 }
3507 
3508 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
3509  EVT VT = Op.getValueType();
3510  SDValue LHS = Op.getOperand(0);
3511  SDValue RHS = Op.getOperand(1);
3512  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3513  SDValue TrueVal = Op.getOperand(2);
3514  SDValue FalseVal = Op.getOperand(3);
3515  SDLoc dl(Op);
3516 
3517  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
3518  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
3519  dl);
3520 
3521  // If softenSetCCOperands only returned one value, we should compare it to
3522  // zero.
3523  if (!RHS.getNode()) {
3524  RHS = DAG.getConstant(0, dl, LHS.getValueType());
3525  CC = ISD::SETNE;
3526  }
3527  }
3528 
3529  if (LHS.getValueType() == MVT::i32) {
3530  // Try to generate VSEL on ARMv8.
3531  // The VSEL instruction can't use all the usual ARM condition
3532  // codes: it only has two bits to select the condition code, so it's
3533  // constrained to use only GE, GT, VS and EQ.
3534  //
3535  // To implement all the various ISD::SETXXX opcodes, we sometimes need to
3536  // swap the operands of the previous compare instruction (effectively
3537  // inverting the compare condition, swapping 'less' and 'greater') and
3538  // sometimes need to swap the operands to the VSEL (which inverts the
3539  // condition in the sense of firing whenever the previous condition didn't)
3540  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3541  TrueVal.getValueType() == MVT::f64)) {
3542  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3543  if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
3544  CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
3545  CC = ISD::getSetCCInverse(CC, true);
3546  std::swap(TrueVal, FalseVal);
3547  }
3548  }
3549 
3550  SDValue ARMcc;
3551  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3552  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3553  return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
3554  }
3555 
3556  ARMCC::CondCodes CondCode, CondCode2;
3557  FPCCToARMCC(CC, CondCode, CondCode2);
3558 
3559  // Try to generate VMAXNM/VMINNM on ARMv8.
3560  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3561  TrueVal.getValueType() == MVT::f64)) {
3562  // We can use VMAXNM/VMINNM for a compare followed by a select with the
3563  // same operands, as follows:
3564  // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
3565  // select c, a, b
3566  // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
3567  bool swapSides = false;
3568  if (!getTargetMachine().Options.NoNaNsFPMath) {
3569  // transformability may depend on which way around we compare
3570  switch (CC) {
3571  default:
3572  break;
3573  case ISD::SETOGT:
3574  case ISD::SETOGE:
3575  case ISD::SETOLT:
3576  case ISD::SETOLE:
3577  // the non-NaN should be RHS
3578  swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
3579  break;
3580  case ISD::SETUGT:
3581  case ISD::SETUGE:
3582  case ISD::SETULT:
3583  case ISD::SETULE:
3584  // the non-NaN should be LHS
3585  swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
3586  break;
3587  }
3588  }
3589  swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
3590  if (swapSides) {
3591  CC = ISD::getSetCCSwappedOperands(CC);
3592  std::swap(LHS, RHS);
3593  }
3594  if (LHS == TrueVal && RHS == FalseVal) {
3595  bool canTransform = true;
3596  // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
3597  if (!getTargetMachine().Options.UnsafeFPMath &&
3598  !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
3599  const ConstantFPSDNode *Zero;
3600  switch (CC) {
3601  default:
3602  break;
3603  case ISD::SETOGT:
3604  case ISD::SETUGT:
3605  case ISD::SETGT:
3606  // RHS must not be -0
3607  canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
3608  !Zero->isNegative();
3609  break;
3610  case ISD::SETOGE:
3611  case ISD::SETUGE:
3612  case ISD::SETGE:
3613  // LHS must not be -0
3614  canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
3615  !Zero->isNegative();
3616  break;
3617  case ISD::SETOLT:
3618  case ISD::SETULT:
3619  case ISD::SETLT:
3620  // RHS must not be +0
3621  canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
3622  Zero->isNegative();
3623  break;
3624  case ISD::SETOLE:
3625  case ISD::SETULE:
3626  case ISD::SETLE:
3627  // LHS must not be +0
3628  canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
3629  Zero->isNegative();
3630  break;
3631  }
3632  }
3633  if (canTransform) {
3634  // Note: If one of the elements in a pair is a number and the other
3635  // element is NaN, the corresponding result element is the number.
3636  // This is consistent with the IEEE 754-2008 standard.
3637  // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
3638  switch (CC) {
3639  default:
3640  break;
3641  case ISD::SETOGT:
3642  case ISD::SETOGE:
3643  if (!DAG.isKnownNeverNaN(RHS))
3644  break;
3645  return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
3646  case ISD::SETUGT:
3647  case ISD::SETUGE:
3648  if (!DAG.isKnownNeverNaN(LHS))
3649  break;
3650  case ISD::SETGT:
3651  case ISD::SETGE:
3652  return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
3653  case ISD::SETOLT:
3654  case ISD::SETOLE:
3655  if (!DAG.isKnownNeverNaN(RHS))
3656  break;
3657  return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
3658  case ISD::SETULT:
3659  case ISD::SETULE:
3660  if (!DAG.isKnownNeverNaN(LHS))
3661  break;
3662  case ISD::SETLT:
3663  case ISD::SETLE:
3664  return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
3665  }
3666  }
3667  }
3668 
3669  bool swpCmpOps = false;
3670  bool swpVselOps = false;
3671  checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
3672 
3673  if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
3674  CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
3675  if (swpCmpOps)
3676  std::swap(LHS, RHS);
3677  if (swpVselOps)
3678  std::swap(TrueVal, FalseVal);
3679  }
3680  }
3681 
3682  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3683  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3684  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3685  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
3686  if (CondCode2 != ARMCC::AL) {
3687  SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
3688  // FIXME: Needs another CMP because flag can have but one use.
3689  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
3690  Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
3691  }
3692  return Result;
3693 }
3694 
3695 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
3696 /// to morph to an integer compare sequence.
3697 static bool canChangeToInt(SDValue Op, bool &SeenZero,
3698  const ARMSubtarget *Subtarget) {
3699  SDNode *N = Op.getNode();
3700  if (!N->hasOneUse())
3701  // Otherwise it requires moving the value from fp to integer registers.
3702  return false;
3703  if (!N->getNumValues())
3704  return false;
3705  EVT VT = Op.getValueType();
3706  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
3707  // f32 case is generally profitable. f64 case only makes sense when vcmpe +
3708  // vmrs are very slow, e.g. cortex-a8.
3709  return false;
3710 
3711  if (isFloatingPointZero(Op)) {
3712  SeenZero = true;
3713  return true;
3714  }
3715  return ISD::isNormalLoad(N);
3716 }
3717 
3718 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
3719  if (isFloatingPointZero(Op))
3720  return DAG.getConstant(0, SDLoc(Op), MVT::i32);
3721 
3722  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
3723  return DAG.getLoad(MVT::i32, SDLoc(Op),
3724  Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
3725  Ld->isVolatile(), Ld->isNonTemporal(),
3726  Ld->isInvariant(), Ld->getAlignment());
3727 
3728  llvm_unreachable("Unknown VFP cmp argument!");
3729 }
3730 
3731 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
3732  SDValue &RetVal1, SDValue &RetVal2) {
3733  SDLoc dl(Op);
3734 
3735  if (isFloatingPointZero(Op)) {
3736  RetVal1 = DAG.getConstant(0, dl, MVT::i32);
3737  RetVal2 = DAG.getConstant(0, dl, MVT::i32);
3738  return;
3739  }
3740 
3741  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
3742  SDValue Ptr = Ld->getBasePtr();
3743  RetVal1 = DAG.getLoad(MVT::i32, dl,
3744  Ld->getChain(), Ptr,
3745  Ld->getPointerInfo(),
3746  Ld->isVolatile(), Ld->isNonTemporal(),
3747  Ld->isInvariant(), Ld->getAlignment());
3748 
3749  EVT PtrType = Ptr.getValueType();
3750  unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
3751  SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
3752  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
3753  RetVal2 = DAG.getLoad(MVT::i32, dl,
3754  Ld->getChain(), NewPtr,
3755  Ld->getPointerInfo().getWithOffset(4),
3756  Ld->isVolatile(), Ld->isNonTemporal(),
3757  Ld->isInvariant(), NewAlign);
3758  return;
3759  }
3760 
3761  llvm_unreachable("Unknown VFP cmp argument!");
3762 }
3763 
3764 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
3765 /// f32 and even f64 comparisons to integer ones.
3766 SDValue
3767 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
3768  SDValue Chain = Op.getOperand(0);
3769  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3770  SDValue LHS = Op.getOperand(2);
3771  SDValue RHS = Op.getOperand(3);
3772  SDValue Dest = Op.getOperand(4);
3773  SDLoc dl(Op);
3774 
3775  bool LHSSeenZero = false;
3776  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
3777  bool RHSSeenZero = false;
3778  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
3779  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
3780  // If unsafe fp math optimization is enabled and there are no other uses of
3781  // the CMP operands, and the condition code is EQ or NE, we can optimize it
3782  // to an integer comparison.
3783  if (CC == ISD::SETOEQ)
3784  CC = ISD::SETEQ;
3785  else if (CC == ISD::SETUNE)
3786  CC = ISD::SETNE;
3787 
3788  SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
3789  SDValue ARMcc;
3790  if (LHS.getValueType() == MVT::f32) {
3791  LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3792  bitcastf32Toi32(LHS, DAG), Mask);
3793  RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3794  bitcastf32Toi32(RHS, DAG), Mask);
3795  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3796  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3797  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3798  Chain, Dest, ARMcc, CCR, Cmp);
3799  }
3800 
3801  SDValue LHS1, LHS2;
3802  SDValue RHS1, RHS2;
3803  expandf64Toi32(LHS, DAG, LHS1, LHS2);
3804  expandf64Toi32(RHS, DAG, RHS1, RHS2);
3805  LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
3806  RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
3807  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3808  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3809  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3810  SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
3811  return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
3812  }
3813 
3814  return SDValue();
3815 }
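// Note (illustrative): the 0x7fffffff mask clears the IEEE-754 sign bit, so
// +0.0 and -0.0 both become the all-zero pattern and the integer EQ/NE test
// agrees with the floating-point comparison against zero. This is only done
// under unsafe-fp-math and when one operand is known to be a floating-point
// zero, as established by canChangeToInt/SeenZero above.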
3816 
3817 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3818  SDValue Chain = Op.getOperand(0);
3819  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3820  SDValue LHS = Op.getOperand(2);
3821  SDValue RHS = Op.getOperand(3);
3822  SDValue Dest = Op.getOperand(4);
3823  SDLoc dl(Op);
3824 
3825  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
3826  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
3827  dl);
3828 
3829  // If softenSetCCOperands only returned one value, we should compare it to
3830  // zero.
3831  if (!RHS.getNode()) {
3832  RHS = DAG.getConstant(0, dl, LHS.getValueType());
3833  CC = ISD::SETNE;
3834  }
3835  }
3836 
3837  if (LHS.getValueType() == MVT::i32) {
3838  SDValue ARMcc;
3839  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3840  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3841  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3842  Chain, Dest, ARMcc, CCR, Cmp);
3843  }
3844 
3845  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3846 
3847  if (getTargetMachine().Options.UnsafeFPMath &&
3848  (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
3849  CC == ISD::SETNE || CC == ISD::SETUNE)) {
3850  SDValue Result = OptimizeVFPBrcond(Op, DAG);
3851  if (Result.getNode())
3852  return Result;
3853  }
3854 
3855  ARMCC::CondCodes CondCode, CondCode2;
3856  FPCCToARMCC(CC, CondCode, CondCode2);
3857 
3858  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3859  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3860  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3861  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3862  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
3863  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3864  if (CondCode2 != ARMCC::AL) {
3865  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
3866  SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
3867  Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3868  }
3869  return Res;
3870 }
3871 
3872 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
3873  SDValue Chain = Op.getOperand(0);
3874  SDValue Table = Op.getOperand(1);
3875  SDValue Index = Op.getOperand(2);
3876  SDLoc dl(Op);
3877 
3878  EVT PTy = getPointerTy(DAG.getDataLayout());
3879  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
3880  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
3881  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
3882  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
3883  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
3884  if (Subtarget->isThumb2()) {
3885  // Thumb2 uses a two-level jump. That is, it jumps into the jump table
3886  // which does another jump to the destination. This also makes it easier
3887  // to translate it to TBB / TBH later.
3888  // FIXME: This might not work if the function is extremely large.
3889  return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
3890  Addr, Op.getOperand(2), JTI);
3891  }
3892  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
3893  Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
3894  MachinePointerInfo::getJumpTable(),
3895  false, false, false, 0);
3896  Chain = Addr.getValue(1);
3897  Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
3898  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
3899  } else {
3900  Addr = DAG.getLoad(PTy, dl, Chain, Addr,
3901  MachinePointerInfo::getJumpTable(),
3902  false, false, false, 0);
3903  Chain = Addr.getValue(1);
3904  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
3905  }
3906 }
3907 
3908 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3909  EVT VT = Op.getValueType();
3910  SDLoc dl(Op);
3911 
3912  if (Op.getValueType().getVectorElementType() == MVT::i32) {
3913  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
3914  return Op;
3915  return DAG.UnrollVectorOp(Op.getNode());
3916  }
3917 
3918  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
3919  "Invalid type for custom lowering!");
3920  if (VT != MVT::v4i16)
3921  return DAG.UnrollVectorOp(Op.getNode());
3922 
3923  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
3924  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
3925 }
3926 
3927 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
3928  EVT VT = Op.getValueType();
3929  if (VT.isVector())
3930  return LowerVectorFP_TO_INT(Op, DAG);
3931  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
3932  RTLIB::Libcall LC;
3933  if (Op.getOpcode() == ISD::FP_TO_SINT)
3934  LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
3935  Op.getValueType());
3936  else
3937  LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
3938  Op.getValueType());
3939  return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
3940  /*isSigned*/ false, SDLoc(Op)).first;
3941  }
3942 
3943  return Op;
3944 }
3945 
3946 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3947  EVT VT = Op.getValueType();
3948  SDLoc dl(Op);
3949 
3950  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
3951  if (VT.getVectorElementType() == MVT::f32)
3952  return Op;
3953  return DAG.UnrollVectorOp(Op.getNode());
3954  }
3955 
3956  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
3957  "Invalid type for custom lowering!");
3958  if (VT != MVT::v4f32)
3959  return DAG.UnrollVectorOp(Op.getNode());
3960 
3961  unsigned CastOpc;
3962  unsigned Opc;
3963  switch (Op.getOpcode()) {
3964  default: llvm_unreachable("Invalid opcode!");
3965  case ISD::SINT_TO_FP:
3966  CastOpc = ISD::SIGN_EXTEND;
3967  Opc = ISD::SINT_TO_FP;
3968  break;
3969  case ISD::UINT_TO_FP:
3970  CastOpc = ISD::ZERO_EXTEND;
3971  Opc = ISD::UINT_TO_FP;
3972  break;
3973  }
3974 
3975  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
3976  return DAG.getNode(Opc, dl, VT, Op);
3977 }
3978 
3979 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
3980  EVT VT = Op.getValueType();
3981  if (VT.isVector())
3982  return LowerVectorINT_TO_FP(Op, DAG);
3983  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
3984  RTLIB::Libcall LC;
3985  if (Op.getOpcode() == ISD::SINT_TO_FP)
3986  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
3987  Op.getValueType());
3988  else
3989  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
3990  Op.getValueType());
3991  return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
3992  /*isSigned*/ false, SDLoc(Op)).first;
3993  }
3994 
3995  return Op;
3996 }
3997 
3998 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
3999  // Implement fcopysign with a fabs and a conditional fneg.
4000  SDValue Tmp0 = Op.getOperand(0);
4001  SDValue Tmp1 = Op.getOperand(1);
4002  SDLoc dl(Op);
4003  EVT VT = Op.getValueType();
4004  EVT SrcVT = Tmp1.getValueType();
4005  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4006  Tmp0.getOpcode() == ARMISD::VMOVDRR;
4007  bool UseNEON = !InGPR && Subtarget->hasNEON();
4008 
4009  if (UseNEON) {
4010  // Use VBSL to copy the sign bit.
4011  unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4012  SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4013  DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4014  EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4015  if (VT == MVT::f64)
4016  Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4017  DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4018  DAG.getConstant(32, dl, MVT::i32));
4019  else /*if (VT == MVT::f32)*/
4020  Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4021  if (SrcVT == MVT::f32) {
4022  Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4023  if (VT == MVT::f64)
4024  Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4025  DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4026  DAG.getConstant(32, dl, MVT::i32));
4027  } else if (VT == MVT::f32)
4028  Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4029  DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4030  DAG.getConstant(32, dl, MVT::i32));
4031  Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4032  Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4033 
4034  SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4035  dl, MVT::i32);
4036  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4037  SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4038  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4039 
4040  SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4041  DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4042  DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4043  if (VT == MVT::f32) {
4044  Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4045  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4046  DAG.getConstant(0, dl, MVT::i32));
4047  } else {
4048  Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4049  }
4050 
4051  return Res;
4052  }
4053 
4054  // Bitcast operand 1 to i32.
4055  if (SrcVT == MVT::f64)
4056  Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4057  Tmp1).getValue(1);
4058  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4059 
4060  // Or in the signbit with integer operations.
4061  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4062  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4063  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4064  if (VT == MVT::f32) {
4065  Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4066  DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4067  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4068  DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4069  }
4070 
4071  // f64: Or the high part with signbit and then combine two parts.
4072  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4073  Tmp0);
4074  SDValue Lo = Tmp0.getValue(0);
4075  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4076  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4077  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4078 }
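// Minimal scalar sketch of the f32 integer path above (illustrative only;
// copySignBitsSketch is a hypothetical helper, not part of this file).
// Assuming 32-bit unsigned values holding IEEE-754 single-precision bit
// patterns, the result keeps the magnitude bits of Mag and only the sign bit
// of Sgn, which is exactly what the AND/OR sequence above builds.
static inline unsigned copySignBitsSketch(unsigned MagBits, unsigned SgnBits) {
  return (MagBits & 0x7fffffffu) | (SgnBits & 0x80000000u);
}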
4079 
4080 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4081  MachineFunction &MF = DAG.getMachineFunction();
4082  MachineFrameInfo *MFI = MF.getFrameInfo();
4083  MFI->setReturnAddressIsTaken(true);
4084 
4085  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4086  return SDValue();
4087 
4088  EVT VT = Op.getValueType();
4089  SDLoc dl(Op);
4090  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4091  if (Depth) {
4092  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4093  SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4094  return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4095  DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4096  MachinePointerInfo(), false, false, false, 0);
4097  }
4098 
4099  // Return LR, which contains the return address. Mark it an implicit live-in.
4100  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4101  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4102 }
4103 
4104 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4105  const ARMBaseRegisterInfo &ARI =
4106  *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4107  MachineFunction &MF = DAG.getMachineFunction();
4108  MachineFrameInfo *MFI = MF.getFrameInfo();
4109  MFI->setFrameAddressIsTaken(true);
4110 
4111  EVT VT = Op.getValueType();
4112  SDLoc dl(Op); // FIXME probably not meaningful
4113  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4114  unsigned FrameReg = ARI.getFrameRegister(MF);
4115  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4116  while (Depth--)
4117  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4118  MachinePointerInfo(),
4119  false, false, false, 0);
4120  return FrameAddr;
4121 }
4122 
4123 // FIXME? Maybe this could be a TableGen attribute on some registers and
4124 // this table could be generated automatically from RegInfo.
4125 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4126  SelectionDAG &DAG) const {
4127  unsigned Reg = StringSwitch<unsigned>(RegName)
4128  .Case("sp", ARM::SP)
4129  .Default(0);
4130  if (Reg)
4131  return Reg;
4132  report_fatal_error(Twine("Invalid register name \""
4133  + StringRef(RegName) + "\"."));
4134 }
4135 
4136 // Result is 64 bit value so split into two 32 bit values and return as a
4137 // pair of values.
4138 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4139  SelectionDAG &DAG) {
4140  SDLoc DL(N);
4141 
4142  // This function is only supposed to be called for i64 type destination.
4143  assert(N->getValueType(0) == MVT::i64
4144  && "ExpandREAD_REGISTER called for non-i64 type result.");
4145 
4146  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4147  DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4148  N->getOperand(0),
4149  N->getOperand(1));
4150 
4151  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4152  Read.getValue(1)));
4153  Results.push_back(Read.getOperand(0));
4154 }
4155 
4156 /// ExpandBITCAST - If the target supports VFP, this function is called to
4157 /// expand a bit convert where either the source or destination type is i64 to
4158 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4159 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
4160 /// vectors), since the legalizer won't know what to do with that.
4161 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4162  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4163  SDLoc dl(N);
4164  SDValue Op = N->getOperand(0);
4165 
4166  // This function is only supposed to be called for i64 types, either as the
4167  // source or destination of the bit convert.
4168  EVT SrcVT = Op.getValueType();
4169  EVT DstVT = N->getValueType(0);
4170  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4171  "ExpandBITCAST called for non-i64 type");
4172 
4173  // Turn i64->f64 into VMOVDRR.
4174  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4175  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4176  DAG.getConstant(0, dl, MVT::i32));
4177  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4178  DAG.getConstant(1, dl, MVT::i32));
4179  return DAG.getNode(ISD::BITCAST, dl, DstVT,
4180  DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4181  }
4182 
4183  // Turn f64->i64 into VMOVRRD.
4184  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4185  SDValue Cvt;
4186  if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4187  SrcVT.getVectorNumElements() > 1)
4188  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4189  DAG.getVTList(MVT::i32, MVT::i32),
4190  DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4191  else
4192  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4193  DAG.getVTList(MVT::i32, MVT::i32), Op);
4194  // Merge the pieces into a single i64 value.
4195  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4196  }
4197 
4198  return SDValue();
4199 }
4200 
4201 /// getZeroVector - Returns a vector of specified type with all zero elements.
4202 /// Zero vectors are used to represent vector negation and in those cases
4203 /// will be implemented with the NEON VNEG instruction. However, VNEG does
4204 /// not support i64 elements, so sometimes the zero vectors will need to be
4205 /// explicitly constructed. Regardless, use a canonical VMOV to create the
4206 /// zero vector.
4207 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
4208  assert(VT.isVector() && "Expected a vector type");
4209  // The canonical modified immediate encoding of a zero vector is....0!
4210  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4211  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4212  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4213  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4214 }
4215 
4216 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4217 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
4218 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4219  SelectionDAG &DAG) const {
4220  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4221  EVT VT = Op.getValueType();
4222  unsigned VTBits = VT.getSizeInBits();
4223  SDLoc dl(Op);
4224  SDValue ShOpLo = Op.getOperand(0);
4225  SDValue ShOpHi = Op.getOperand(1);
4226  SDValue ShAmt = Op.getOperand(2);
4227  SDValue ARMcc;
4228  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4229 
4230  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4231 
4232  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4233  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4234  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4235  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4236  DAG.getConstant(VTBits, dl, MVT::i32));
4237  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4238  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4239  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4240 
4241  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4242  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4243  ISD::SETGE, ARMcc, DAG, dl);
4244  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4245  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
4246  CCR, Cmp);
4247 
4248  SDValue Ops[2] = { Lo, Hi };
4249  return DAG.getMergeValues(Ops, dl);
4250 }
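// Worked example (illustrative only): for SRL_PARTS with ShAmt < 32 the CMOV
// keeps FalseVal, so Lo = (ShOpLo >> ShAmt) | (ShOpHi << (32 - ShAmt)) and
// Hi = ShOpHi >> ShAmt. When ShAmt >= 32, the ExtraShAmt >= 0 test selects
// TrueVal instead, so Lo = ShOpHi >> (ShAmt - 32) (arithmetic for SRA_PARTS),
// which matches the expected behaviour of a 64-bit shift split across two
// 32-bit registers.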
4251 
4252 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4253 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
4254 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4255  SelectionDAG &DAG) const {
4256  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4257  EVT VT = Op.getValueType();
4258  unsigned VTBits = VT.getSizeInBits();
4259  SDLoc dl(Op);
4260  SDValue ShOpLo = Op.getOperand(0);
4261  SDValue ShOpHi = Op.getOperand(1);
4262  SDValue ShAmt = Op.getOperand(2);
4263  SDValue ARMcc;
4264 
4265  assert(Op.getOpcode() == ISD::SHL_PARTS);
4266  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4267  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4268  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4269  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4270  DAG.getConstant(VTBits, dl, MVT::i32));
4271  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4272  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4273 
4274  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4275  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4276  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4277  ISD::SETGE, ARMcc, DAG, dl);
4278  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4279  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
4280  CCR, Cmp);
4281 
4282  SDValue Ops[2] = { Lo, Hi };
4283  return DAG.getMergeValues(Ops, dl);
4284 }
4285 
4286 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4287  SelectionDAG &DAG) const {
4288  // The rounding mode is in bits 23:22 of the FPSCR.
4289  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4290  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3
4291  // so that the shift + and get folded into a bitfield extract.
4292  SDLoc dl(Op);
4293  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4294  DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
4295  MVT::i32));
4296  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4297  DAG.getConstant(1U << 22, dl, MVT::i32));
4298  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4299  DAG.getConstant(22, dl, MVT::i32));
4300  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4301  DAG.getConstant(3, dl, MVT::i32));
4302 }
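// Worked mapping (illustrative only; fltRoundsSketch is a hypothetical helper,
// not part of this file). Assuming a 32-bit unsigned FPSCR image, the formula
// ((FPSCR + (1 << 22)) >> 22) & 3 maps the ARM RMode field values 0, 1, 2, 3
// to the FLT_ROUNDS values 1, 2, 3, 0 respectively: adding one in the field
// position rotates the encoding, and the final mask discards any carry out of
// bit 23.
static inline unsigned fltRoundsSketch(unsigned FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3u;
}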
4303 
4304 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4305  const ARMSubtarget *ST) {
4306  SDLoc dl(N);
4307  EVT VT = N->getValueType(0);
4308  if (VT.isVector()) {
4309  assert(ST->hasNEON());
4310 
4311  // Compute the least significant set bit: LSB = X & -X
4312  SDValue X = N->getOperand(0);
4313  SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
4314  SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
4315 
4316  EVT ElemTy = VT.getVectorElementType();
4317 
4318  if (ElemTy == MVT::i8) {
4319  // Compute with: cttz(x) = ctpop(lsb - 1)
4320  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4321  DAG.getTargetConstant(1, dl, ElemTy));
4322  SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4323  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
4324  }
4325 
4326  if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
4327  (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
4328  // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
4329  unsigned NumBits = ElemTy.getSizeInBits();
4330  SDValue WidthMinus1 =
4331  DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4332  DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
4333  SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
4334  return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
4335  }
4336 
4337  // Compute with: cttz(x) = ctpop(lsb - 1)
4338 
4339  // Since we can only compute the number of bits in a byte with vcnt.8, we
4340  // have to gather the result with pairwise addition (vpaddl) for i16, i32,
4341  // and i64.
4342 
4343  // Compute LSB - 1.
4344  SDValue Bits;
4345  if (ElemTy == MVT::i64) {
4346  // Load constant 0xffff'ffff'ffff'ffff to register.
4347  SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4348  DAG.getTargetConstant(0x1eff, dl, MVT::i32));
4349  Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
4350  } else {
4351  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4352  DAG.getTargetConstant(1, dl, ElemTy));
4353  Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4354  }
4355 
4356  // Count #bits with vcnt.8.
4357  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4358  SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
4359  SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
4360 
4361  // Gather the #bits with vpaddl (pairwise add.)
4362  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
4363  SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
4364  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4365  Cnt8);
4366  if (ElemTy == MVT::i16)
4367  return Cnt16;
4368 
4369  EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
4370  SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
4371  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4372  Cnt16);
4373  if (ElemTy == MVT::i32)
4374  return Cnt32;
4375 
4376  assert(ElemTy == MVT::i64);
4377  SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4378  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4379  Cnt32);
4380  return Cnt64;
4381  }
4382 
4383  if (!ST->hasV6T2Ops())
4384  return SDValue();
4385 
4386  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
4387  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
4388 }
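// Minimal scalar sketch of the identity used above, cttz(x) = ctpop(lsb - 1)
// with lsb = x & -x (illustrative only; cttzSketch is a hypothetical helper,
// not part of this file). Assumes 32-bit unsigned and x != 0. For example,
// x = 0b0010100 gives lsb = 0b100, lsb - 1 = 0b011, and ctpop = 2 = cttz(x).
static inline unsigned cttzSketch(unsigned X) {
  unsigned LSB = X & (0u - X);   // isolate the least significant set bit
  unsigned Bits = LSB - 1u;      // all ones strictly below that bit
  unsigned Count = 0;
  while (Bits) {                 // portable population count
    Count += Bits & 1u;
    Bits >>= 1;
  }
  return Count;
}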
4389 
4390 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
4391 /// for each 16-bit element from operand, repeated. The basic idea is to
4392 /// leverage vcnt to get the 8-bit counts, gather and add the results.
4393 ///
4394 /// Trace for v4i16:
4395 /// input = [v0 v1 v2 v3 ] (vi 16-bit element)
4396 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
4397 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
4398 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
4399 /// [b0 b1 b2 b3 b4 b5 b6 b7]
4400 /// +[b1 b0 b3 b2 b5 b4 b7 b6]
4401 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
4402 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
4403 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
4404  EVT VT = N->getValueType(0);
4405  SDLoc DL(N);
4406 
4407  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4408  SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
4409  SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
4410  SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
4411  SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
4412  return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
4413 }
4414 
4415 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
4416 /// bit-count for each 16-bit element from the operand. We need slightly
4417 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
4418 /// 64/128-bit registers.
4419 ///
4420 /// Trace for v4i16:
4421 /// input = [v0 v1 v2 v3 ] (vi 16-bit element)
4422 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
4423 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
4424 /// v4i16:Extracted = [k0 k1 k2 k3 ]
4425 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
4426  EVT VT = N->getValueType(0);
4427  SDLoc DL(N);
4428 
4429  SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
4430  if (VT.is64BitVector()) {
4431  SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
4432  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
4433  DAG.getIntPtrConstant(0, DL));
4434  } else {
4435  SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
4436  BitCounts, DAG.getIntPtrConstant(0, DL));
4437  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
4438  }
4439 }
4440 
4441 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
4442 /// bit-count for each 32-bit element from the operand. The idea here is
4443 /// to split the vector into 16-bit elements, leverage the 16-bit count
4444 /// routine, and then combine the results.
4445 ///
4446 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
4447 /// input = [v0 v1 ] (vi: 32-bit elements)
4448 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
4449 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
4450 /// vrev: N0 = [k1 k0 k3 k2 ]
4451 /// [k0 k1 k2 k3 ]
4452 /// N1 =+[k1 k0 k3 k2 ]
4453 /// [k0 k2 k1 k3 ]
4454 /// N2 =+[k1 k3 k0 k2 ]
4455 /// [k0 k2 k1 k3 ]
4456 /// Extended =+[k1 k3 k0 k2 ]
4457 /// [k0 k2 ]
4458 /// Extracted=+[k1 k3 ]
4459 ///
4460 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
4461  EVT VT = N->getValueType(0);
4462  SDLoc DL(N);
4463 
4464  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
4465 
4466  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
4467  SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
4468  SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
4469  SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
4470  SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
4471 
4472  if (VT.is64BitVector()) {
4473  SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
4474  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
4475  DAG.getIntPtrConstant(0, DL));
4476  } else {
4477  SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
4478  DAG.getIntPtrConstant(0, DL));
4479  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
4480  }
4481 }
4482 
4483 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
4484  const ARMSubtarget *ST) {
4485  EVT VT = N->getValueType(0);
4486 
4487  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
4488  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
4489  VT == MVT::v4i16 || VT == MVT::v8i16) &&
4490  "Unexpected type for custom ctpop lowering");
4491 
4492  if (VT.getVectorElementType() == MVT::i32)
4493  return lowerCTPOP32BitElements(N, DAG);
4494  else
4495  return lowerCTPOP16BitElements(N, DAG);
4496 }
4497 
4498 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
4499  const ARMSubtarget *ST) {
4500  EVT VT = N->getValueType(0);
4501  SDLoc dl(N);
4502 
4503  if (!VT.isVector())
4504  return SDValue();
4505 
4506  // Lower vector shifts on NEON to use VSHL.
4507  assert(ST->hasNEON() && "unexpected vector shift");
4508 
4509  // Left shifts translate directly to the vshiftu intrinsic.
4510  if (N->getOpcode() == ISD::SHL)
4511  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4512  DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
4513  MVT::i32),
4514  N->getOperand(0), N->getOperand(1));
4515 
4516  assert((N->getOpcode() == ISD::SRA ||
4517  N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
4518 
4519  // NEON uses the same intrinsics for both left and right shifts. For
4520  // right shifts, the shift amounts are negative, so negate the vector of
4521  // shift amounts.
4522  EVT ShiftVT = N->getOperand(1).getValueType();
4523  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
4524  getZeroVector(ShiftVT, DAG, dl),
4525  N->getOperand(1));
4526  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
4527  Intrinsic::arm_neon_vshifts :
4528  Intrinsic::arm_neon_vshiftu);
4529  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4530  DAG.getConstant(vshiftInt, dl, MVT::i32),
4531  N->getOperand(0), NegatedCount);
4532 }
4533 
4534 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
4535  const ARMSubtarget *ST) {
4536  EVT VT = N->getValueType(0);
4537  SDLoc dl(N);
4538 
4539  // We can get here for a node like i32 = ISD::SHL i32, i64
4540  if (VT != MVT::i64)
4541  return SDValue();
4542 
4543  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
4544  "Unknown shift to lower!");
4545 
4546  // We only lower SRA, SRL of 1 here, all others use generic lowering.
4547  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
4548  cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
4549  return SDValue();
4550 
4551  // If we are in thumb mode, we don't have RRX.
4552  if (ST->isThumb1Only()) return SDValue();
4553 
4554  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
4555  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
4556  DAG.getConstant(0, dl, MVT::i32));
4557  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
4558  DAG.getConstant(1, dl, MVT::i32));
4559 
4560  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
4561  // captures the result into a carry flag.
4562  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
4563  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
4564 
4565  // The low part is an ARMISD::RRX operand, which shifts the carry in.
4566  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
4567 
4568  // Merge the pieces into a single i64 value.
4569  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
4570 }
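// Worked example (illustrative only): for "srl i64 %x, 1" with %x = Hi:Lo, the
// SRL_FLAG node shifts Hi right by one and latches its old bit 0 (bit 32 of
// the i64) into the carry flag; RRX then shifts Lo right by one and inserts
// that carry at bit 31, so the resulting Hi':Lo' pair is exactly %x >> 1.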
4571 
4572 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
4573  SDValue TmpOp0, TmpOp1;
4574  bool Invert = false;
4575  bool Swap = false;
4576  unsigned Opc = 0;
4577 
4578  SDValue Op0 = Op.getOperand(0);
4579  SDValue Op1 = Op.getOperand(1);
4580  SDValue CC = Op.getOperand(2);
4581  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
4582  EVT VT = Op.getValueType();
4583  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4584  SDLoc dl(Op);
4585 
4586  if (CmpVT.getVectorElementType() == MVT::i64)
4587  // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
4588  // but it's possible that our operands are 64-bit but our result is 32-bit.
4589  // Bail in this case.
4590  return SDValue();
4591 
4592  if (Op1.getValueType().isFloatingPoint()) {
4593  switch (SetCCOpcode) {
4594  default: llvm_unreachable("Illegal FP comparison");
4595  case ISD::SETUNE:
4596  case ISD::SETNE: Invert = true; // Fallthrough
4597  case ISD::SETOEQ:
4598  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
4599  case ISD::SETOLT:
4600  case ISD::SETLT: Swap = true; // Fallthrough
4601  case ISD::SETOGT:
4602  case ISD::SETGT: Opc = ARMISD::VCGT; break;
4603  case ISD::SETOLE:
4604  case ISD::SETLE: Swap = true; // Fallthrough
4605  case ISD::SETOGE:
4606  case ISD::SETGE: Opc = ARMISD::VCGE; break;
4607  case ISD::SETUGE: Swap = true; // Fallthrough
4608  case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
4609  case ISD::SETUGT: Swap = true; // Fallthrough
4610  case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
4611  case ISD::SETUEQ: Invert = true; // Fallthrough
4612  case ISD::SETONE:
4613  // Expand this to (OLT | OGT).
4614  TmpOp0 = Op0;
4615  TmpOp1 = Op1;
4616  Opc = ISD::OR;
4617  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
4618  Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
4619  break;
4620  case ISD::SETUO: Invert = true; // Fallthrough
4621  case ISD::SETO:
4622  // Expand this to (OLT | OGE).
4623  TmpOp0 = Op0;
4624  TmpOp1 = Op1;
4625  Opc = ISD::OR;
4626  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
4627  Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
4628  break;
4629  }
4630  } else {
4631  // Integer comparisons.
4632  switch (SetCCOpcode) {
4633  default: llvm_unreachable("Illegal integer comparison");
4634  case ISD::SETNE: Invert = true;
4635  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
4636  case ISD::SETLT: Swap = true;
4637  case ISD::SETGT: Opc = ARMISD::VCGT; break;
4638  case ISD::SETLE: Swap = true;
4639  case ISD::SETGE: Opc = ARMISD::VCGE; break;
4640  case ISD::SETULT: Swap = true;
4641  case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
4642  case ISD::SETULE: Swap = true;
4643  case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
4644  }
4645 
4646  // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
4647  if (Opc == ARMISD::VCEQ) {
4648 
4649  SDValue AndOp;
4650  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
4651  AndOp = Op0;
4652  else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
4653  AndOp = Op1;
4654 
4655  // Ignore bitconvert.
4656  if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
4657  AndOp = AndOp.getOperand(0);
4658 
4659  if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
4660  Opc = ARMISD::VTST;
4661  Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
4662  Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
4663  Invert = !Invert;
4664  }
4665  }
4666  }
4667 
4668  if (Swap)
4669  std::swap(Op0, Op1);
4670 
4671  // If one of the operands is a constant vector zero, attempt to fold the
4672  // comparison to a specialized compare-against-zero form.
4673  SDValue SingleOp;
4674  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
4675  SingleOp = Op0;
4676  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
4677  if (Opc == ARMISD::VCGE)
4678  Opc = ARMISD::VCLEZ;
4679  else if (Opc == ARMISD::VCGT)
4680  Opc = ARMISD::VCLTZ;
4681  SingleOp = Op1;
4682  }
4683 
4684  SDValue Result;
4685  if (SingleOp.getNode()) {
4686  switch (Opc) {
4687  case ARMISD::VCEQ:
4688  Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
4689  case ARMISD::VCGE:
4690  Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
4691  case ARMISD::VCLEZ:
4692  Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
4693  case ARMISD::VCGT:
4694  Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
4695  case ARMISD::VCLTZ:
4696  Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
4697  default:
4698  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
4699  }
4700  } else {
4701  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
4702  }
4703 
4704  Result = DAG.getSExtOrTrunc(Result, dl, VT);
4705 
4706  if (Invert)
4707  Result = DAG.getNOT(dl, Result, VT);
4708 
4709  return Result;
4710 }
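// Worked example (illustrative only): an integer "setcc ult a, b" takes the
// SETULT case above, so the operands are swapped and Opc = VCGTU, producing
// VCGTU(b, a); a "setcc ne a, b" sets Invert and uses VCEQ, so the result is
// the bitwise NOT of VCEQ(a, b) produced by the final getNOT.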
4711 
4712 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
4713 /// valid vector constant for a NEON instruction with a "modified immediate"
4714 /// operand (e.g., VMOV). If so, return the encoded value.
4715 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
4716  unsigned SplatBitSize, SelectionDAG &DAG,
4717  SDLoc dl, EVT &VT, bool is128Bits,
4718  NEONModImmType type) {
4719  unsigned OpCmode, Imm;
4720 
4721  // SplatBitSize is set to the smallest size that splats the vector, so a
4722  // zero vector will always have SplatBitSize == 8. However, NEON modified
4723  // immediate instructions other than VMOV do not support the 8-bit encoding
4724  // of a zero vector, and the default encoding of zero is supposed to be the
4725  // 32-bit version.
4726  if (SplatBits == 0)
4727  SplatBitSize = 32;
4728 
4729  switch (SplatBitSize) {
4730  case 8:
4731  if (type != VMOVModImm)
4732  return SDValue();
4733  // Any 1-byte value is OK. Op=0, Cmode=1110.
4734  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
4735  OpCmode = 0xe;
4736  Imm = SplatBits;
4737  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
4738  break;
4739 
4740  case 16:
4741  // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
4742  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
4743  if ((SplatBits & ~0xff) == 0) {
4744  // Value = 0x00nn: Op=x, Cmode=100x.
4745  OpCmode = 0x8;
4746  Imm = SplatBits;
4747  break;
4748  }
4749  if ((SplatBits & ~0xff00) == 0) {
4750  // Value = 0xnn00: Op=x, Cmode=101x.
4751  OpCmode = 0xa;
4752  Imm = SplatBits >> 8;
4753  break;
4754  }
4755  return SDValue();
4756 
4757  case 32:
4758  // NEON's 32-bit VMOV supports splat values where:
4759  // * only one byte is nonzero, or
4760  // * the least significant byte is 0xff and the second byte is nonzero, or
4761  // * the least significant 2 bytes are 0xff and the third is nonzero.
4762  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
4763  if ((SplatBits & ~0xff) == 0) {
4764  // Value = 0x000000nn: Op=x, Cmode=000x.
4765  OpCmode = 0;
4766  Imm = SplatBits;
4767  break;
4768  }
4769  if ((SplatBits & ~0xff00) == 0) {
4770  // Value = 0x0000nn00: Op=x, Cmode=001x.
4771  OpCmode = 0x2;
4772  Imm = SplatBits >> 8;
4773  break;
4774  }
4775  if ((SplatBits & ~0xff0000) == 0) {
4776  // Value = 0x00nn0000: Op=x, Cmode=010x.
4777  OpCmode = 0x4;
4778  Imm = SplatBits >> 16;
4779  break;
4780  }
4781  if ((SplatBits & ~0xff000000) == 0) {
4782  // Value = 0xnn000000: Op=x, Cmode=011x.
4783  OpCmode = 0x6;
4784  Imm = SplatBits >> 24;
4785  break;
4786  }
4787 
4788  // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
4789  if (type == OtherModImm) return SDValue();
4790 
4791  if ((SplatBits & ~0xffff) == 0 &&
4792  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
4793  // Value = 0x0000nnff: Op=x, Cmode=1100.
4794  OpCmode = 0xc;
4795  Imm = SplatBits >> 8;
4796  break;
4797  }
4798 
4799  if ((SplatBits & ~0xffffff) == 0 &&
4800  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
4801  // Value = 0x00nnffff: Op=x, Cmode=1101.
4802  OpCmode = 0xd;
4803  Imm = SplatBits >> 16;
4804  break;
4805  }
4806 
4807  // Note: there are a few 32-bit splat values (specifically: 00ffff00,
4808  // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
4809  // VMOV.I32. A (very) minor optimization would be to replicate the value
4810  // and fall through here to test for a valid 64-bit splat. But, then the
4811  // caller would also need to check and handle the change in size.
4812  return SDValue();
4813 
4814  case 64: {
4815  if (type != VMOVModImm)
4816  return SDValue();
4817  // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
4818  uint64_t BitMask = 0xff;
4819  uint64_t Val = 0;
4820  unsigned ImmMask = 1;
4821  Imm = 0;
4822  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
4823  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
4824  Val |= BitMask;
4825  Imm |= ImmMask;
4826  } else if ((SplatBits & BitMask) != 0) {
4827  return SDValue();
4828  }
4829  BitMask <<= 8;
4830  ImmMask <<= 1;
4831  }
4832 
4833  if (DAG.getDataLayout().isBigEndian())
4834  // swap higher and lower 32 bit word
4835  Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
4836 
4837  // Op=1, Cmode=1110.
4838  OpCmode = 0x1e;
4839  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
4840  break;
4841  }
4842 
4843  default:
4844  llvm_unreachable("unexpected size for isNEONModifiedImm");
4845  }
4846 
4847  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
4848  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
4849 }
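// Worked example (illustrative only): a 32-bit splat of 0x00ff0000 matches the
// "only the third byte is nonzero" case above, so OpCmode = 0x4 and
// Imm = 0xff, which createNEONModImm then packs into the encoded VMOV.I32
// modified-immediate operand.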
4850 
4851 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
4852  const ARMSubtarget *ST) const {
4853  if (!ST->hasVFP3())
4854  return SDValue();
4855 
4856  bool IsDouble = Op.getValueType() == MVT::f64;
4857  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
4858 
4859  // Use the default (constant pool) lowering for double constants when we have
4860  // an SP-only FPU
4861  if (IsDouble && Subtarget->isFPOnlySP())
4862  return SDValue();
4863 
4864  // Try splatting with a VMOV.f32...
4865  APFloat FPVal = CFP->getValueAPF();
4866  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
4867 
4868  if (ImmVal != -1) {
4869  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
4870  // We have code in place to select a valid ConstantFP already, no need to
4871  // do any mangling.
4872  return Op;
4873  }
4874 
4875  // It's a float and we are trying to use NEON operations where
4876  // possible. Lower it to a splat followed by an extract.
4877  SDLoc DL(Op);
4878  SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
4879  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
4880  NewVal);
4881  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
4882  DAG.getConstant(0, DL, MVT::i32));
4883  }
4884 
4885  // The rest of our options are NEON only, make sure that's allowed before
4886  // proceeding..
4887  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
4888  return SDValue();
4889 
4890  EVT VMovVT;
4891  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
4892 
4893  // It wouldn't really be worth bothering for doubles except for one very
4894  // important value, which does happen to match: 0.0. So make sure we don't do
4895  // anything stupid.
4896  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
4897  return SDValue();
4898 
4899  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
4900  SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
4901  VMovVT, false, VMOVModImm);
4902  if (NewVal != SDValue()) {
4903  SDLoc DL(Op);
4904  SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
4905  NewVal);
4906  if (IsDouble)
4907  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4908 
4909  // It's a float: cast and extract a vector element.
4910  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
4911  VecConstant);
4912  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
4913  DAG.getConstant(0, DL, MVT::i32));
4914  }
4915 
4916  // Finally, try a VMVN.i32
4917  NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
4918  false, VMVNModImm);
4919  if (NewVal != SDValue()) {
4920  SDLoc DL(Op);
4921  SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
4922 
4923  if (IsDouble)
4924  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4925 
4926  // It's a float: cast and extract a vector element.
4927  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
4928  VecConstant);
4929  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
4930  DAG.getConstant(0, DL, MVT::i32));
4931  }
4932 
4933  return SDValue();
4934 }
4935 
4936 // Check if a VEXT instruction can handle the shuffle mask when the vector
4937 // sources of the shuffle are the same.
4938 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
4939  unsigned NumElts = VT.getVectorNumElements();
4940 
4941  // Assume that the first shuffle index is not UNDEF. Fail if it is.
4942  if (M[0] < 0)
4943  return false;
4944 
4945  Imm = M[0];
4946 
4947  // If this is a VEXT shuffle, the immediate value is the index of the first
4948  // element. The other shuffle indices must be the successive elements after
4949  // the first one.
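// For example (illustrative only), the single-source v8i8 mask
// <5, 6, 7, 0, 1, 2, 3, 4> is a VEXT with Imm = 5.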
4950  unsigned ExpectedElt = Imm;
4951  for (unsigned i = 1; i < NumElts; ++i) {
4952  // Increment the expected index. If it wraps around, just follow it
4953  // back to index zero and keep going.
4954  ++ExpectedElt;
4955  if (ExpectedElt == NumElts)
4956  ExpectedElt = 0;
4957 
4958  if (M[i] < 0) continue; // ignore UNDEF indices
4959  if (ExpectedElt != static_cast<unsigned>(M[i]))
4960  return false;
4961  }
4962 
4963  return true;
4964 }
4965 
4966 
4967 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
4968  bool &ReverseVEXT, unsigned &Imm) {
4969  unsigned NumElts = VT.getVectorNumElements();
4970  ReverseVEXT = false;
4971 
4972  // Assume that the first shuffle index is not UNDEF. Fail if it is.
4973  if (M[0] < 0)
4974  return false;
4975 
4976  Imm = M[0];
4977 
4978  // If this is a VEXT shuffle, the immediate value is the index of the first
4979  // element. The other shuffle indices must be the successive elements after
4980  // the first one.
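// For example (illustrative only), the v4i16 mask <7, 0, 1, 2> wraps past
// both sources, so ReverseVEXT is set and Imm is adjusted to 7 - 4 = 3.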
4981  unsigned ExpectedElt = Imm;
4982  for (unsigned i = 1; i < NumElts; ++i) {
4983  // Increment the expected index. If it wraps around, it may still be
4984  // a VEXT but the source vectors must be swapped.
4985  ExpectedElt += 1;
4986  if (ExpectedElt == NumElts * 2) {
4987  ExpectedElt = 0;
4988  ReverseVEXT = true;
4989  }
4990 
4991  if (M[i] < 0) continue; // ignore UNDEF indices
4992  if (ExpectedElt != static_cast<unsigned>(M[i]))
4993  return false;
4994  }
4995 
4996  // Adjust the index value if the source operands will be swapped.
4997  if (ReverseVEXT)
4998  Imm -= NumElts;
4999 
5000  return true;
5001 }
5002 
5003 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
5004 /// instruction with the specified blocksize. (The order of the elements
5005 /// within each block of the vector is reversed.)
5006 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5007  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5008  "Only possible block sizes for VREV are: 16, 32, 64");
5009 
5010  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5011  if (EltSz == 64)
5012  return false;
5013 
5014  unsigned NumElts = VT.getVectorNumElements();
5015  unsigned BlockElts = M[0] + 1;
5016  // If the first shuffle index is UNDEF, be optimistic.
5017  if (M[0] < 0)
5018  BlockElts = BlockSize / EltSz;
5019 
5020  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5021  return false;
5022 
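// For example (illustrative only), VREV32 on v8i8 has BlockElts = 4 and
// expects the mask <3, 2, 1, 0, 7, 6, 5, 4>.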
5023  for (unsigned i = 0; i < NumElts; ++i) {
5024  if (M[i] < 0) continue; // ignore UNDEF indices
5025  if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5026  return false;
5027  }
5028 
5029  return true;
5030 }
5031 
5032 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5033  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5034  // range, then 0 is placed into the resulting vector. So pretty much any mask
5035  // of 8 elements can work here.
5036  return VT == MVT::v8i8 && M.size() == 8;
5037 }
5038 
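/// isVTRNMask - Check if a shuffle mask selects one result of a VTRN; for a
/// four-element shuffle, e.g. <0, 4, 2, 6> picks result 0 and <1, 5, 3, 7>
/// picks result 1.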
5039 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5040  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5041  if (EltSz == 64)
5042  return false;
5043 
5044  unsigned NumElts = VT.getVectorNumElements();
5045  WhichResult = (M[0] == 0 ? 0 : 1);
5046  for (unsigned i = 0; i < NumElts; i += 2) {
5047  if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
5048  (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
5049  return false;
5050  }
5051  return true;
5052 }
5053 
5054 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5055 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5056 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5057 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5058  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5059  if (EltSz == 64)
5060  return false;
5061 
5062  unsigned NumElts = VT.getVectorNumElements();
5063  WhichResult = (M[0] == 0 ? 0 : 1);
5064  for (unsigned i = 0; i < NumElts; i += 2) {
5065  if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
5066  (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
5067  return false;
5068  }
5069  return true;
5070 }
5071 
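/// isVUZPMask - Check if a shuffle mask selects one result of a VUZP; for a
/// four-element shuffle, e.g. <0, 2, 4, 6> picks result 0 and <1, 3, 5, 7>
/// picks result 1.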
5072 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5073  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5074  if (EltSz == 64)
5075  return false;
5076 
5077  unsigned NumElts = VT.getVectorNumElements();
5078  WhichResult = (M[0] == 0 ? 0 : 1);
5079  for (unsigned i = 0; i != NumElts; ++i) {
5080  if (M[i] < 0) continue; // ignore UNDEF indices
5081  if ((unsigned) M[i] != 2 * i + WhichResult)
5082  return false;
5083  }
5084 
5085  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5086  if (VT.is64BitVector() && EltSz == 32)
5087  return false;
5088 
5089  return true;
5090 }
5091 
5092 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5093 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5094 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5095 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5096  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5097  if (EltSz == 64)
5098  return false;
5099 
5100  unsigned Half = VT.getVectorNumElements() / 2;
5101  WhichResult = (M[0] == 0 ? 0 : 1);
5102  for (unsigned j = 0; j != 2; ++j) {
5103  unsigned Idx = WhichResult;
5104  for (unsigned i = 0; i != Half; ++i) {
5105  int MIdx = M[i + j * Half];
5106  if (MIdx >= 0 && (unsigned) MIdx != Idx)
5107  return false;
5108  Idx += 2;
5109  }
5110  }
5111 
5112  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5113  if (VT.is64BitVector() && EltSz == 32)
5114  return false;
5115 
5116  return true;
5117 }
5118 
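/// isVZIPMask - Check if a shuffle mask selects one result of a VZIP; for a
/// four-element shuffle, e.g. <0, 4, 1, 5> picks result 0 and <2, 6, 3, 7>
/// picks result 1.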
5119 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5120  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5121  if (EltSz == 64)
5122  return false;
5123 
5124  unsigned NumElts = VT.getVectorNumElements();
5125  WhichResult = (M[0] == 0 ? 0 : 1);
5126  unsigned Idx = WhichResult * NumElts / 2;
5127  for (unsigned i = 0; i != NumElts; i += 2) {
5128  if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
5129  (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
5130  return false;
5131  Idx += 1;
5132  }
5133 
5134  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5135  if (VT.is64BitVector() && EltSz == 32)
5136  return false;
5137 
5138  return true;
5139 }
5140 
5141 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5142 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5143 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5144 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5145  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5146  if (EltSz == 64)
5147  return false;
5148 
5149  unsigned NumElts = VT.getVectorNumElements();
5150  WhichResult = (M[0] == 0 ? 0 : 1);
5151  unsigned Idx = WhichResult * NumElts / 2;
5152  for (unsigned i = 0; i != NumElts; i += 2) {
5153  if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
5154  (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
5155  return false;
5156  Idx += 1;
5157  }
5158 
5159  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5160  if (VT.is64BitVector() && EltSz == 32)
5161  return false;
5162 
5163  return true;
5164 }
5165 
5166 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
5167 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
5168 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
5169  unsigned &WhichResult,
5170  bool &isV_UNDEF) {
5171  isV_UNDEF = false;
5172  if (isVTRNMask(ShuffleMask, VT, WhichResult))
5173  return ARMISD::VTRN;
5174  if (isVUZPMask(ShuffleMask, VT, WhichResult))
5175  return ARMISD::VUZP;
5176  if (isVZIPMask(ShuffleMask, VT, WhichResult))
5177  return ARMISD::VZIP;
5178 
5179  isV_UNDEF = true;
5180  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
5181  return ARMISD::VTRN;
5182  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5183  return ARMISD::VUZP;
5184  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5185  return ARMISD::VZIP;
5186 
5187  return 0;
5188 }
5189 
5190 /// \return true if this is a reverse operation on a vector.
5191 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
5192  unsigned NumElts = VT.getVectorNumElements();
5193  // Make sure the mask has the right size.
5194  if (NumElts != M.size())
5195  return false;
5196 
5197  // Look for <15, ..., 3, -1, 1, 0>.
5198  for (unsigned i = 0; i != NumElts; ++i)
5199  if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
5200  return false;
5201 
5202  return true;
5203 }
5204 
5205 // If N is an integer constant that can be moved into a register in one
5206 // instruction, return an SDValue of such a constant (will become a MOV
5207 // instruction). Otherwise return null.
5208 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
5209  const ARMSubtarget *ST, SDLoc dl) {
5210  uint64_t Val;
5211  if (!isa<ConstantSDNode>(N))
5212  return SDValue();
5213  Val = cast<ConstantSDNode>(N)->getZExtValue();
5214 
5215  if (ST->isThumb1Only()) {
5216  if (Val <= 255 || ~Val <= 255)
5217  return DAG.getConstant(Val, dl, MVT::i32);
5218  } else {
5219  if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
5220  return DAG.getConstant(Val, dl, MVT::i32);
5221  }
5222  return SDValue();
5223 }
5224 
5225 // If this is a case we can't handle, return null and let the default
5226 // expansion code take care of it.
5227 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
5228  const ARMSubtarget *ST) const {
5229  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
5230  SDLoc dl(Op);
5231  EVT VT = Op.getValueType();
5232 
5233  APInt SplatBits, SplatUndef;
5234  unsigned SplatBitSize;
5235  bool HasAnyUndefs;
5236  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
5237  if (SplatBitSize <= 64) {
5238  // Check if an immediate VMOV works.
5239  EVT VmovVT;
5240  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
5241  SplatUndef.getZExtValue(), SplatBitSize,
5242  DAG, dl, VmovVT, VT.is128BitVector(),
5243  VMOVModImm);
5244  if (Val.getNode()) {
5245  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
5246  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5247  }
5248 
5249  // Try an immediate VMVN.
5250  uint64_t NegatedImm = (~SplatBits).getZExtValue();
5251  Val = isNEONModifiedImm(NegatedImm,
5252  SplatUndef.getZExtValue(), SplatBitSize,
5253  DAG, dl, VmovVT, VT.is128BitVector(),
5254  VMVNModImm);
5255  if (Val.getNode()) {
5256  SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
5257  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5258  }
5259 
5260  // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
5261  if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
5262  int ImmVal = ARM_AM::getFP32Imm(SplatBits);
5263  if (ImmVal != -1) {
5264  SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
5265  return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
5266  }
5267  }
5268  }
5269  }
5270 
5271  // Scan through the operands to see if only one value is used.
5272  //
5273  // As an optimisation, even if more than one value is used it may be more
5274  // profitable to splat with one value then change some lanes.
5275  //
5276  // Heuristically we decide to do this if the vector has a "dominant" value,
5277  // defined as splatted to more than half of the lanes.
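// For example (illustrative only), a non-constant <a, b, a, a> is lowered as
// a VDUP of the dominant value 'a' followed by an insert of 'b' into lane 1.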
5278  unsigned NumElts = VT.getVectorNumElements();
5279  bool isOnlyLowElement = true;
5280  bool usesOnlyOneValue = true;
5281  bool hasDominantValue = false;
5282  bool isConstant = true;
5283 
5284  // Map of the number of times a particular SDValue appears in the
5285  // element list.
5286  DenseMap<SDValue, unsigned> ValueCounts;
5287  SDValue Value;
5288  for (unsigned i = 0; i < NumElts; ++i) {
5289  SDValue V = Op.getOperand(i);
5290  if (V.getOpcode() == ISD::UNDEF)
5291  continue;
5292  if (i > 0)
5293  isOnlyLowElement = false;
5294  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
5295  isConstant = false;
5296 
5297  ValueCounts.insert(std::make_pair(V, 0));
5298  unsigned &Count = ValueCounts[V];
5299 
5300  // Is this value dominant? (takes up more than half of the lanes)
5301  if (++Count > (NumElts / 2)) {
5302  hasDominantValue = true;
5303  Value = V;
5304  }
5305  }
5306  if (ValueCounts.size() != 1)
5307  usesOnlyOneValue = false;
5308  if (!Value.getNode() && ValueCounts.size() > 0)
5309  Value = ValueCounts.begin()->first;
5310 
5311  if (ValueCounts.size() == 0)
5312  return DAG.getUNDEF(VT);
5313 
5314  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
5315  // Keep going if we are hitting this case.
5316  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
5317  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
5318 
5319  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5320 
5321  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
5322  // i32 and try again.
5323  if (hasDominantValue && EltSize <= 32) {
5324  if (!isConstant) {
5325  SDValue N;
5326 
5327  // If we are VDUPing a value that comes directly from a vector, that will
5328  // cause an unnecessary move to and from a GPR, where instead we could
5329  // just use VDUPLANE. We can only do this if the lane being extracted
5330  // is at a constant index, as the VDUP from lane instructions only have
5331  // constant-index forms.
5332  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5333  isa<ConstantSDNode>(Value->getOperand(1))) {
5334  // We need to create a new undef vector to use for the VDUPLANE if the
5335  // size of the vector from which we get the value is different than the
5336  // size of the vector that we need to create. We will insert the element
5337  // such that the register coalescer will remove unnecessary copies.
5338  if (VT != Value->getOperand(0).getValueType()) {
5339  ConstantSDNode *constIndex;
5340  constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
5341  assert(constIndex && "The index is not a constant!");
5342  unsigned index = constIndex->getAPIntValue().getLimitedValue() %
5343  VT.getVectorNumElements();
5344  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5345  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
5346  Value, DAG.getConstant(index, dl, MVT::i32)),
5347  DAG.getConstant(index, dl, MVT::i32));
5348  } else
5349  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5350  Value->getOperand(0), Value->getOperand(1));
5351  } else
5352  N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
5353 
5354  if (!usesOnlyOneValue) {
5355  // The dominant value was splatted as 'N', but we now have to insert
5356  // all differing elements.
5357  for (unsigned I = 0; I < NumElts; ++I) {
5358  if (Op.getOperand(I) == Value)
5359  continue;
5360  SmallVector<SDValue, 3> Ops;
5361  Ops.push_back(N);
5362  Ops.push_back(Op.getOperand(I));
5363  Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
5364  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
5365  }
5366  }
5367  return N;
5368  }
5369  if (VT.getVectorElementType().isFloatingPoint()) {
5370  SmallVector<SDValue, 8> Ops;
5371  for (unsigned i = 0; i < NumElts; ++i)
5372  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
5373  Op.getOperand(i)));
5374  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
5375  SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
5376  Val = LowerBUILD_VECTOR(Val, DAG, ST);
5377  if (Val.getNode())
5378  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5379  }
5380  if (usesOnlyOneValue) {
5381  SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
5382  if (isConstant && Val.getNode())
5383  return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
5384  }
5385  }
5386 
5387  // If all elements are constants and the case above didn't get hit, fall back
5388  // to the default expansion, which will generate a load from the constant
5389  // pool.
5390  if (isConstant)
5391  return SDValue();
5392 
5393  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
5394  if (NumElts >= 4) {
5395  SDValue shuffle = ReconstructShuffle(Op, DAG);
5396  if (shuffle != SDValue())
5397  return shuffle;
5398  }
5399 
5400  // Vectors with 32- or 64-bit elements can be built by directly assigning
5401  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
5402  // will be legalized.
5403  if (EltSize >= 32) {
5404  // Do the expansion with floating-point types, since that is what the VFP
5405  // registers are defined to use, and since i64 is not legal.
5406  EVT EltVT = EVT::getFloatingPointVT(EltSize);
5407  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
5408  SmallVector<SDValue, 8> Ops;
5409  for (unsigned i = 0; i < NumElts; ++i)
5410  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
5411  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
5412  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5413  }
5414 
5415  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
5416  // know the default expansion would otherwise fall back on something even
5417  // worse. For a vector with one or two non-undef values, that's
5418  // scalar_to_vector for the elements followed by a shuffle (provided the
5419  // shuffle is valid for the target) and materialization element by element
5420  // on the stack followed by a load for everything else.
5421  if (!isConstant && !usesOnlyOneValue) {
5422  SDValue Vec = DAG.getUNDEF(VT);
5423  for (unsigned i = 0 ; i < NumElts; ++i) {
5424  SDValue V = Op.getOperand(i);
5425  if (V.getOpcode() == ISD::UNDEF)
5426  continue;
5427  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
5428  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
5429  }
5430  return Vec;
5431  }
5432 
5433  return SDValue();
5434 }
5435 
5436 // Gather data to see if the operation can be modelled as a
5437 // shuffle in combination with VEXTs.
5438 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
5439  SelectionDAG &DAG) const {
5440  SDLoc dl(Op);
5441  EVT VT = Op.getValueType();
5442  unsigned NumElts = VT.getVectorNumElements();
5443 
5444  SmallVector<SDValue, 2> SourceVecs;
5445  SmallVector<unsigned, 2> MinElts;
5446  SmallVector<unsigned, 2> MaxElts;
5447 
5448  for (unsigned i = 0; i < NumElts; ++i) {
5449  SDValue V = Op.getOperand(i);
5450  if (V.getOpcode() == ISD::UNDEF)
5451  continue;
5452  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
5453  // A shuffle can only come from building a vector from various
5454  // elements of other vectors.
5455  return SDValue();
5456  } else if (V.getOperand(0).getValueType().getVectorElementType() !=
5457  VT.getVectorElementType()) {
5458  // This code doesn't know how to handle shuffles where the vector
5459  // element types do not match (this happens because type legalization
5460  // promotes the return type of EXTRACT_VECTOR_ELT).
5461  // FIXME: It might be appropriate to extend this code to handle
5462  // mismatched types.
5463  return SDValue();
5464  }
5465 
5466  // Record this extraction against the appropriate vector if possible...
5467  SDValue SourceVec = V.getOperand(0);
5468  // If the element number isn't a constant, we can't effectively
5469  // analyze what's going on.
5470  if (!isa<ConstantSDNode>(V.getOperand(1)))
5471  return SDValue();
5472  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5473  bool FoundSource = false;
5474  for (unsigned j = 0; j < SourceVecs.size(); ++j) {
5475  if (SourceVecs[j] == SourceVec) {
5476  if (MinElts[j] > EltNo)
5477  MinElts[j] = EltNo;
5478  if (MaxElts[j] < EltNo)
5479  MaxElts[j] = EltNo;
5480  FoundSource = true;
5481  break;
5482  }
5483  }
5484 
5485  // Or record a new source if not...
5486  if (!FoundSource) {
5487  SourceVecs.push_back(SourceVec);
5488  MinElts.push_back(EltNo);
5489  MaxElts.push_back(EltNo);
5490  }
5491  }
5492 
5493  // Currently we only do something sane when at most two source vectors are
5494  // involved.
5495  if (SourceVecs.size() > 2)
5496  return SDValue();
5497 
5498  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
5499  int VEXTOffsets[2] = {0, 0};
5500 
5501  // This loop extracts the usage patterns of the source vectors
5502  // and prepares appropriate SDValues for a shuffle if possible.
5503  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
5504  if (SourceVecs[i].getValueType() == VT) {
5505  // No VEXT necessary
5506  ShuffleSrcs[i] = SourceVecs[i];
5507  VEXTOffsets[i] = 0;
5508  continue;
5509  } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
5510  // It probably isn't worth padding out a smaller vector just to
5511  // break it down again in a shuffle.
5512  return SDValue();
5513  }
5514 
5515  // Since only 64-bit and 128-bit vectors are legal on ARM and
5516  // we've eliminated the other cases...
5517  assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
5518  "unexpected vector sizes in ReconstructShuffle");
5519 
5520  if (MaxElts[i] - MinElts[i] >= NumElts) {
5521  // Span too large for a VEXT to cope
5522  return SDValue();
5523  }
5524 
5525  if (MinElts[i] >= NumElts) {
5526  // The extraction can just take the second half
5527  VEXTOffsets[i] = NumElts;
5528  ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5529  SourceVecs[i],
5530  DAG.getIntPtrConstant(NumElts, dl));
5531  } else if (MaxElts[i] < NumElts) {
5532  // The extraction can just take the first half
5533  VEXTOffsets[i] = 0;
5534  ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5535  SourceVecs[i],
5536  DAG.getIntPtrConstant(0, dl));
5537  } else {
5538  // An actual VEXT is needed
5539  VEXTOffsets[i] = MinElts[i];
5540  SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5541  SourceVecs[i],
5542  DAG.getIntPtrConstant(0, dl));
5543  SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5544  SourceVecs[i],
5545  DAG.getIntPtrConstant(NumElts, dl));
5546  ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
5547  DAG.getConstant(VEXTOffsets[i], dl,
5548  MVT::i32));
5549  }
5550  }
5551 
5552  SmallVector<int, 8> Mask;
5553 
5554  for (unsigned i = 0; i < NumElts; ++i) {
5555  SDValue Entry = Op.getOperand(i);
5556  if (Entry.getOpcode() == ISD::UNDEF) {
5557  Mask.push_back(-1);
5558  continue;
5559  }
5560 
5561  SDValue ExtractVec = Entry.getOperand(0);
5562  int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
5563  .getOperand(1))->getSExtValue();
5564  if (ExtractVec == SourceVecs[0]) {
5565  Mask.push_back(ExtractElt - VEXTOffsets[0]);
5566  } else {
5567  Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
5568  }
5569  }
5570 
5571  // Final check before we try to produce nonsense...
5572  if (isShuffleMaskLegal(Mask, VT))
5573  return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
5574  &Mask[0]);
5575 
5576  return SDValue();
5577 }
5578 
5579 /// isShuffleMaskLegal - Targets can use this to indicate that they only
5580 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
5581 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
5582 /// are assumed to be legal.
5583 bool
5584 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
5585  EVT VT) const {
5586  if (VT.getVectorNumElements() == 4 &&
5587  (VT.is128BitVector() || VT.is64BitVector())) {
5588  unsigned PFIndexes[4];
5589  for (unsigned i = 0; i != 4; ++i) {
5590  if (M[i] < 0)
5591  PFIndexes[i] = 8;
5592  else
5593  PFIndexes[i] = M[i];
5594  }
5595 
5596  // Compute the index in the perfect shuffle table.
5597  unsigned PFTableIndex =
5598  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5599  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5600  unsigned Cost = (PFEntry >> 30);
5601 
5602  if (Cost <= 4)
5603  return true;
5604  }
5605 
5606  bool ReverseVEXT, isV_UNDEF;
5607  unsigned Imm, WhichResult;
5608 
5609  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5610  return (EltSize >= 32 ||
5611  ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
5612  isVREVMask(M, VT, 64) ||
5613  isVREVMask(M, VT, 32) ||
5614  isVREVMask(M, VT, 16) ||
5615  isVEXTMask(M, VT, ReverseVEXT, Imm) ||
5616  isVTBLMask(M, VT) ||
5617  isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
5618  ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
5619 }
5620 
5621 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5622 /// the specified operations to build the shuffle.
5623 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5624  SDValue RHS, SelectionDAG &DAG,
5625  SDLoc dl) {
5626  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5627  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5628  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
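// Each PFEntry packs a cost (bits 31-30), an opcode (bits 29-26) and two
// 13-bit operand IDs; an operand ID is the four mask indices encoded in
// base 9, with 8 denoting an undef lane.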
5629 
5630  enum {
5631  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5632  OP_VREV,
5633  OP_VDUP0,
5634  OP_VDUP1,
5635  OP_VDUP2,
5636  OP_VDUP3,
5637  OP_VEXT1,
5638  OP_VEXT2,
5639  OP_VEXT3,
5640  OP_VUZPL, // VUZP, left result
5641  OP_VUZPR, // VUZP, right result
5642  OP_VZIPL, // VZIP, left result
5643  OP_VZIPR, // VZIP, right result
5644  OP_VTRNL, // VTRN, left result
5645  OP_VTRNR // VTRN, right result
5646  };
5647 
5648  if (OpNum == OP_COPY) {
5649  if (LHSID == (1*9+2)*9+3) return LHS;
5650  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5651  return RHS;
5652  }
5653 
5654  SDValue OpLHS, OpRHS;
5655  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5656  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5657  EVT VT = OpLHS.getValueType();
5658 
5659  switch (OpNum) {
5660  default: llvm_unreachable("Unknown shuffle opcode!");
5661  case OP_VREV:
5662  // VREV divides the vector in half and swaps within the half.
5663  if (VT.getVectorElementType() == MVT::i32 ||
5664  VT.getVectorElementType() == MVT::f32)
5665  return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
5666  // vrev <4 x i16> -> VREV32
5667  if (VT.getVectorElementType() == MVT::i16)
5668  return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
5669  // vrev <4 x i8> -> VREV16
5670  assert(VT.getVectorElementType() == MVT::i8);
5671  return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
5672  case OP_VDUP0:
5673  case OP_VDUP1:
5674  case OP_VDUP2:
5675  case OP_VDUP3:
5676  return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5677  OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
5678  case OP_VEXT1:
5679  case OP_VEXT2:
5680  case OP_VEXT3:
5681  return DAG.getNode(ARMISD::VEXT, dl, VT,
5682  OpLHS, OpRHS,
5683  DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
5684  case OP_VUZPL:
5685  case OP_VUZPR:
5686  return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
5687  OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
5688  case OP_VZIPL:
5689  case OP_VZIPR:
5690  return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
5691  OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
5692  case OP_VTRNL:
5693  case OP_VTRNR:
5694  return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
5695  OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
5696  }
5697 }
5698 
5699 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
5700  ArrayRef<int> ShuffleMask,
5701  SelectionDAG &DAG) {
5702  // Check to see if we can use the VTBL instruction.
5703  SDValue V1 = Op.getOperand(0);
5704  SDValue V2 = Op.getOperand(1);
5705  SDLoc DL(Op);
5706 
5707  SmallVector<SDValue, 8> VTBLMask;
5708  for (ArrayRef<int>::iterator
5709  I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
5710  VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
5711 
5712  if (V2.getNode()->getOpcode() == ISD::UNDEF)
5713  return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
5714  DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
5715 
5716  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
5717  DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
5718 }
5719 
5720 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
5721  SelectionDAG &DAG) {
5722  SDLoc DL(Op);
5723  SDValue OpLHS = Op.getOperand(0);
5724  EVT VT = OpLHS.getValueType();
5725 
5726  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
5727  "Expect an v8i16/v16i8 type");
5728  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
5729  // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8>. Now
5730  // rotate the vector by 8 bytes with a VEXT so the two double words swap,
5731  // giving the fully reversed result. The v8i16 case is similar.
5732  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
5733  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
5734  DAG.getConstant(ExtractNum, DL, MVT::i32));
5735 }
5736 
5737 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
5738  SDValue V1 = Op.getOperand(0);
5739  SDValue V2 = Op.getOperand(1);
5740  SDLoc dl(Op);
5741  EVT VT = Op.getValueType();
5742  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5743 
5744  // Convert shuffles that are directly supported on NEON to target-specific
5745  // DAG nodes, instead of keeping them as shuffles and matching them again
5746  // during code selection. This is more efficient and avoids the possibility
5747  // of inconsistencies between legalization and selection.
5748  // FIXME: floating-point vectors should be canonicalized to integer vectors
5749  // of the same type so that they get CSEd properly.
5750  ArrayRef<int> ShuffleMask = SVN->getMask();
5751 
5752  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5753  if (EltSize <= 32) {
5754  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
5755  int Lane = SVN->getSplatIndex();
5756  // If this is undef splat, generate it via "just" vdup, if possible.
5757  if (Lane == -1) Lane = 0;
5758 
5759  // Test if V1 is a SCALAR_TO_VECTOR.
5760  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
5761  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
5762  }
5763  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
5764  // (and probably will turn into a SCALAR_TO_VECTOR once legalization
5765  // reaches it).
5766  if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
5767  !isa<ConstantSDNode>(V1.getOperand(0))) {
5768  bool IsScalarToVector = true;
5769  for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
5770  if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
5771  IsScalarToVector = false;
5772  break;
5773  }
5774  if (IsScalarToVector)
5775  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
5776  }
5777  return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
5778  DAG.getConstant(Lane, dl, MVT::i32));
5779  }
5780 
5781  bool ReverseVEXT;
5782  unsigned Imm;
5783  if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
5784  if (ReverseVEXT)
5785  std::swap(V1, V2);
5786  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
5787  DAG.getConstant(Imm, dl, MVT::i32));
5788  }
5789 
5790  if (isVREVMask(ShuffleMask, VT, 64))
5791  return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
5792  if (isVREVMask(ShuffleMask, VT, 32))
5793  return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
5794  if (isVREVMask(ShuffleMask, VT, 16))
5795  return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
5796 
5797  if (V2->getOpcode() == ISD::UNDEF &&
5798  isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
5799  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
5800  DAG.getConstant(Imm, dl, MVT::i32));
5801  }
5802 
5803  // Check for Neon shuffles that modify both input vectors in place.
5804  // If both results are used, i.e., if there are two shuffles with the same
5805  // source operands and with masks corresponding to both results of one of
5806  // these operations, DAG memoization will ensure that a single node is
5807  // used for both shuffles.
5808  unsigned WhichResult;
5809  bool isV_UNDEF;
5810  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
5811  ShuffleMask, VT, WhichResult, isV_UNDEF)) {
5812  if (isV_UNDEF)
5813  V2 = V1;
5814  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
5815  .getValue(WhichResult);
5816  }
5817 
5818  // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
5819  // shuffles that produce a result larger than their operands with:
5820  // shuffle(concat(v1, undef), concat(v2, undef))
5821  // ->
5822  // shuffle(concat(v1, v2), undef)
5823  // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
5824  //
5825  // This is useful in the general case, but there are special cases where
5826  // native shuffles produce larger results: the two-result ops.
5827  //
5828  // Look through the concat when lowering them:
5829  // shuffle(concat(v1, v2), undef)
5830  // ->
5831  // concat(VZIP(v1, v2):0, :1)
5832  //
5833  if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
5834  V2->getOpcode() == ISD::UNDEF) {
5835  SDValue SubV1 = V1->getOperand(0);
5836  SDValue SubV2 = V1->getOperand(1);
5837  EVT SubVT = SubV1.getValueType();
5838 
5839  // We expect these to have been canonicalized to -1.
5840  assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
5841  return i < (int)VT.getVectorNumElements();
5842  }) && "Unexpected shuffle index into UNDEF operand!");
5843 
5844  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
5845  ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
5846  if (isV_UNDEF)
5847  SubV2 = SubV1;
5848  assert((WhichResult == 0) &&
5849  "In-place shuffle of concat can only have one result!");
5850  SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
5851  SubV1, SubV2);
5852  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
5853  Res.getValue(1));
5854  }
5855  }
5856  }
5857 
5858  // If the shuffle is not directly supported and it has 4 elements, use
5859  // the PerfectShuffle-generated table to synthesize it from other shuffles.
5860  unsigned NumElts = VT.getVectorNumElements();
5861  if (NumElts == 4) {
5862  unsigned PFIndexes[4];
5863  for (unsigned i = 0; i != 4; ++i) {
5864  if (ShuffleMask[i] < 0)
5865  PFIndexes[i] = 8;
5866  else
5867  PFIndexes[i] = ShuffleMask[i];
5868  }
5869 
5870  // Compute the index in the perfect shuffle table.
5871  unsigned PFTableIndex =
5872  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5873  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5874  unsigned Cost = (PFEntry >> 30);
5875 
5876  if (Cost <= 4)
5877  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5878  }
5879 
5880  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
5881  if (EltSize >= 32) {
5882  // Do the expansion with floating-point types, since that is what the VFP
5883  // registers are defined to use, and since i64 is not legal.
5884  EVT EltVT = EVT::getFloatingPointVT(EltSize);
5885  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
5886  V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
5887  V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
5888  SmallVector<SDValue, 8> Ops;
5889  for (unsigned i = 0; i < NumElts; ++i) {
5890  if (ShuffleMask[i] < 0)
5891  Ops.push_back(DAG.getUNDEF(EltVT));
5892  else
5893  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
5894  ShuffleMask[i] < (int)NumElts ? V1 : V2,
5895  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
5896  dl, MVT::i32)));
5897  }
5898  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
5899  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5900  }
5901 
5902  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
5903  return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
5904 
5905  if (VT == MVT::v8i8) {
5906  SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
5907  if (NewOp.getNode())
5908  return NewOp;
5909  }
5910 
5911  return SDValue();
5912 }
5913 
5914 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
5915  // INSERT_VECTOR_ELT is legal only for immediate indexes.
5916  SDValue Lane = Op.getOperand(2);
5917  if (!isa<ConstantSDNode>(Lane))
5918  return SDValue();
5919 
5920  return Op;
5921 }
5922 
5923 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
5924  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
5925  SDValue Lane = Op.getOperand(1);
5926  if (!isa<ConstantSDNode>(Lane))
5927  return SDValue();
5928 
5929  SDValue Vec = Op.getOperand(0);
5930  if (Op.getValueType() == MVT::i32 &&
5931  Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
5932  SDLoc dl(Op);
5933  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
5934  }
5935 
5936  return Op;
5937 }
5938 
5939 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
5940  // The only time a CONCAT_VECTORS operation can have legal types is when
5941  // two 64-bit vectors are concatenated to a 128-bit vector.
5942  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
5943  "unexpected CONCAT_VECTORS");
5944  SDLoc dl(Op);
5945  SDValue Val = DAG.getUNDEF(MVT::v2f64);
5946  SDValue Op0 = Op.getOperand(0);
5947  SDValue Op1 = Op.getOperand(1);
5948  if (Op0.getOpcode() != ISD::UNDEF)
5949  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
5950  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
5951  DAG.getIntPtrConstant(0, dl));
5952  if (Op1.getOpcode() != ISD::UNDEF)
5953  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
5954  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
5955  DAG.getIntPtrConstant(1, dl));
5956  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
5957 }
5958 
5959 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
5960 /// element has been zero/sign-extended, depending on the isSigned parameter,
5961 /// from an integer type half its size.
5962 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
5963  bool isSigned) {
5964  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
5965  EVT VT = N->getValueType(0);
5966  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
5967  SDNode *BVN = N->getOperand(0).getNode();
5968  if (BVN->getValueType(0) != MVT::v4i32 ||
5969  BVN->getOpcode() != ISD::BUILD_VECTOR)
5970  return false;
5971  unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
5972  unsigned HiElt = 1 - LoElt;
5973  ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
5974  ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
5975  ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
5976  ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
5977  if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
5978  return false;
5979  if (isSigned) {
5980  if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
5981  Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
5982  return true;
5983  } else {
5984  if (Hi0->isNullValue() && Hi1->isNullValue())
5985  return true;
5986  }
5987  return false;
5988  }
5989 
5990  if (N->getOpcode() != ISD::BUILD_VECTOR)
5991  return false;
5992 
5993  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
5994  SDNode *Elt = N->getOperand(i).getNode();
5995  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
5996  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5997  unsigned HalfSize = EltSize / 2;
5998  if (isSigned) {
5999  if (!isIntN(HalfSize, C->getSExtValue()))
6000  return false;
6001  } else {
6002  if (!isUIntN(HalfSize, C->getZExtValue()))
6003  return false;
6004  }
6005  continue;
6006  }
6007  return false;
6008  }
6009 
6010  return true;
6011 }
6012 
6013 /// isSignExtended - Check if a node is a vector value that is sign-extended
6014 /// or a constant BUILD_VECTOR with sign-extended elements.
6015 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6016  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6017  return true;
6018  if (isExtendedBUILD_VECTOR(N, DAG, true))
6019  return true;
6020  return false;
6021 }
6022 
6023 /// isZeroExtended - Check if a node is a vector value that is zero-extended
6024 /// or a constant BUILD_VECTOR with zero-extended elements.
6025 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6026  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6027  return true;
6028  if (isExtendedBUILD_VECTOR(N, DAG, false))
6029  return true;
6030  return false;
6031 }
6032 
6033 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6034  if (OrigVT.getSizeInBits() >= 64)
6035  return OrigVT;
6036 
6037  assert(OrigVT.isSimple() && "Expecting a simple value type");
6038 
6039  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6040  switch (OrigSimpleTy) {
6041  default: llvm_unreachable("Unexpected Vector Type");
6042  case MVT::v2i8:
6043  case MVT::v2i16:
6044  return MVT::v2i32;
6045  case MVT::v4i8:
6046  return MVT::v4i16;
6047  }
6048 }
6049 
6050 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6051 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6052 /// We insert the required extension here to get the vector to fill a D register.
6053 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6054  const EVT &OrigTy,
6055  const EVT &ExtTy,
6056  unsigned ExtOpcode) {
6057  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6058  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6059  // 64-bits we need to insert a new extension so that it will be 64-bits.
6060  assert(ExtTy.is128BitVector() && "Unexpected extension size");
6061  if (OrigTy.getSizeInBits() >= 64)
6062  return N;
6063 
6064  // Must extend size to at least 64 bits to be used as an operand for VMULL.
6065  EVT NewVT = getExtensionTo64Bits(OrigTy);
6066 
6067  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6068 }
6069 
6070 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
6071 /// does not do any sign/zero extension. If the original vector is less
6072 /// than 64 bits, an appropriate extension will be added after the load to
6073 /// reach a total size of 64 bits. We have to add the extension separately
6074 /// because ARM does not have a sign/zero extending load for vectors.
6075 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
6076  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6077 
6078  // The load already has the right type.
6079  if (ExtendedTy == LD->getMemoryVT())
6080  return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6081  LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
6082  LD->isNonTemporal(), LD->isInvariant(),
6083  LD->getAlignment());
6084 
6085  // We need to create a zextload/sextload. We cannot just create a load
6086  // followed by a zext/sext node because LowerMUL is also run during normal
6087  // operation legalization where we can't create illegal types.
6088  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
6089  LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
6090  LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
6091  LD->isNonTemporal(), LD->getAlignment());
6092 }
6093 
6094 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
6095 /// extending load, or BUILD_VECTOR with extended elements, return the
6096 /// unextended value. The unextended vector should be 64 bits so that it can
6097 /// be used as an operand to a VMULL instruction. If the original vector size
6098 /// before extension is less than 64 bits we add an extension to resize
6099 /// the vector to 64 bits.
6100 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
6101  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
6102  return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
6103  N->getOperand(0)->getValueType(0),
6104  N->getValueType(0),
6105  N->getOpcode());
6106 
6107  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
6108  return SkipLoadExtensionForVMULL(LD, DAG);
6109 
6110  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
6111  // have been legalized as a BITCAST from v4i32.
6112  if (N->getOpcode() == ISD::BITCAST) {
6113  SDNode *BVN = N->getOperand(0).getNode();
6114  assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
6115  BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
6116  unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6117  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
6118  BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
6119  }
6120  // Construct a new BUILD_VECTOR with elements truncated to half the size.
6121  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
6122  EVT VT = N->getValueType(0);
6123  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
6124  unsigned NumElts = VT.getVectorNumElements();
6125  MVT TruncVT = MVT::getIntegerVT(EltSize);
6126  SmallVector<SDValue, 8> Ops;
6127  SDLoc dl(N);
6128  for (unsigned i = 0; i != NumElts; ++i) {
6129  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
6130  const APInt &CInt = C->getAPIntValue();
6131  // Element types smaller than 32 bits are not legal, so use i32 elements.
6132  // The values are implicitly truncated so sext vs. zext doesn't matter.
6133  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
6134  }
6135  return DAG.getNode(ISD::BUILD_VECTOR, dl,
6136  MVT::getVectorVT(TruncVT, NumElts), Ops);
6137 }
6138 
6139 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
6140  unsigned Opcode = N->getOpcode();
6141  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
6142  SDNode *N0 = N->getOperand(0).getNode();
6143  SDNode *N1 = N->getOperand(1).getNode();
6144  return N0->hasOneUse() && N1->hasOneUse() &&
6145  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
6146  }
6147  return false;
6148 }
6149 
6150 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
6151  unsigned Opcode = N->getOpcode();
6152  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
6153  SDNode *N0 = N->getOperand(0).getNode();
6154  SDNode *N1 = N->getOperand(1).getNode();
6155  return N0->hasOneUse() && N1->hasOneUse() &&
6156  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
6157  }
6158  return false;
6159 }
6160 
6161 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
6162  // Multiplications are only custom-lowered for 128-bit vectors so that
6163  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
6164  EVT VT = Op.getValueType();
6165  assert(VT.is128BitVector() && VT.isInteger() &&
6166  "unexpected type for custom-lowering ISD::MUL");
6167  SDNode *N0 = Op.getOperand(0).getNode();
6168  SDNode *N1 = Op.getOperand(1).getNode();
6169  unsigned NewOpc = 0;
6170  bool isMLA = false;
6171  bool isN0SExt = isSignExtended(N0, DAG);
6172  bool isN1SExt = isSignExtended(N1, DAG);
6173  if (isN0SExt && isN1SExt)
6174  NewOpc = ARMISD::VMULLs;
6175  else {
6176  bool isN0ZExt = isZeroExtended(N0, DAG);
6177  bool isN1ZExt = isZeroExtended(N1, DAG);
6178  if (isN0ZExt && isN1ZExt)
6179  NewOpc = ARMISD::VMULLu;
6180  else if (isN1SExt || isN1ZExt) {
6181  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
6182  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
6183  if (isN1SExt && isAddSubSExt(N0, DAG)) {
6184  NewOpc = ARMISD::VMULLs;
6185  isMLA = true;
6186  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
6187  NewOpc = ARMISD::VMULLu;
6188  isMLA = true;
6189  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
6190  std::swap(N0, N1);
6191  NewOpc = ARMISD::VMULLu;
6192  isMLA = true;
6193  }
6194  }
6195 
6196  if (!NewOpc) {
6197  if (VT == MVT::v2i64)
6198  // Fall through to expand this. It is not legal.
6199  return SDValue();
6200  else
6201  // Other vector multiplications are legal.
6202  return Op;
6203  }
6204  }
6205 
6206  // Legalize to a VMULL instruction.
6207  SDLoc DL(Op);
6208  SDValue Op0;
6209  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
6210  if (!isMLA) {
6211  Op0 = SkipExtensionForVMULL(N0, DAG);
6212  assert(Op0.getValueType().is64BitVector() &&
6213  Op1.getValueType().is64BitVector() &&
6214  "unexpected types for extended operands to VMULL");
6215  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
6216  }
6217 
6218  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
6219  // isel lowering to take advantage of no-stall back to back vmul + vmla.
6220  // vmull q0, d4, d6
6221  // vmlal q0, d5, d6
6222  // is faster than
6223  // vaddl q0, d4, d5
6224  // vmovl q1, d6
6225  // vmul q0, q0, q1
6226  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
6227  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
6228  EVT Op1VT = Op1.getValueType();
6229  return DAG.getNode(N0->getOpcode(), DL, VT,
6230  DAG.getNode(NewOpc, DL, VT,
6231  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
6232  DAG.getNode(NewOpc, DL, VT,
6233  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
6234 }
6235 
6236 static SDValue
6237 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
6238  // Convert to float
6239  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
6240  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
6241  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
6242  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
6243  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
6244  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
6245  // Get reciprocal estimate.
6246  // float4 recip = vrecpeq_f32(yf);
6247  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6248  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
6249  Y);
6250  // Because char has a smaller range than uchar, we can actually get away
6251  // without any Newton steps. This requires that we use a weird bias
6252  // of 0xb000, however (again, this has been exhaustively tested).
6253  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
6254  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
6255  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
6256  Y = DAG.getConstant(0xb000, dl, MVT::i32);
6257  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
6258  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
6259  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
6260  // Convert back to short.
6261  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
6262  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
6263  return X;
6264 }
6265 
6266 static SDValue
6267 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
6268  SDValue N2;
6269  // Convert to float.
6270  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
6271  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
6272  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
6273  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
6274  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
6275  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
6276 
6277  // Use reciprocal estimate and one refinement step.
6278  // float4 recip = vrecpeq_f32(yf);
6279  // recip *= vrecpsq_f32(yf, recip);
6280  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6281  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
6282  N1);
6283  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6284  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
6285  N1, N2);
6286  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6287  // Because short has a smaller range than ushort, we can actually get away
6288  // with only a single Newton step. This requires that we use a weird bias
6289  // of 0x89, however (again, this has been exhaustively tested).
6290  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
6291  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
6292  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
6293  N1 = DAG.getConstant(0x89, dl, MVT::i32);
6294  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
6295  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
6296  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
6297  // Convert back to integer and return.
6298  // return vmovn_s32(vcvt_s32_f32(result));
6299  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
6300  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
6301  return N0;
6302 }
6303 
6304 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
6305  EVT VT = Op.getValueType();
6306  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
6307  "unexpected type for custom-lowering ISD::SDIV");
6308 
6309  SDLoc dl(Op);
6310  SDValue N0 = Op.getOperand(0);
6311  SDValue N1 = Op.getOperand(1);
6312  SDValue N2, N3;
6313 
6314  if (VT == MVT::v8i8) {
6315  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
6316  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
6317 
6318  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6319  DAG.getIntPtrConstant(4, dl));
6320  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6321  DAG.getIntPtrConstant(4, dl));
6322  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6323  DAG.getIntPtrConstant(0, dl));
6324  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6325  DAG.getIntPtrConstant(0, dl));
6326 
6327  N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
6328  N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
6329 
6330  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
6331  N0 = LowerCONCAT_VECTORS(N0, DAG);
6332 
6333  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
6334  return N0;
6335  }
6336  return LowerSDIV_v4i16(N0, N1, dl, DAG);
6337 }
6338 
6339 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
6340  EVT VT = Op.getValueType();
6341  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
6342  "unexpected type for custom-lowering ISD::UDIV");
6343 
6344  SDLoc dl(Op);
6345  SDValue N0 = Op.getOperand(0);
6346  SDValue N1 = Op.getOperand(1);
6347  SDValue N2, N3;
6348 
6349  if (VT == MVT::v8i8) {
6350  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
6351  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
6352 
6353  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6354  DAG.getIntPtrConstant(4, dl));
6355  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6356  DAG.getIntPtrConstant(4, dl));
6357  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6358  DAG.getIntPtrConstant(0, dl));
6359  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6360  DAG.getIntPtrConstant(0, dl));
6361 
6362  N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
6363  N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
6364 
6365  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
6366  N0 = LowerCONCAT_VECTORS(N0, DAG);
6367 
6368  N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
6369  DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
6370  MVT::i32),
6371  N0);
6372  return N0;
6373  }
6374 
6375  // v4i16 udiv ... Convert to float.
6376  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
6377  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
6378  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
6379  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
6380  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
6381  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
6382 
6383  // Use reciprocal estimate and two refinement steps.
6384  // float4 recip = vrecpeq_f32(yf);
6385  // recip *= vrecpsq_f32(yf, recip);
6386  // recip *= vrecpsq_f32(yf, recip);
6387  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6388  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
6389  BN1);
6390  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6391  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
6392  BN1, N2);
6393  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6394  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6395  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
6396  BN1, N2);
6397  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6398  // Simply multiplying by the reciprocal estimate can leave us a few ulps
6399  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
6400  // and that it will never cause us to return an answer too large).
6401  // float4 result = as_float4(as_int4(xf*recip) + 2);
6402  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
6403  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
6404  N1 = DAG.getConstant(2, dl, MVT::i32);
6405  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
6406  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
6407  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
6408  // Convert back to integer and return.
6409  // return vmovn_u32(vcvt_s32_f32(result));
6410  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
6411  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
6412  return N0;
6413 }
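// A minimal NEON-intrinsics sketch of the v4i16 division-by-reciprocal
// sequence above (illustrative only, not part of the original lowering; the
// helper name is made up and it compiles only when targeting ARM NEON). It
// widens to f32, refines a reciprocal estimate twice with VRECPS, multiplies,
// bumps the bit pattern by 2 ulps, and narrows back to 16 bits.
#include <arm_neon.h>
static inline uint16x4_t sketch_udiv_v4i16(uint16x4_t x, uint16x4_t y) {
  float32x4_t xf = vcvtq_f32_u32(vmovl_u16(x));
  float32x4_t yf = vcvtq_f32_u32(vmovl_u16(y));
  float32x4_t recip = vrecpeq_f32(yf);               // vrecpeq_f32(yf)
  recip = vmulq_f32(vrecpsq_f32(yf, recip), recip);  // first refinement step
  recip = vmulq_f32(vrecpsq_f32(yf, recip), recip);  // second refinement step
  float32x4_t res = vmulq_f32(xf, recip);
  // Add 2 ulps so truncation never lands below the exact quotient.
  res = vreinterpretq_f32_u32(
      vaddq_u32(vreinterpretq_u32_f32(res), vdupq_n_u32(2)));
  return vmovn_u32(vcvtq_u32_f32(res));
}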
6414 
6415 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
6416  EVT VT = Op.getNode()->getValueType(0);
6417  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
6418 
6419  unsigned Opc;
6420  bool ExtraOp = false;
6421  switch (Op.getOpcode()) {
6422  default: llvm_unreachable("Invalid code");
6423  case ISD::ADDC: Opc = ARMISD::ADDC; break;
6424  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
6425  case ISD::SUBC: Opc = ARMISD::SUBC; break;
6426  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
6427  }
6428 
6429  if (!ExtraOp)
6430  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
6431  Op.getOperand(1));
6432  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
6433  Op.getOperand(1), Op.getOperand(2));
6434 }
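// How ISD::ADDC/ADDE map onto the i64 case: the low words add with ARMISD::ADDC
// (ADDS, which sets the carry flag) and the high words add with ARMISD::ADDE
// (ADC, which consumes it). A scalar C++ sketch of that split (illustrative
// helper, not part of the original source):
static inline unsigned long long sketchAdd64(unsigned long long a,
                                             unsigned long long b) {
  unsigned Lo = (unsigned)a + (unsigned)b;              // ADDC: adds, carry out
  unsigned Carry = Lo < (unsigned)a;                    // carry from the low add
  unsigned Hi = (unsigned)(a >> 32) + (unsigned)(b >> 32) + Carry; // ADDE: adc
  return ((unsigned long long)Hi << 32) | Lo;
}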
6435 
6436 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
6437  assert(Subtarget->isTargetDarwin());
6438 
6439  // For iOS, we want to call an alternative entry point: __sincos_stret,
6440  // whose return values are passed via sret.
6441  SDLoc dl(Op);
6442  SDValue Arg = Op.getOperand(0);
6443  EVT ArgVT = Arg.getValueType();
6444  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
6445  auto PtrVT = getPointerTy(DAG.getDataLayout());
6446 
6447  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6448 
6449  // Pair of floats / doubles used to pass the result.
6450  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
6451 
6452  // Create stack object for sret.
6453  auto &DL = DAG.getDataLayout();
6454  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
6455  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
6456  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
6457  SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
6458 
6459  ArgListTy Args;
6460  ArgListEntry Entry;
6461 
6462  Entry.Node = SRet;
6463  Entry.Ty = RetTy->getPointerTo();
6464  Entry.isSExt = false;
6465  Entry.isZExt = false;
6466  Entry.isSRet = true;
6467  Args.push_back(Entry);
6468 
6469  Entry.Node = Arg;
6470  Entry.Ty = ArgTy;
6471  Entry.isSExt = false;
6472  Entry.isZExt = false;
6473  Args.push_back(Entry);
6474 
6475  const char *LibcallName = (ArgVT == MVT::f64)
6476  ? "__sincos_stret" : "__sincosf_stret";
6477  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
6478 
6479  TargetLowering::CallLoweringInfo CLI(DAG);
6480  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
6481  .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
6482  std::move(Args), 0)
6483  .setDiscardResult();
6484 
6485  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
6486 
6487  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
6488  MachinePointerInfo(), false, false, false, 0);
6489 
6490  // Address of cos field.
6491  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
6492  DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
6493  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
6494  MachinePointerInfo(), false, false, false, 0);
6495 
6496  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
6497  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
6498  LoadSin.getValue(0), LoadCos.getValue(0));
6499 }
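// Shape of the call LowerFSINCOS builds: one __sincos_stret / __sincosf_stret
// call whose {sin, cos} pair comes back through the sret stack slot, followed
// by a load at offset 0 (sin) and one at offset getStoreSize() (cos). The
// declaration below is a hedged sketch of that interface as used here, not a
// verified Darwin prototype, and the helper name is made up:
extern "C" void __sincos_stret(void *SRet, double X); // result written via SRet
struct SinCosPairSketch { double Sin, Cos; };
static inline SinCosPairSketch sketchSinCos(double X) {
  SinCosPairSketch P;
  __sincos_stret(&P, X); // Sin stored at offset 0, Cos at offset sizeof(double)
  return P;
}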
6500 
6501 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
6502  // Monotonic load/store is legal for all targets
6503  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
6504  return Op;
6505 
6506  // Acquire/Release load/store is not legal for targets without a
6507  // dmb or equivalent available.
6508  return SDValue();
6509 }
6510 
6511 static void ReplaceREADCYCLECOUNTER(SDNode *N,
6512  SmallVectorImpl<SDValue> &Results,
6513  SelectionDAG &DAG,
6514  const ARMSubtarget *Subtarget) {
6515  SDLoc DL(N);
6516  SDValue Cycles32, OutChain;
6517 
6518  if (Subtarget->hasPerfMon()) {
6519  // Under the Performance Monitors extension, the cycle count is read with:
6520  // mrc p15, #0, <Rt>, c9, c13, #0
6521  SDValue Ops[] = { N->getOperand(0), // Chain
6522  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
6523  DAG.getConstant(15, DL, MVT::i32),
6524  DAG.getConstant(0, DL, MVT::i32),
6525  DAG.getConstant(9, DL, MVT::i32),
6526  DAG.getConstant(13, DL, MVT::i32),
6527  DAG.getConstant(0, DL, MVT::i32)
6528  };
6529 
6530  Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
6531  DAG.getVTList(MVT::i32, MVT::Other), Ops);
6532  OutChain = Cycles32.getValue(1);
6533  } else {
6534  // Intrinsic is defined to return 0 on unsupported platforms. Technically
6535  // there are older ARM CPUs that have implementation-specific ways of
6536  // obtaining this information (FIXME!).
6537  Cycles32 = DAG.getConstant(0, DL, MVT::i32);
6538  OutChain = DAG.getEntryNode();
6539  }
6540 
6541 
6542  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
6543  Cycles32, DAG.getConstant(0, DL, MVT::i32));
6544  Results.push_back(Cycles64);
6545  Results.push_back(OutChain);
6546 }
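// The INTRINSIC_W_CHAIN built above is the coprocessor read of the PMU cycle
// counter (PMCCNTR), and the BUILD_PAIR with a zero high half widens it to
// i64. A GCC/Clang inline-asm sketch of the same read (ARM/Thumb-2 targets
// only; illustrative helper, not part of the original source):
static inline unsigned long long sketchReadCycleCounter() {
  unsigned CC;
  __asm__ volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(CC)); // read PMCCNTR
  return (unsigned long long)CC; // high 32 bits are zero, matching BUILD_PAIR
}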
6547 
6548 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6549  switch (Op.getOpcode()) {
6550  default: llvm_unreachable("Don't know how to custom lower this!");
6551  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
6552  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
6553  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
6554  case ISD::GlobalAddress:
6555  switch (Subtarget->getTargetTriple().getObjectFormat()) {
6556  default: llvm_unreachable("unknown object format");
6557  case Triple::COFF:
6558  return LowerGlobalAddressWindows(Op, DAG);
6559  case Triple::ELF:
6560  return LowerGlobalAddressELF(Op, DAG);
6561  case Triple::MachO:
6562  return LowerGlobalAddressDarwin(Op, DAG);
6563  }
6564  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
6565  case ISD::SELECT: return LowerSELECT(Op, DAG);
6566  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
6567  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
6568  case ISD::BR_JT: return LowerBR_JT(Op, DAG);
6569  case ISD::VASTART: return LowerVASTART(Op, DAG);
6570  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6571  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
6572  case ISD::SINT_TO_FP:
6573  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
6574  case ISD::FP_TO_SINT:
6575  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
6576  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
6577  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
6578  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
6579  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
6580  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
6581  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
6582  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
6583  Subtarget);
6584  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
6585  case ISD::SHL:
6586  case ISD::SRL:
6587  case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
6588  case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
6589  case ISD::SRL_PARTS:
6590  case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
6591  case ISD::CTTZ:
6592  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
6593  case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
6594  case ISD::SETCC: return LowerVSETCC(Op, DAG);
6595  case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
6596  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
6597  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
6598  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
6599  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
6600  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
6601  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
6602  case ISD::MUL: return LowerMUL(Op, DAG);
6603  case ISD::SDIV: return LowerSDIV(Op, DAG);
6604  case ISD::UDIV: return LowerUDIV(Op, DAG);
6605  case ISD::ADDC:
6606  case ISD::ADDE:
6607  case ISD::SUBC:
6608  case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
6609  case ISD::SADDO:
6610  case ISD::UADDO:
6611  case ISD::SSUBO:
6612  case ISD::USUBO:
6613  return LowerXALUO(Op, DAG);
6614  case ISD::ATOMIC_LOAD:
6615  case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
6616  case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
6617  case ISD::SDIVREM:
6618  case ISD::UDIVREM: return LowerDivRem(Op, DAG);
6619  case ISD::DYNAMIC_STACKALLOC:
6620  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
6621  return LowerDYNAMIC_STACKALLOC(Op, DAG);
6622  llvm_unreachable("Don't know how to custom lower this!");
6623  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
6624  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
6625  }
6626 }
6627 
6628 /// ReplaceNodeResults - Replace the results of a node with an illegal result
6629 /// type with new values built out of custom code.
6630 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
6631  SmallVectorImpl<SDValue>&Results,
6632  SelectionDAG &DAG) const {
6633  SDValue Res;
6634  switch (N->getOpcode()) {
6635  default:
6636  llvm_unreachable("Don't know how to custom expand this!");
6637  case ISD::READ_REGISTER:
6638  ExpandREAD_REGISTER(N, Results, DAG);
6639  break;
6640  case ISD::BITCAST:
6641  Res = ExpandBITCAST(N, DAG);
6642  break;
6643  case ISD::SRL:
6644  case ISD::SRA:
6645  Res = Expand64BitShift(N, DAG, Subtarget);
6646  break;
6647  case ISD::READCYCLECOUNTER:
6648  ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
6649  return;
6650  }
6651  if (Res.getNode())
6652  Results.push_back(Res);
6653 }
6654 
6655 //===----------------------------------------------------------------------===//
6656 // ARM Scheduler Hooks
6657 //===----------------------------------------------------------------------===//
6658 
6659 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
6660 /// registers the function context.
6661 void ARMTargetLowering::
6662 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
6663  MachineBasicBlock *DispatchBB, int FI) const {
6664  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
6665  DebugLoc dl = MI->getDebugLoc();
6666  MachineFunction *MF = MBB->getParent();
6667  MachineRegisterInfo *MRI = &MF->getRegInfo();
6668  MachineConstantPool *MCP = MF->getConstantPool();
6669  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
6670  const Function *F = MF->getFunction();
6671 
6672  bool isThumb = Subtarget->isThumb();
6673  bool isThumb2 = Subtarget->isThumb2();
6674 
6675  unsigned PCLabelId = AFI->createPICLabelUId();
6676  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
6677  ARMConstantPoolValue *CPV =
6678  ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
6679  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
6680 
6681  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
6682  : &ARM::GPRRegClass;
6683 
6684  // Grab constant pool and fixed stack memory operands.
6685  MachineMemOperand *CPMMO =
6686  MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
6687  MachineMemOperand::MOLoad, 4, 4);
6688 
6689  MachineMemOperand *FIMMOSt =
6690  MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
6691  MachineMemOperand::MOStore, 4, 4);
6692 
6693  // Load the address of the dispatch MBB into the jump buffer.
6694  if (isThumb2) {
6695  // Incoming value: jbuf
6696  // ldr.n r5, LCPI1_1
6697  // orr r5, r5, #1
6698  // add r5, pc
6699  // str r5, [$jbuf, #+4] ; &jbuf[1]
6700  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6701  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
6702  .addConstantPoolIndex(CPI)
6703  .addMemOperand(CPMMO));
6704  // Set the low bit because of thumb mode.
6705  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6706  AddDefaultCC(
6707  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
6708  .addReg(NewVReg1, RegState::Kill)
6709  .addImm(0x01)));
6710  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6711  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
6712  .addReg(NewVReg2, RegState::Kill)
6713  .addImm(PCLabelId);
6714  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
6715  .addReg(NewVReg3, RegState::Kill)
6716  .addFrameIndex(FI)
6717  .addImm(36) // &jbuf[1] :: pc
6718  .addMemOperand(FIMMOSt));
6719  } else if (isThumb) {
6720  // Incoming value: jbuf
6721  // ldr.n r1, LCPI1_4
6722  // add r1, pc
6723  // mov r2, #1
6724  // orrs r1, r2
6725  // add r2, $jbuf, #+4 ; &jbuf[1]
6726  // str r1, [r2]
6727  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6728  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
6729  .addConstantPoolIndex(CPI)
6730  .addMemOperand(CPMMO));
6731  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6732  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
6733  .addReg(NewVReg1, RegState::Kill)
6734  .addImm(PCLabelId);
6735  // Set the low bit because of thumb mode.
6736  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6737  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
6738  .addReg(ARM::CPSR, RegState::Define)
6739  .addImm(1));
6740  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6741  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
6742  .addReg(ARM::CPSR, RegState::Define)
6743  .addReg(NewVReg2, RegState::Kill)
6744  .addReg(NewVReg3, RegState::Kill));
6745  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6746  BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
6747  .addFrameIndex(FI)
6748  .addImm(36); // &jbuf[1] :: pc
6749  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
6750  .addReg(NewVReg4, RegState::Kill)
6751  .addReg(NewVReg5, RegState::Kill)
6752  .addImm(0)
6753  .addMemOperand(FIMMOSt));
6754  } else {
6755  // Incoming value: jbuf
6756  // ldr r1, LCPI1_1
6757  // add r1, pc, r1
6758  // str r1, [$jbuf, #+4] ; &jbuf[1]
6759  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6760  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
6761  .addConstantPoolIndex(CPI)
6762  .addImm(0)
6763  .addMemOperand(CPMMO));
6764  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6765  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
6766  .addReg(NewVReg1, RegState::Kill)
6767  .addImm(PCLabelId));
6768  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
6769  .addReg(NewVReg2, RegState::Kill)
6770  .addFrameIndex(FI)
6771  .addImm(36) // &jbuf[1] :: pc
6772  .addMemOperand(FIMMOSt));
6773  }
6774 }
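// The frame index FI above points at the SjLj function-context record built by
// SjLjEHPrepare. A sketch of its 32-bit layout, which is where the magic
// offsets in this file come from (the struct is illustrative; the backend does
// not declare such a type):
struct SjLjFunctionContextSketch {
  void     *Prev;        // offset  0: previous context on the chain
  unsigned  CallSite;    // offset  4: call-site index, read by the dispatch block
  unsigned  Data[4];     // offset  8: exception data words
  void     *Personality; // offset 24
  void     *LSDA;        // offset 28
  void     *JBuf[5];     // offset 32: JBuf[1] (offset 36) holds the dispatch PC
};                       // stored by the code above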
6775 
6776 void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
6777  MachineBasicBlock *MBB) const {
6778  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
6779  DebugLoc dl = MI->getDebugLoc();
6780  MachineFunction *MF = MBB->getParent();
6781  MachineRegisterInfo *MRI = &MF->getRegInfo();
6782  MachineFrameInfo *MFI = MF->getFrameInfo();
6783  int FI = MFI->getFunctionContextIndex();
6784 
6785  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
6786  : &ARM::GPRnopcRegClass;
6787 
6788  // Get a mapping of the call site numbers to all of the landing pads they're
6789  // associated with.
6790  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
6791  unsigned MaxCSNum = 0;
6792  MachineModuleInfo &MMI = MF->getMMI();
6793  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
6794  ++BB) {
6795  if (!BB->isLandingPad()) continue;
6796 
6797  // FIXME: We should assert that the EH_LABEL is the first MI in the landing
6798  // pad.
6799  for (MachineBasicBlock::iterator
6800  II = BB->begin(), IE = BB->end(); II != IE; ++II) {
6801  if (!II->isEHLabel()) continue;
6802 
6803  MCSymbol *Sym = II->getOperand(0).getMCSymbol();
6804  if (!MMI.hasCallSiteLandingPad(Sym)) continue;
6805 
6806  SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
6807  for (SmallVectorImpl<unsigned>::iterator
6808  CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
6809  CSI != CSE; ++CSI) {
6810  CallSiteNumToLPad[*CSI].push_back(BB);
6811  MaxCSNum = std::max(MaxCSNum, *CSI);
6812  }
6813  break;
6814  }
6815  }
6816 
6817  // Get an ordered list of the machine basic blocks for the jump table.
6818  std::vector<MachineBasicBlock*> LPadList;
6819  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
6820  LPadList.reserve(CallSiteNumToLPad.size());
6821  for (unsigned I = 1; I <= MaxCSNum; ++I) {
6822  SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
6823  for (SmallVectorImpl<MachineBasicBlock*>::iterator
6824  II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
6825  LPadList.push_back(*II);
6826  InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
6827  }
6828  }
6829 
6830  assert(!LPadList.empty() &&
6831  "No landing pad destinations for the dispatch jump table!");
6832 
6833  // Create the jump table and associated information.
6834  MachineJumpTableInfo *JTI =
6835  MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
6836  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
6837  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
6838 
6839  // Create the MBBs for the dispatch code.
6840 
6841  // Shove the dispatch's address into the return slot in the function context.
6842  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
6843  DispatchBB->setIsLandingPad();
6844 
6845  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
6846  unsigned trap_opcode;
6847  if (Subtarget->isThumb())
6848  trap_opcode = ARM::tTRAP;
6849  else
6850  trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
6851 
6852  BuildMI(TrapBB, dl, TII->get(trap_opcode));
6853  DispatchBB->addSuccessor(TrapBB);
6854 
6855  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
6856  DispatchBB->addSuccessor(DispContBB);
6857 
6858  // Insert the MBBs.
6859  MF->insert(MF->end(), DispatchBB);
6860  MF->insert(MF->end(), DispContBB);
6861  MF->insert(MF->end(), TrapBB);
6862 
6863  // Insert code into the entry block that creates and registers the function
6864  // context.
6865  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
6866 
6867  MachineMemOperand *FIMMOLd =
6868  MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
6869  MachineMemOperand::MOLoad |
6870  MachineMemOperand::MOVolatile, 4, 4);
6871 
6872  MachineInstrBuilder MIB;
6873  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
6874 
6875  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
6876  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
6877 
6878  // Add a register mask with no preserved registers. This results in all
6879  // registers being marked as clobbered.
6880  MIB.addRegMask(RI.getNoPreservedMask());
6881 
6882  unsigned NumLPads = LPadList.size();
6883  if (Subtarget->isThumb2()) {
6884  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6885  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
6886  .addFrameIndex(FI)
6887  .addImm(4)
6888  .addMemOperand(FIMMOLd));
6889 
6890  if (NumLPads < 256) {
6891  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
6892  .addReg(NewVReg1)
6893  .addImm(LPadList.size()));
6894  } else {
6895  unsigned VReg1 = MRI->createVirtualRegister(TRC);
6896  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
6897  .addImm(NumLPads & 0xFFFF));
6898 
6899  unsigned VReg2 = VReg1;
6900  if ((NumLPads & 0xFFFF0000) != 0) {
6901  VReg2 = MRI->createVirtualRegister(TRC);
6902  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
6903  .addReg(VReg1)
6904  .addImm(NumLPads >> 16));
6905  }
6906 
6907  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
6908  .addReg(NewVReg1)
6909  .addReg(VReg2));
6910  }
6911 
6912  BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
6913  .addMBB(TrapBB)
6914  .addImm(ARMCC::HI)
6915  .addReg(ARM::CPSR);
6916 
6917  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6918  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
6919  .addJumpTableIndex(MJTI));
6920 
6921  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6922  AddDefaultCC(
6923  AddDefaultPred(
6924  BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
6925  .addReg(NewVReg3, RegState::Kill)
6926  .addReg(NewVReg1)
6927  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6928 
6929  BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
6930  .addReg(NewVReg4, RegState::Kill)
6931  .addReg(NewVReg1)
6932  .addJumpTableIndex(MJTI);
6933  } else if (Subtarget->isThumb()) {
6934  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6935  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
6936  .addFrameIndex(FI)
6937  .addImm(1)
6938  .addMemOperand(FIMMOLd));
6939 
6940  if (NumLPads < 256) {
6941  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
6942  .addReg(NewVReg1)
6943  .addImm(NumLPads));
6944  } else {
6945  MachineConstantPool *ConstantPool = MF->getConstantPool();
6946  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6947  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6948 
6949  // MachineConstantPool wants an explicit alignment.
6950  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
6951  if (Align == 0)
6952  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
6953  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6954 
6955  unsigned VReg1 = MRI->createVirtualRegister(TRC);
6956  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
6957  .addReg(VReg1, RegState::Define)
6958  .addConstantPoolIndex(Idx));
6959  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
6960  .addReg(NewVReg1)
6961  .addReg(VReg1));
6962  }
6963 
6964  BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
6965  .addMBB(TrapBB)
6966  .addImm(ARMCC::HI)
6967  .addReg(ARM::CPSR);
6968 
6969  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6970  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
6971  .addReg(ARM::CPSR, RegState::Define)
6972  .addReg(NewVReg1)
6973  .addImm(2));
6974 
6975  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6976  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
6977  .addJumpTableIndex(MJTI));
6978 
6979  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6980  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
6981  .addReg(ARM::CPSR, RegState::Define)
6982  .addReg(NewVReg2, RegState::Kill)
6983  .addReg(NewVReg3));
6984 
6985  MachineMemOperand *JTMMOLd =
6986  MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6987  MachineMemOperand::MOLoad, 4, 4);
6988 
6989  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6990  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
6991  .addReg(NewVReg4, RegState::Kill)
6992  .addImm(0)
6993  .addMemOperand(JTMMOLd));
6994 
6995  unsigned NewVReg6 = NewVReg5;
6996  if (RelocM == Reloc::PIC_) {
6997  NewVReg6 = MRI->createVirtualRegister(TRC);
6998  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
6999  .addReg(ARM::CPSR, RegState::Define)
7000  .addReg(NewVReg5, RegState::Kill)
7001  .addReg(NewVReg3));
7002  }
7003 
7004  BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
7005  .addReg(NewVReg6, RegState::Kill)
7006  .addJumpTableIndex(MJTI);
7007  } else {
7008  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7009  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
7010  .addFrameIndex(FI)
7011  .addImm(4)
7012  .addMemOperand(FIMMOLd));
7013 
7014  if (NumLPads < 256) {
7015  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
7016  .addReg(NewVReg1)
7017  .addImm(NumLPads));
7018  } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
7019  unsigned VReg1 = MRI->createVirtualRegister(TRC);
7020  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
7021  .addImm(NumLPads & 0xFFFF));
7022 
7023  unsigned VReg2 = VReg1;
7024  if ((NumLPads & 0xFFFF0000) != 0) {
7025  VReg2 = MRI->createVirtualRegister(TRC);
7026  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
7027  .addReg(VReg1)
7028  .addImm(NumLPads >> 16));
7029  }
7030 
7031  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
7032  .addReg(NewVReg1)
7033  .addReg(VReg2));
7034  } else {
7035  MachineConstantPool *ConstantPool = MF->getConstantPool();
7036  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
7037  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
7038 
7039  // MachineConstantPool wants an explicit alignment.
7040  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
7041  if (Align == 0)
7042  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
7043  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
7044 
7045  unsigned VReg1 = MRI->createVirtualRegister(TRC);
7046  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
7047  .addReg(VReg1, RegState::Define)
7048  .addConstantPoolIndex(Idx)
7049  .addImm(0));
7050  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
7051  .addReg(NewVReg1)
7052  .addReg(VReg1, RegState::Kill));
7053  }
7054 
7055  BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
7056  .addMBB(TrapBB)
7057  .addImm(ARMCC::HI)
7058  .addReg(ARM::CPSR);
7059 
7060  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
7061  AddDefaultCC(
7062  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
7063  .addReg(NewVReg1)
7064  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
7065  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
7066  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
7067  .addJumpTableIndex(MJTI));
7068 
7069  MachineMemOperand *JTMMOLd =
7070  MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
7071  MachineMemOperand::MOLoad, 4, 4);
7072  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
7073  AddDefaultPred(
7074  BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
7075  .addReg(NewVReg3, RegState::Kill)
7076  .addReg(NewVReg4)
7077  .addImm(0)
7078  .addMemOperand(JTMMOLd));
7079 
7080  if (RelocM == Reloc::PIC_) {
7081  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
7082  .addReg(NewVReg5, RegState::Kill)
7083  .addReg(NewVReg4)
7084  .addJumpTableIndex(MJTI);
7085  } else {
7086  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
7087  .addReg(NewVReg5, RegState::Kill)
7088  .addJumpTableIndex(MJTI);
7089  }
7090  }
7091 
7092  // Add the jump table entries as successors to the MBB.
7093  SmallPtrSet<MachineBasicBlock*, 64> SeenMBBs;
7094  for (std::vector<MachineBasicBlock*>::iterator
7095  I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
7096  MachineBasicBlock *CurMBB = *I;
7097  if (SeenMBBs.insert(CurMBB).second)
7098  DispContBB->addSuccessor(CurMBB);
7099  }
7100 
7101  // N.B. the order the invoke BBs are processed in doesn't matter here.
7102  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
7103  SmallVector<MachineBasicBlock*, 64> MBBLPads;
7104  for (MachineBasicBlock *BB : InvokeBBs) {
7105 
7106  // Remove the landing pad successor from the invoke block and replace it
7107  // with the new dispatch block.
7108  SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
7109  BB->succ_end());
7110  while (!Successors.empty()) {
7111  MachineBasicBlock *SMBB = Successors.pop_back_val();
7112  if (SMBB->isLandingPad()) {
7113  BB->removeSuccessor(SMBB);
7114  MBBLPads.push_back(SMBB);
7115  }
7116  }
7117 
7118  BB->addSuccessor(DispatchBB);
7119 
7120  // Find the invoke call and mark all of the callee-saved registers as
7121  // 'implicit defined' so that they're spilled. This prevents code from
7122  // moving instructions to before the EH block, where they will never be
7123  // executed.
7124  for (MachineBasicBlock::reverse_iterator
7125  II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
7126  if (!II->isCall()) continue;
7127 
7128  DenseMap<unsigned, bool> DefRegs;
7129  for (MachineInstr::mop_iterator
7130  OI = II->operands_begin(), OE = II->operands_end();
7131  OI != OE; ++OI) {
7132  if (!OI->isReg()) continue;
7133  DefRegs[OI->getReg()] = true;
7134  }
7135 
7136  MachineInstrBuilder MIB(*MF, &*II);
7137 
7138  for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
7139  unsigned Reg = SavedRegs[i];
7140  if (Subtarget->isThumb2() &&
7141  !ARM::tGPRRegClass.contains(Reg) &&
7142  !ARM::hGPRRegClass.contains(Reg))
7143  continue;
7144  if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
7145  continue;
7146  if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
7147  continue;
7148  if (!DefRegs[Reg])
7149  MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
7150  }
7151 
7152  break;
7153  }
7154  }
7155 
7156  // Mark all former landing pads as non-landing pads. The dispatch is the only
7157  // landing pad now.
7158  for (SmallVectorImpl<MachineBasicBlock*>::iterator
7159  I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
7160  (*I)->setIsLandingPad(false);
7161 
7162  // The instruction is gone now.
7163  MI->eraseFromParent();
7164 }
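// Control-flow shape of the dispatch code emitted above, in C-like form
// (function and parameter names are illustrative; the real code indexes a
// machine jump table rather than a function-pointer array):
static inline void sketchSjLjDispatch(unsigned CallSite,
                                      void (*const LandingPads[])(),
                                      unsigned NumLPads) {
  if (CallSite > NumLPads) // CMP + Bcc with ARMCC::HI -> TrapBB
    __builtin_trap();      // tTRAP / TRAP / TRAPNaCl
  LandingPads[CallSite](); // indexed branch through the jump table (BR_JT)
}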
7165 
7166 static
7167 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
7168  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
7169  E = MBB->succ_end(); I != E; ++I)
7170  if (*I != Succ)
7171  return *I;
7172  llvm_unreachable("Expecting a BB with two successors!");
7173 }
7174 
7175 /// Return the load opcode for a given load size. If the load size is >= 8,
7176 /// a NEON opcode will be returned.
7177 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
7178  if (LdSize >= 8)
7179  return LdSize == 16 ? ARM::VLD1q32wb_fixed
7180  : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
7181  if (IsThumb1)
7182  return LdSize == 4 ? ARM::tLDRi
7183  : LdSize == 2 ? ARM::tLDRHi
7184  : LdSize == 1 ? ARM::tLDRBi : 0;
7185  if (IsThumb2)
7186  return LdSize == 4 ? ARM::t2LDR_POST
7187  : LdSize == 2 ? ARM::t2LDRH_POST
7188  : LdSize == 1 ? ARM::t2LDRB_POST : 0;
7189  return LdSize == 4 ? ARM::LDR_POST_IMM
7190  : LdSize == 2 ? ARM::LDRH_POST
7191  : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
7192 }
7193 
7194 /// Return the store opcode for a given store size. If the store size is >= 8,
7195 /// a NEON opcode will be returned.
7196 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
7197  if (StSize >= 8)
7198  return StSize == 16 ? ARM::VST1q32wb_fixed
7199  : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
7200  if (IsThumb1)
7201  return StSize == 4 ? ARM::tSTRi
7202  : StSize == 2 ? ARM::tSTRHi
7203  : StSize == 1 ? ARM::tSTRBi : 0;
7204  if (IsThumb2)
7205  return StSize == 4 ? ARM::t2STR_POST
7206  : StSize == 2 ? ARM::t2STRH_POST
7207  : StSize == 1 ? ARM::t2STRB_POST : 0;
7208  return StSize == 4 ? ARM::STR_POST_IMM
7209  : StSize == 2 ? ARM::STRH_POST
7210  : StSize == 1 ? ARM::STRB_POST_IMM : 0;
7211 }
7212 
7213 /// Emit a post-increment load operation with given size. The instructions
7214 /// will be added to BB at Pos.
7215 static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
7216  const TargetInstrInfo *TII, DebugLoc dl,
7217  unsigned LdSize, unsigned Data, unsigned AddrIn,
7218  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
7219  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
7220  assert(LdOpc != 0 && "Should have a load opcode");
7221  if (LdSize >= 8) {
7222  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7223  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7224  .addImm(0));
7225  } else if (IsThumb1) {
7226  // load + update AddrIn
7227  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7228  .addReg(AddrIn).addImm(0));
7229  MachineInstrBuilder MIB =
7230  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
7231  MIB = AddDefaultT1CC(MIB);
7232  MIB.addReg(AddrIn).addImm(LdSize);
7233  AddDefaultPred(MIB);
7234  } else if (IsThumb2) {
7235  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7236  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7237  .addImm(LdSize));
7238  } else { // arm
7239  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7240  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7241  .addReg(0).addImm(LdSize));
7242  }
7243 }
7244 
7245 /// Emit a post-increment store operation with given size. The instructions
7246 /// will be added to BB at Pos.
7247 static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
7248  const TargetInstrInfo *TII, DebugLoc dl,
7249  unsigned StSize, unsigned Data, unsigned AddrIn,
7250  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
7251  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
7252  assert(StOpc != 0 && "Should have a store opcode");
7253  if (StSize >= 8) {
7254  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7255  .addReg(AddrIn).addImm(0).addReg(Data));
7256  } else if (IsThumb1) {
7257  // store + update AddrIn
7258  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
7259  .addReg(AddrIn).addImm(0));
7260  MachineInstrBuilder MIB =
7261  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
7262  MIB = AddDefaultT1CC(MIB);
7263  MIB.addReg(AddrIn).addImm(StSize);
7264  AddDefaultPred(MIB);
7265  } else if (IsThumb2) {
7266  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7267  .addReg(Data).addReg(AddrIn).addImm(StSize));
7268  } else { // arm
7269  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7270  .addReg(Data).addReg(AddrIn).addReg(0)
7271  .addImm(StSize));
7272  }
7273 }
7274 
7275 MachineBasicBlock *
7276 ARMTargetLowering::EmitStructByval(MachineInstr *MI,
7277  MachineBasicBlock *BB) const {
7278  // This pseudo instruction has 3 operands: dst, src, size
7279  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
7280  // Otherwise, we will generate unrolled scalar copies.
7281  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
7282  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7283  MachineFunction::iterator It = BB;
7284  ++It;
7285 
7286  unsigned dest = MI->getOperand(0).getReg();
7287  unsigned src = MI->getOperand(1).getReg();
7288  unsigned SizeVal = MI->getOperand(2).getImm();
7289  unsigned Align = MI->getOperand(3).getImm();
7290  DebugLoc dl = MI->getDebugLoc();
7291 
7292  MachineFunction *MF = BB->getParent();
7293  MachineRegisterInfo &MRI = MF->getRegInfo();
7294  unsigned UnitSize = 0;
7295  const TargetRegisterClass *TRC = nullptr;
7296  const TargetRegisterClass *VecTRC = nullptr;
7297 
7298  bool IsThumb1 = Subtarget->isThumb1Only();
7299  bool IsThumb2 = Subtarget->isThumb2();
7300 
7301  if (Align & 1) {
7302  UnitSize = 1;
7303  } else if (Align & 2) {
7304  UnitSize = 2;
7305  } else {
7306  // Check whether we can use NEON instructions.
7307  if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
7308  Subtarget->hasNEON()) {
7309  if ((Align % 16 == 0) && SizeVal >= 16)
7310  UnitSize = 16;
7311  else if ((Align % 8 == 0) && SizeVal >= 8)
7312  UnitSize = 8;
7313  }
7314  // Can't use NEON instructions.
7315  if (UnitSize == 0)
7316  UnitSize = 4;
7317  }
7318 
7319  // Select the correct opcode and register class for unit size load/store
7320  bool IsNeon = UnitSize >= 8;
7321  TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
7322  if (IsNeon)
7323  VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
7324  : UnitSize == 8 ? &ARM::DPRRegClass
7325  : nullptr;
7326 
7327  unsigned BytesLeft = SizeVal % UnitSize;
7328  unsigned LoopSize = SizeVal - BytesLeft;
7329 
7330  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
7331  // Use LDR and STR to copy.
7332  // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
7333  // [destOut] = STR_POST(scratch, destIn, UnitSize)
7334  unsigned srcIn = src;
7335  unsigned destIn = dest;
7336  for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
7337  unsigned srcOut = MRI.createVirtualRegister(TRC);
7338  unsigned destOut = MRI.createVirtualRegister(TRC);
7339  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
7340  emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
7341  IsThumb1, IsThumb2);
7342  emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
7343  IsThumb1, IsThumb2);
7344  srcIn = srcOut;
7345  destIn = destOut;
7346  }
7347 
7348  // Handle the leftover bytes with LDRB and STRB.
7349  // [scratch, srcOut] = LDRB_POST(srcIn, 1)
7350  // [destOut] = STRB_POST(scratch, destIn, 1)
7351  for (unsigned i = 0; i < BytesLeft; i++) {
7352  unsigned srcOut = MRI.createVirtualRegister(TRC);
7353  unsigned destOut = MRI.createVirtualRegister(TRC);
7354  unsigned scratch = MRI.createVirtualRegister(TRC);
7355  emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
7356  IsThumb1, IsThumb2);
7357  emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
7358  IsThumb1, IsThumb2);
7359  srcIn = srcOut;
7360  destIn = destOut;
7361  }
7362  MI->eraseFromParent(); // The instruction is gone now.
7363  return BB;
7364  }
7365 
7366  // Expand the pseudo op to a loop.
7367  // thisMBB:
7368  // ...
7369  // movw varEnd, # --> with thumb2
7370  // movt varEnd, #
7371  // ldrcp varEnd, idx --> without thumb2
7372  // fallthrough --> loopMBB
7373  // loopMBB:
7374  // PHI varPhi, varEnd, varLoop
7375  // PHI srcPhi, src, srcLoop
7376  // PHI destPhi, dst, destLoop
7377  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
7378  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
7379  // subs varLoop, varPhi, #UnitSize
7380  // bne loopMBB
7381  // fallthrough --> exitMBB
7382  // exitMBB:
7383  // epilogue to handle left-over bytes
7384  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
7385  // [destOut] = STRB_POST(scratch, destLoop, 1)
7386  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7387  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7388  MF->insert(It, loopMBB);
7389  MF->insert(It, exitMBB);
7390 
7391  // Transfer the remainder of BB and its successor edges to exitMBB.
7392  exitMBB->splice(exitMBB->begin(), BB,
7393  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7394  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7395 
7396  // Load an immediate to varEnd.
7397  unsigned varEnd = MRI.createVirtualRegister(TRC);
7398  if (Subtarget->useMovt(*MF)) {
7399  unsigned Vtmp = varEnd;
7400  if ((LoopSize & 0xFFFF0000) != 0)
7401  Vtmp = MRI.createVirtualRegister(TRC);
7402  AddDefaultPred(BuildMI(BB, dl,
7403  TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
7404  Vtmp).addImm(LoopSize & 0xFFFF));
7405 
7406  if ((LoopSize & 0xFFFF0000) != 0)
7407  AddDefaultPred(BuildMI(BB, dl,
7408  TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
7409  varEnd)
7410  .addReg(Vtmp)
7411  .addImm(LoopSize >> 16));
7412  } else {
7413  MachineConstantPool *ConstantPool = MF->getConstantPool();
7414  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
7415  const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
7416 
7417  // MachineConstantPool wants an explicit alignment.
7418  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
7419  if (Align == 0)
7420  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
7421  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
7422 
7423  if (IsThumb1)
7424  AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
7425  varEnd, RegState::Define).addConstantPoolIndex(Idx));
7426  else
7427  AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
7428  varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
7429  }
7430  BB->addSuccessor(loopMBB);
7431 
7432  // Generate the loop body:
7433  // varPhi = PHI(varLoop, varEnd)
7434  // srcPhi = PHI(srcLoop, src)
7435  // destPhi = PHI(destLoop, dst)
7436  MachineBasicBlock *entryBB = BB;
7437  BB = loopMBB;
7438  unsigned varLoop = MRI.createVirtualRegister(TRC);
7439  unsigned varPhi = MRI.createVirtualRegister(TRC);
7440  unsigned srcLoop = MRI.createVirtualRegister(TRC);
7441  unsigned srcPhi = MRI.createVirtualRegister(TRC);
7442  unsigned destLoop = MRI.createVirtualRegister(TRC);
7443  unsigned destPhi = MRI.createVirtualRegister(TRC);
7444 
7445  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
7446  .addReg(varLoop).addMBB(loopMBB)
7447  .addReg(varEnd).addMBB(entryBB);
7448  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
7449  .addReg(srcLoop).addMBB(loopMBB)
7450  .addReg(src).addMBB(entryBB);
7451  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
7452  .addReg(destLoop).addMBB(loopMBB)
7453  .addReg(dest).addMBB(entryBB);
7454 
7455  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
7456  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
7457  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
7458  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
7459  IsThumb1, IsThumb2);
7460  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
7461  IsThumb1, IsThumb2);
7462 
7463  // Decrement loop variable by UnitSize.
7464  if (IsThumb1) {
7465  MachineInstrBuilder MIB =
7466  BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
7467  MIB = AddDefaultT1CC(MIB);
7468  MIB.addReg(varPhi).addImm(UnitSize);
7469  AddDefaultPred(MIB);
7470  } else {
7471  MachineInstrBuilder MIB =
7472  BuildMI(*BB, BB->end(), dl,
7473  TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
7474  AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
7475  MIB->getOperand(5).setReg(ARM::CPSR);
7476  MIB->getOperand(5).setIsDef(true);
7477  }
7478  BuildMI(*BB, BB->end(), dl,
7479  TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
7480  .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
7481 
7482  // loopMBB can loop back to loopMBB or fall through to exitMBB.
7483  BB->addSuccessor(loopMBB);
7484  BB->addSuccessor(exitMBB);
7485 
7486  // Add epilogue to handle BytesLeft.
7487  BB = exitMBB;
7488  MachineInstr *StartOfExit = exitMBB->begin();
7489 
7490  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
7491  // [destOut] = STRB_POST(scratch, destLoop, 1)
7492  unsigned srcIn = srcLoop;
7493  unsigned destIn = destLoop;
7494  for (unsigned i = 0; i < BytesLeft; i++) {
7495  unsigned srcOut = MRI.createVirtualRegister(TRC);
7496  unsigned destOut = MRI.createVirtualRegister(TRC);
7497  unsigned scratch = MRI.createVirtualRegister(TRC);
7498  emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
7499  IsThumb1, IsThumb2);
7500  emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
7501  IsThumb1, IsThumb2);
7502  srcIn = srcOut;
7503  destIn = destOut;
7504  }
7505 
7506  MI->eraseFromParent(); // The instruction is gone now.
7507  return BB;
7508 }
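// What the expanded COPY_STRUCT_BYVAL pseudo computes, in plain C++: a main
// loop moving UnitSize bytes per iteration with post-incremented loads and
// stores (LDR/STR, or VLD1/VST1 when NEON is usable), followed by a byte-wise
// epilogue for the remainder (illustrative helper, not part of the original
// source):
static void sketchStructByvalCopy(unsigned char *Dst, const unsigned char *Src,
                                  unsigned Size, unsigned UnitSize) {
  unsigned LoopSize = Size - (Size % UnitSize);
  unsigned I = 0;
  for (; I < LoopSize; I += UnitSize)             // [scratch, srcOut] = LDR_POST
    __builtin_memcpy(Dst + I, Src + I, UnitSize); // [destOut]         = STR_POST
  for (; I < Size; ++I)                           // LDRB_POST / STRB_POST epilogue
    Dst[I] = Src[I];
}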
7509 
7510 MachineBasicBlock *
7511 ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
7512  MachineBasicBlock *MBB) const {
7513  const TargetMachine &TM = getTargetMachine();
7514  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
7515  DebugLoc DL = MI->getDebugLoc();
7516 
7517  assert(Subtarget->isTargetWindows() &&
7518  "__chkstk is only supported on Windows");
7519  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
7520 
7521  // __chkstk takes the number of words to allocate on the stack in R4, and
7522  // returns the stack adjustment in number of bytes in R4. This will not
7523  // clobber any other registers (other than the obvious lr).
7524  //
7525  // Although, technically, IP should be considered a register which may be
7526  // clobbered, the call itself will not touch it. Windows on ARM is a pure
7527  // thumb-2 environment, so there is no interworking required. As a result, we
7528  // do not expect a veneer to be emitted by the linker, clobbering IP.
7529  //
7530  // Each module receives its own copy of __chkstk, so no import thunk is
7531  // required, again, ensuring that IP is not clobbered.
7532  //
7533  // Finally, although some linkers may theoretically provide a trampoline for
7534  // out of range calls (which is quite common due to a 32M range limitation of
7535  // branches for Thumb), we can generate the long-call version via
7536  // -mcmodel=large, alleviating the need for the trampoline which may clobber
7537  // IP.
7538 
7539  switch (TM.getCodeModel()) {
7540  case CodeModel::Small:
7541  case CodeModel::Medium:
7542  case CodeModel::Default:
7543  case CodeModel::Kernel:
7544  BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
7545  .addImm((unsigned)ARMCC::AL).addReg(0)
7546  .addExternalSymbol("__chkstk")
7547  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7548  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
7549  .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7550  break;
7551  case CodeModel::Large:
7552  case CodeModel::JITDefault: {
7553  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
7554  unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
7555 
7556  BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
7557  .addExternalSymbol("__chkstk");
7558  BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
7559  .addImm((unsigned)ARMCC::AL).addReg(0)
7560  .addReg(Reg, RegState::Kill)
7561  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7562  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
7563  .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7564  break;
7565  }
7566  }
7567 
7568  AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
7569  ARM::SP)
7570  .addReg(ARM::SP).addReg(ARM::R4)));
7571 
7572  MI->eraseFromParent();
7573  return MBB;
7574 }
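// The probe sequence emitted above, shown as the resulting Thumb-2 assembly
// for the small/medium code models (register allocation aside; the large code
// model first materializes the address with t2MOVi32imm and calls through blx):
//
//   ; r4 = number of 4-byte words to allocate
//   bl      __chkstk        ; probes the stack, returns the byte count in r4
//   sub.w   sp, sp, r4      ; the predicated t2SUBrr emitted last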
7575 
7576 MachineBasicBlock *
7577 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
7578  MachineBasicBlock *BB) const {
7579  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
7580  DebugLoc dl = MI->getDebugLoc();
7581  bool isThumb2 = Subtarget->isThumb2();
7582  switch (MI->getOpcode()) {
7583  default: {
7584  MI->dump();
7585  llvm_unreachable("Unexpected instr type to insert");
7586  }
7587  // The Thumb2 pre-indexed stores have the same MI operands, they just
7588  // define them differently in the .td files from the isel patterns, so
7589  // they need pseudos.
7590  case ARM::t2STR_preidx:
7591  MI->setDesc(TII->get(ARM::t2STR_PRE));
7592  return BB;
7593  case ARM::t2STRB_preidx:
7594  MI->setDesc(TII->get(ARM::t2STRB_PRE));
7595  return BB;
7596  case ARM::t2STRH_preidx:
7597  MI->setDesc(TII->get(ARM::t2STRH_PRE));
7598  return BB;
7599 
7600  case ARM::STRi_preidx:
7601  case ARM::STRBi_preidx: {
7602  unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
7603  ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
7604  // Decode the offset.
7605  unsigned Offset = MI->getOperand(4).getImm();
7606  bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
7607  Offset = ARM_AM::getAM2Offset(Offset);
7608  if (isSub)
7609  Offset = -Offset;
7610 
7611  MachineMemOperand *MMO = *MI->memoperands_begin();
7612  BuildMI(*BB, MI, dl, TII->get(NewOpc))
7613  .addOperand(MI->getOperand(0)) // Rn_wb
7614  .addOperand(MI->getOperand(1)) // Rt
7615  .addOperand(MI->getOperand(2)) // Rn
7616  .addImm(Offset) // offset (skip GPR==zero_reg)
7617  .addOperand(MI->getOperand(5)) // pred
7618  .addOperand(MI->getOperand(6))
7619  .addMemOperand(MMO);
7620  MI->eraseFromParent();
7621  return BB;
7622  }
7623  case ARM::STRr_preidx:
7624  case ARM::STRBr_preidx:
7625  case ARM::STRH_preidx: {
7626  unsigned NewOpc;
7627  switch (MI->getOpcode()) {
7628  default: llvm_unreachable("unexpected opcode!");
7629  case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
7630  case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
7631  case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
7632  }
7633  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
7634  for (unsigned i = 0; i < MI->getNumOperands(); ++i)
7635  MIB.addOperand(MI->getOperand(i));
7636  MI->eraseFromParent();
7637  return BB;
7638  }
7639 
7640  case ARM::tMOVCCr_pseudo: {
7641  // To "insert" a SELECT_CC instruction, we actually have to insert the
7642  // diamond control-flow pattern. The incoming instruction knows the
7643  // destination vreg to set, the condition code register to branch on, the
7644  // true/false values to select between, and a branch opcode to use.
7645  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7646  MachineFunction::iterator It = BB;
7647  ++It;
7648 
7649  // thisMBB:
7650  // ...
7651  // TrueVal = ...
7652  // cmpTY ccX, r1, r2
7653  // bCC copy1MBB
7654  // fallthrough --> copy0MBB
7655  MachineBasicBlock *thisMBB = BB;
7656  MachineFunction *F = BB->getParent();
7657  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
7658  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
7659  F->insert(It, copy0MBB);
7660  F->insert(It, sinkMBB);
7661 
7662  // Transfer the remainder of BB and its successor edges to sinkMBB.
7663  sinkMBB->splice(sinkMBB->begin(), BB,
7664  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7665  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
7666 
7667  BB->addSuccessor(copy0MBB);
7668  BB->addSuccessor(sinkMBB);
7669 
7670  BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
7671  .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
7672 
7673  // copy0MBB:
7674  // %FalseValue = ...
7675  // # fallthrough to sinkMBB
7676  BB = copy0MBB;
7677 
7678  // Update machine-CFG edges
7679  BB->addSuccessor(sinkMBB);
7680 
7681  // sinkMBB:
7682  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
7683  // ...
7684  BB = sinkMBB;
7685  BuildMI(*BB, BB->begin(), dl,
7686  TII->get(ARM::PHI), MI->getOperand(0).getReg())
7687  .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
7688  .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
7689 
7690  MI->eraseFromParent(); // The pseudo instruction is gone now.
7691  return BB;
7692  }
7693 
7694  case ARM::BCCi64:
7695  case ARM::BCCZi64: {
7696  // If there is an unconditional branch to the other successor, remove it.
7697  BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
7698 
7699  // Compare both parts that make up the double comparison separately for
7700  // equality.
7701  bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
7702 
7703  unsigned LHS1 = MI->getOperand(1).getReg();
7704  unsigned LHS2 = MI->getOperand(2).getReg();
7705  if (RHSisZero) {
7706  AddDefaultPred(BuildMI(BB, dl,
7707  TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7708  .addReg(LHS1).addImm(0));
7709  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7710  .addReg(LHS2).addImm(0)
7711  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
7712  } else {
7713  unsigned RHS1 = MI->getOperand(3).getReg();
7714  unsigned RHS2 = MI->getOperand(4).getReg();
7715  AddDefaultPred(BuildMI(BB, dl,
7716  TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
7717  .addReg(LHS1).addReg(RHS1));
7718  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
7719  .addReg(LHS2).addReg(RHS2)
7720  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
7721  }
7722 
7723  MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
7724  MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
7725  if (MI->getOperand(0).getImm() == ARMCC::NE)
7726  std::swap(destMBB, exitMBB);
7727 
7728  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
7729  .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
7730  if (isThumb2)
7731  AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
7732  else
7733  BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
7734 
7735  MI->eraseFromParent(); // The pseudo instruction is gone now.
7736  return BB;
7737  }
7738 
7739  case ARM::Int_eh_sjlj_setjmp:
7740  case ARM::Int_eh_sjlj_setjmp_nofp:
7741  case ARM::tInt_eh_sjlj_setjmp:
7742  case ARM::t2Int_eh_sjlj_setjmp:
7743  case ARM::t2Int_eh_sjlj_setjmp_nofp:
7744  EmitSjLjDispatchBlock(MI, BB);
7745  return BB;
7746 
7747  case ARM::ABS:
7748  case ARM::t2ABS: {
7749  // To insert an ABS instruction, we have to insert the
7750  // diamond control-flow pattern. The incoming instruction knows the
7751  // source vreg to test against 0, the destination vreg to set,
7752  // the condition code register to branch on, the
7753  // true/false values to select between, and a branch opcode to use.
7754  // It transforms
7755  // V1 = ABS V0
7756  // into
7757  // V2 = MOVS V0
7758  // BCC (branch to SinkBB if V0 >= 0)
7759  // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
7760  // SinkBB: V1 = PHI(V2, V3)
7761  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7762  MachineFunction::iterator BBI = BB;
7763  ++BBI;
7764  MachineFunction *Fn = BB->getParent();
7765  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
7766  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
7767  Fn->insert(BBI, RSBBB);
7768  Fn->insert(BBI, SinkBB);
7769 
7770  unsigned int ABSSrcReg = MI->getOperand(1).getReg();
7771  unsigned int ABSDstReg = MI->getOperand(0).getReg();
7772  bool ABSSrcKill = MI->getOperand(1).isKill();
7773  bool isThumb2 = Subtarget->isThumb2();
7774  MachineRegisterInfo &MRI = Fn->getRegInfo();
7775  // In Thumb mode the S bit must not be specified if the source register is the SP
7776  // or PC, or if the destination register is the SP, so restrict the register class.
7777  unsigned NewRsbDstReg =
7778  MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
7779 
7780  // Transfer the remainder of BB and its successor edges to sinkMBB.
7781  SinkBB->splice(SinkBB->begin(), BB,
7782  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7783  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
7784 
7785  BB->addSuccessor(RSBBB);
7786  BB->addSuccessor(SinkBB);
7787 
7788  // fall through to SinkMBB
7789  RSBBB->addSuccessor(SinkBB);
7790 
7791  // insert a cmp at the end of BB
7792  AddDefaultPred(BuildMI(BB, dl,
7793  TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7794  .addReg(ABSSrcReg).addImm(0));
7795 
7796  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
7797  BuildMI(BB, dl,
7798  TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
7799  .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
7800 
7801  // insert rsbri in RSBBB
7802  // Note: BCC and rsbri will be converted into predicated rsbmi
7803  // by if-conversion pass
7804  BuildMI(*RSBBB, RSBBB->begin(), dl,
7805  TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
7806  .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
7807  .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
7808 
7809  // insert PHI in SinkBB,
7810  // reuse ABSDstReg to not change uses of ABS instruction
7811  BuildMI(*SinkBB, SinkBB->begin(), dl,
7812  TII->get(ARM::PHI), ABSDstReg)
7813  .addReg(NewRsbDstReg).addMBB(RSBBB)
7814  .addReg(ABSSrcReg).addMBB(BB);
7815 
7816  // remove ABS instruction
7817  MI->eraseFromParent();
7818 
7819  // return last added BB
7820  return SinkBB;
7821  }
7822  case ARM::COPY_STRUCT_BYVAL_I32:
7823  ++NumLoopByVals;
7824  return EmitStructByval(MI, BB);
7825  case ARM::WIN__CHKSTK:
7826  return EmitLowered__chkstk(MI, BB);
7827  }
7828 }
7829 
7830 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
7831  SDNode *Node) const {
7832  const MCInstrDesc *MCID = &MI->getDesc();
7833  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
7834  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
7835  // operand is still set to noreg. If needed, set the optional operand's
7836  // register to CPSR, and remove the redundant implicit def.
7837  //
7838  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
7839 
7840  // Rename pseudo opcodes.
7841  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
7842  if (NewOpc) {
7843  const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
7844  MCID = &TII->get(NewOpc);
7845 
7846  assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
7847  "converted opcode should be the same except for cc_out");
7848 
7849  MI->setDesc(*MCID);
7850 
7851  // Add the optional cc_out operand
7852  MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
7853  }
7854  unsigned ccOutIdx = MCID->getNumOperands() - 1;
7855 
7856  // Any ARM instruction that sets the 's' bit should specify an optional
7857  // "cc_out" operand in the last operand position.
7858  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
7859  assert(!NewOpc && "Optional cc_out operand required");
7860  return;
7861  }
7862  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
7863  // since we already have an optional CPSR def.
7864  bool definesCPSR = false;
7865  bool deadCPSR = false;
7866  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
7867  i != e; ++i) {
7868  const MachineOperand &MO = MI->getOperand(i);
7869  if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
7870  definesCPSR = true;
7871  if (MO.isDead())
7872  deadCPSR = true;
7873  MI->RemoveOperand(i);
7874  break;
7875  }
7876  }
7877  if (!definesCPSR) {
7878  assert(!NewOpc && "Optional cc_out operand required");
7879  return;
7880  }
7881  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
7882  if (deadCPSR) {
7883  assert(!MI->getOperand(ccOutIdx).getReg() &&
7884  "expect uninitialized optional cc_out operand");
7885  return;
7886  }
7887 
7888  // If this instruction was defined with an optional CPSR def and its dag node
7889  // had a live implicit CPSR def, then activate the optional CPSR def.
7890  MachineOperand &MO = MI->getOperand(ccOutIdx);
7891  MO.setReg(ARM::CPSR);
7892  MO.setIsDef(true);
7893 }
7894 
7895 //===----------------------------------------------------------------------===//
7896 // ARM Optimization Hooks
7897 //===----------------------------------------------------------------------===//
7898 
7899 // Helper function that checks if N is a null or all ones constant.
7900 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
7901  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
7902  if (!C)
7903  return false;
7904  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
7905 }
7906 
7907 // Return true if N is conditionally 0 or all ones.
7908 // Detects these expressions where cc is an i1 value:
7909 //
7910 // (select cc 0, y) [AllOnes=0]
7911 // (select cc y, 0) [AllOnes=0]
7912 // (zext cc) [AllOnes=0]
7913 // (sext cc) [AllOnes=0/1]
7914 // (select cc -1, y) [AllOnes=1]
7915 // (select cc y, -1) [AllOnes=1]
7916 //
7917 // Invert is set when N is the null/all ones constant when CC is false.
7918 // OtherOp is set to the alternative value of N.
7919 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
7920  SDValue &CC, bool &Invert,
7921  SDValue &OtherOp,
7922  SelectionDAG &DAG) {
7923  switch (N->getOpcode()) {
7924  default: return false;
7925  case ISD::SELECT: {
7926  CC = N->getOperand(0);
7927  SDValue N1 = N->getOperand(1);
7928  SDValue N2 = N->getOperand(2);
7929  if (isZeroOrAllOnes(N1, AllOnes)) {
7930  Invert = false;
7931  OtherOp = N2;
7932  return true;
7933  }
7934  if (isZeroOrAllOnes(N2, AllOnes)) {
7935  Invert = true;
7936  OtherOp = N1;
7937  return true;
7938  }
7939  return false;
7940  }
7941  case ISD::ZERO_EXTEND:
7942  // (zext cc) can never be the all ones value.
7943  if (AllOnes)
7944  return false;
7945  // Fall through.
7946  case ISD::SIGN_EXTEND: {
7947  SDLoc dl(N);
7948  EVT VT = N->getValueType(0);
7949  CC = N->getOperand(0);
7950  if (CC.getValueType() != MVT::i1)
7951  return false;
7952  Invert = !AllOnes;
7953  if (AllOnes)
7954  // When looking for an AllOnes constant, N is an sext, and the 'other'
7955  // value is 0.
7956  OtherOp = DAG.getConstant(0, dl, VT);
7957  else if (N->getOpcode() == ISD::ZERO_EXTEND)
7958  // When looking for a 0 constant, N can be zext or sext.
7959  OtherOp = DAG.getConstant(1, dl, VT);
7960  else
7961  OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
7962  VT);
7963  return true;
7964  }
7965  }
7966 }
7967 
7968 // Combine a constant select operand into its use:
7969 //
7970 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
7971 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
7972 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
7973 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
7974 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
7975 //
7976 // The transform is rejected if the select doesn't have a constant operand that
7977 // is null, or all ones when AllOnes is set.
7978 //
7979 // Also recognize sext/zext from i1:
7980 //
7981 // (add (zext cc), x) -> (select cc (add x, 1), x)
7982 // (add (sext cc), x) -> (select cc (add x, -1), x)
7983 //
7984 // These transformations eventually create predicated instructions.
7985 //
7986 // @param N The node to transform.
7987 // @param Slct The N operand that is a select.
7988 // @param OtherOp The other N operand (x above).
7989 // @param DCI Context.
7990 // @param AllOnes Require the select constant to be all ones instead of null.
7991 // @returns The new node, or SDValue() on failure.
7992 static
7993 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
7994  TargetLowering::DAGCombinerInfo &DCI,
7995  bool AllOnes = false) {
7996  SelectionDAG &DAG = DCI.DAG;
7997  EVT VT = N->getValueType(0);
7998  SDValue NonConstantVal;
7999  SDValue CCOp;
8000  bool SwapSelectOps;
8001  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
8002  NonConstantVal, DAG))
8003  return SDValue();
8004 
8005  // Slct is now known to be the desired identity constant when CC is true.
8006  SDValue TrueVal = OtherOp;
8007  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
8008  OtherOp, NonConstantVal);
8009  // Unless SwapSelectOps says CC should be false.
8010  if (SwapSelectOps)
8011  std::swap(TrueVal, FalseVal);
8012 
8013  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
8014  CCOp, TrueVal, FalseVal);
8015 }
8016 
8017 // Attempt combineSelectAndUse on each operand of a commutative operator N.
8018 static
8019 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
8020  TargetLowering::DAGCombinerInfo &DCI) {
8021  SDValue N0 = N->getOperand(0);
8022  SDValue N1 = N->getOperand(1);
8023  if (N0.getNode()->hasOneUse()) {
8024  SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
8025  if (Result.getNode())
8026  return Result;
8027  }
8028  if (N1.getNode()->hasOneUse()) {
8029  SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
8030  if (Result.getNode())
8031  return Result;
8032  }
8033  return SDValue();
8034 }
8035 
8036 // AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
8037 // (only after legalization).
8038 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
8039  TargetLowering::DAGCombinerInfo &DCI,
8040  const ARMSubtarget *Subtarget) {
8041 
8042  // Only perform this optimization after legalization and if NEON is available.
8043  // We also expect both operands to be BUILD_VECTORs.
8044  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
8045  || N0.getOpcode() != ISD::BUILD_VECTOR
8046  || N1.getOpcode() != ISD::BUILD_VECTOR)
8047  return SDValue();
8048 
8049  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
8050  EVT VT = N->getValueType(0);
8051  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
8052  return SDValue();
8053 
8054  // Check that the vector operands are of the right form.
8055  // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT
8056  // operands, where N is the size of the formed vector.
8057  // Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or
8058  // even index such that we have a pairwise add pattern.
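  //
  // For example (illustrative), with a single source vector v:
  //   N0 = BUILD_VECTOR (extractelt v, 0), (extractelt v, 2), ...
  //   N1 = BUILD_VECTOR (extractelt v, 1), (extractelt v, 3), ...
  // so (add N0, N1) sums adjacent lanes of v and maps onto a single vpaddl.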
8059 
8060  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
8061  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
8062  return SDValue();
8063  SDValue Vec = N0->getOperand(0)->getOperand(0);
8064  SDNode *V = Vec.getNode();
8065  unsigned nextIndex = 0;
8066 
8067  // For each operand of the ADD that is a BUILD_VECTOR,
8068  // check to see if each of its operands is an EXTRACT_VECTOR_ELT with
8069  // the same vector and the appropriate index.
8070  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
8071  if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
8072  && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8073 
8074  SDValue ExtVec0 = N0->getOperand(i);
8075  SDValue ExtVec1 = N1->getOperand(i);
8076 
8077  // The first operand is the vector; verify it's the same.
8078  if (V != ExtVec0->getOperand(0).getNode() ||
8079  V != ExtVec1->getOperand(0).getNode())
8080  return SDValue();
8081 
8082  // The second is the constant index; verify it's correct.
8083  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
8084  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
8085 
8086  // For the constants, we want to see all the even or all the odd indices.
8087  if (!C0 || !C1 || C0->getZExtValue() != nextIndex
8088  || C1->getZExtValue() != nextIndex+1)
8089  return SDValue();
8090 
8091  // Increment index.
8092  nextIndex+=2;
8093  } else
8094  return SDValue();
8095  }
8096 
8097  // Create VPADDL node.
8098  SelectionDAG &DAG = DCI.DAG;
8099  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8100 
8101  SDLoc dl(N);
8102 
8103  // Build operand list.
8104  SmallVector<SDValue, 8> Ops;
8105  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
8106  TLI.getPointerTy(DAG.getDataLayout())));
8107 
8108  // Input is the vector.
8109  Ops.push_back(Vec);
8110 
8111  // Get widened type and narrowed type.
8112  MVT widenType;
8113  unsigned numElem = VT.getVectorNumElements();
8114 
8115  EVT inputLaneType = Vec.getValueType().getVectorElementType();
8116  switch (inputLaneType.getSimpleVT().SimpleTy) {
8117  case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
8118  case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
8119  case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
8120  default:
8121  llvm_unreachable("Invalid vector element type for padd optimization.");
8122  }
8123 
8124  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
8125  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
8126  return DAG.getNode(ExtOp, dl, VT, tmp);
8127 }
8128 
8129 static SDValue findMUL_LOHI(SDValue V) {
8130  if (V->getOpcode() == ISD::UMUL_LOHI ||
8131  V->getOpcode() == ISD::SMUL_LOHI)
8132  return V;
8133  return SDValue();
8134 }
8135 
8136 static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
8137  TargetLowering::DAGCombinerInfo &DCI,
8138  const ARMSubtarget *Subtarget) {
8139 
8140  if (Subtarget->isThumb1Only()) return SDValue();
8141 
8142  // Only perform the checks after legalize when the pattern is available.
8143  if (DCI.isBeforeLegalize()) return SDValue();
8144 
8145  // Look for multiply add opportunities.
8146  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
8147  // each add node consumes a value from ISD::UMUL_LOHI and there is
8148  // a glue link from the first add to the second add.
8149  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
8150  // a S/UMLAL instruction.
8151  //                  UMUL_LOHI
8152  //                 / :lo    \ :hi
8153  //                /          \          [no multiline comment]
8154  //  loAdd ->  ADDE            |
8155  //                 \ :glue   /
8156  //                  \       /
8157  //                   ADDC   <- hiAdd
8158  //
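  // Illustratively (a rough sketch of the shape matched below, assuming the
  // unsigned case):
  //   lo, hi     = UMUL_LOHI(x, y)
  //   sum, carry = ADDC(lo, a)
  //   res        = ADDE(hi, b, carry)
  // is replaced by the two results of UMLAL(x, y, a, b), which computes the
  // same 64-bit accumulation.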
8159  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
8160  SDValue AddcOp0 = AddcNode->getOperand(0);
8161  SDValue AddcOp1 = AddcNode->getOperand(1);
8162 
8163  // Check if the two operands are from the same mul_lohi node.
8164  if (AddcOp0.getNode() == AddcOp1.getNode())
8165  return SDValue();
8166 
8167  assert(AddcNode->getNumValues() == 2 &&
8168  AddcNode->getValueType(0) == MVT::i32 &&
8169  "Expect ADDC with two result values. First: i32");
8170 
8171  // Check that we have a glued ADDC node.
8172  if (AddcNode->getValueType(1) != MVT::Glue)
8173  return SDValue();
8174 
8175  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
8176  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
8177  AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
8178  AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
8179  AddcOp1->getOpcode() != ISD::SMUL_LOHI)
8180  return SDValue();
8181 
8182  // Look for the glued ADDE.
8183  SDNode* AddeNode = AddcNode->getGluedUser();
8184  if (!AddeNode)
8185  return SDValue();
8186 
8187  // Make sure it is really an ADDE.
8188  if (AddeNode->getOpcode() != ISD::ADDE)
8189  return SDValue();
8190 
8191  assert(AddeNode->getNumOperands() == 3 &&
8192  AddeNode->getOperand(2).getValueType() == MVT::Glue &&
8193  "ADDE node has the wrong inputs");
8194 
8195  // Check for the triangle shape.
8196  SDValue AddeOp0 = AddeNode->getOperand(0);
8197  SDValue AddeOp1 = AddeNode->getOperand(1);
8198 
8199  // Make sure that the ADDE operands are not coming from the same node.
8200  if (AddeOp0.getNode() == AddeOp1.getNode())
8201  return SDValue();
8202 
8203  // Find the MUL_LOHI node walking up ADDE's operands.
8204  bool IsLeftOperandMUL = false;
8205  SDValue MULOp = findMUL_LOHI(AddeOp0);
8206  if (MULOp == SDValue())
8207  MULOp = findMUL_LOHI(AddeOp1);
8208  else
8209  IsLeftOperandMUL = true;
8210  if (MULOp == SDValue())
8211  return SDValue();
8212 
8213  // Figure out the right opcode.
8214  unsigned Opc = MULOp->getOpcode();
8215  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
8216 
8217  // Figure out the high and low input values to the MLAL node.
8218  SDValue* HiAdd = nullptr;
8219  SDValue* LoMul = nullptr;
8220  SDValue* LowAdd = nullptr;
8221 
8222  // Ensure that the ADDE is from the high result of ISD::S/UMUL_LOHI.
8223  if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
8224  return SDValue();
8225 
8226  if (IsLeftOperandMUL)
8227  HiAdd = &AddeOp1;
8228  else
8229  HiAdd = &AddeOp0;
8230 
8231 
8232  // Ensure that LoMul and LowAdd are taken from the correct ISD::S/UMUL_LOHI
8233  // node whose low result is fed to the ADDC we are checking.
8234 
8235  if (AddcOp0 == MULOp.getValue(0)) {
8236  LoMul = &AddcOp0;
8237  LowAdd = &AddcOp1;
8238  }
8239  if (AddcOp1 == MULOp.getValue(0)) {
8240  LoMul = &AddcOp1;
8241  LowAdd = &AddcOp0;
8242  }
8243 
8244  if (!LoMul)
8245  return SDValue();
8246 
8247  // Create the merged node.
8248  SelectionDAG &DAG = DCI.DAG;
8249 
8250  // Build operand list.
8252  Ops.push_back(LoMul->getOperand(0));
8253  Ops.push_back(LoMul->getOperand(1));
8254  Ops.push_back(*LowAdd);
8255  Ops.push_back(*HiAdd);
8256 
8257  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
8258  DAG.getVTList(MVT::i32, MVT::i32), Ops);
8259 
8260  // Replace the ADDC/ADDE nodes' uses with the MLAL node's values.
8261  SDValue HiMLALResult(MLALNode.getNode(), 1);
8262  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
8263 
8264  SDValue LoMLALResult(MLALNode.getNode(), 0);
8265  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
8266 
8267  // Return original node to notify the driver to stop replacing.
8268  SDValue resNode(AddcNode, 0);
8269  return resNode;
8270 }
8271 
8272 /// PerformADDCCombine - Target-specific dag combine transform from
8273 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
8274 static SDValue PerformADDCCombine(SDNode *N,
8275  TargetLowering::DAGCombinerInfo &DCI,
8276  const ARMSubtarget *Subtarget) {
8277 
8278  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
8279 
8280 }
8281 
8282 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
8283 /// operands N0 and N1. This is a helper for PerformADDCombine that is
8284 /// called with the default operands, and if that fails, with commuted
8285 /// operands.
8286 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
8287  TargetLowering::DAGCombinerInfo &DCI,
8288  const ARMSubtarget *Subtarget){
8289 
8290  // Attempt to create vpaddl for this add.
8291  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
8292  if (Result.getNode())
8293  return Result;
8294 
8295  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
8296  if (N0.getNode()->hasOneUse()) {
8297  SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
8298  if (Result.getNode()) return Result;
8299  }
8300  return SDValue();
8301 }
8302 
8303 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
8304 ///
8305 static SDValue PerformADDCombine(SDNode *N,
8306  TargetLowering::DAGCombinerInfo &DCI,
8307  const ARMSubtarget *Subtarget) {
8308  SDValue N0 = N->getOperand(0);
8309  SDValue N1 = N->getOperand(1);
8310 
8311  // First try with the default operand order.
8312  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
8313  if (Result.getNode())
8314  return Result;
8315 
8316  // If that didn't work, try again with the operands commuted.
8317  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
8318 }
8319 
8320 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
8321 ///
8322 static SDValue PerformSUBCombine(SDNode *N,
8323  TargetLowering::DAGCombinerInfo &DCI) {
8324  SDValue N0 = N->getOperand(0);
8325  SDValue N1 = N->getOperand(1);
8326 
8327  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
8328  if (N1.getNode()->hasOneUse()) {
8329  SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
8330  if (Result.getNode()) return Result;
8331  }
8332 
8333  return SDValue();
8334 }
8335 
8336 /// PerformVMULCombine
8337 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
8338 /// special multiplier accumulator forwarding.
8339 /// vmul d3, d0, d2
8340 /// vmla d3, d1, d2
8341 /// is faster than
8342 /// vadd d3, d0, d1
8343 /// vmul d3, d3, d2
8344 // However, for (A + B) * (A + B),
8345 // vadd d2, d0, d1
8346 // vmul d3, d0, d2
8347 // vmla d3, d1, d2
8348 // is slower than
8349 // vadd d2, d0, d1
8350 // vmul d3, d2, d2
8351 static SDValue PerformVMULCombine(SDNode *N,
8352  TargetLowering::DAGCombinerInfo &DCI,
8353  const ARMSubtarget *Subtarget) {
8354  if (!Subtarget->hasVMLxForwarding())
8355  return SDValue();
8356 
8357  SelectionDAG &DAG = DCI.DAG;
8358  SDValue N0 = N->getOperand(0);
8359  SDValue N1 = N->getOperand(1);
8360  unsigned Opcode = N0.getOpcode();
8361  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
8362  Opcode != ISD::FADD && Opcode != ISD::FSUB) {
8363  Opcode = N1.getOpcode();
8364  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
8365  Opcode != ISD::FADD && Opcode != ISD::FSUB)
8366  return SDValue();
8367  std::swap(N0, N1);
8368  }
8369 
8370  if (N0 == N1)
8371  return SDValue();
8372 
8373  EVT VT = N->getValueType(0);
8374  SDLoc DL(N);
8375  SDValue N00 = N0->getOperand(0);
8376  SDValue N01 = N0->getOperand(1);
8377  return DAG.getNode(Opcode, DL, VT,
8378  DAG.getNode(ISD::MUL, DL, VT, N00, N1),
8379  DAG.getNode(ISD::MUL, DL, VT, N01, N1));
8380 }
8381 
8382 static SDValue PerformMULCombine(SDNode *N,
8383  TargetLowering::DAGCombinerInfo &DCI,
8384  const ARMSubtarget *Subtarget) {
8385  SelectionDAG &DAG = DCI.DAG;
8386 
8387  if (Subtarget->isThumb1Only())
8388  return SDValue();
8389 
8390  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
8391  return SDValue();
8392 
8393  EVT VT = N->getValueType(0);
8394  if (VT.is64BitVector() || VT.is128BitVector())
8395  return PerformVMULCombine(N, DCI, Subtarget);
8396  if (VT != MVT::i32)
8397  return SDValue();
8398 
8399  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
8400  if (!C)
8401  return SDValue();
8402 
8403  int64_t MulAmt = C->getSExtValue();
8404  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
8405 
8406  ShiftAmt = ShiftAmt & (32 - 1);
8407  SDValue V = N->getOperand(0);
8408  SDLoc DL(N);
8409 
8410  SDValue Res;
8411  MulAmt >>= ShiftAmt;
8412 
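  // For example (illustrative): (mul x, 10) has ShiftAmt = 1 and a reduced
  // MulAmt of 5 = 2^2 + 1, so it becomes (shl (add (shl x, 2), x), 1).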
8413  if (MulAmt >= 0) {
8414  if (isPowerOf2_32(MulAmt - 1)) {
8415  // (mul x, 2^N + 1) => (add (shl x, N), x)
8416  Res = DAG.getNode(ISD::ADD, DL, VT,
8417  V,
8418  DAG.getNode(ISD::SHL, DL, VT,
8419  V,
8420  DAG.getConstant(Log2_32(MulAmt - 1), DL,
8421  MVT::i32)));
8422  } else if (isPowerOf2_32(MulAmt + 1)) {
8423  // (mul x, 2^N - 1) => (sub (shl x, N), x)
8424  Res = DAG.getNode(ISD::SUB, DL, VT,
8425  DAG.getNode(ISD::SHL, DL, VT,
8426  V,
8427  DAG.getConstant(Log2_32(MulAmt + 1), DL,
8428  MVT::i32)),
8429  V);
8430  } else
8431  return SDValue();
8432  } else {
8433  uint64_t MulAmtAbs = -MulAmt;
8434  if (isPowerOf2_32(MulAmtAbs + 1)) {
8435  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
8436  Res = DAG.getNode(ISD::SUB, DL, VT,
8437  V,
8438  DAG.getNode(ISD::SHL, DL, VT,
8439  V,
8440  DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
8441  MVT::i32)));
8442  } else if (isPowerOf2_32(MulAmtAbs - 1)) {
8443  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
8444  Res = DAG.getNode(ISD::ADD, DL, VT,
8445  V,
8446  DAG.getNode(ISD::SHL, DL, VT,
8447  V,
8448  DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
8449  MVT::i32)));
8450  Res = DAG.getNode(ISD::SUB, DL, VT,
8451  DAG.getConstant(0, DL, MVT::i32), Res);
8452 
8453  } else
8454  return SDValue();
8455  }
8456 
8457  if (ShiftAmt != 0)
8458  Res = DAG.getNode(ISD::SHL, DL, VT,
8459  Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
8460 
8461  // Do not add new nodes to DAG combiner worklist.
8462  DCI.CombineTo(N, Res, false);
8463  return SDValue();
8464 }
8465 
8466 static SDValue PerformANDCombine(SDNode *N,
8467  TargetLowering::DAGCombinerInfo &DCI,
8468  const ARMSubtarget *Subtarget) {
8469 
8470  // Attempt to use immediate-form VBIC
8471  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
8472  SDLoc dl(N);
8473  EVT VT = N->getValueType(0);
8474  SelectionDAG &DAG = DCI.DAG;
8475 
8476  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8477  return SDValue();
8478 
8479  APInt SplatBits, SplatUndef;
8480  unsigned SplatBitSize;
8481  bool HasAnyUndefs;
8482  if (BVN &&
8483  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
8484  if (SplatBitSize <= 64) {
8485  EVT VbicVT;
8486  SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
8487  SplatUndef.getZExtValue(), SplatBitSize,
8488  DAG, dl, VbicVT, VT.is128BitVector(),
8489  OtherModImm);
8490  if (Val.getNode()) {
8491  SDValue Input =
8492  DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
8493  SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
8494  return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
8495  }
8496  }
8497  }
8498 
8499  if (!Subtarget->isThumb1Only()) {
8500  // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
8501  SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
8502  if (Result.getNode())
8503  return Result;
8504  }
8505 
8506  return SDValue();
8507 }
8508 
8509 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
8510 static SDValue PerformORCombine(SDNode *N,
8511  TargetLowering::DAGCombinerInfo &DCI,
8512  const ARMSubtarget *Subtarget) {
8513  // Attempt to use immediate-form VORR
8514  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
8515  SDLoc dl(N);
8516  EVT VT = N->getValueType(0);
8517  SelectionDAG &DAG = DCI.DAG;
8518 
8519  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8520  return SDValue();
8521 
8522  APInt SplatBits, SplatUndef;
8523  unsigned SplatBitSize;
8524  bool HasAnyUndefs;
8525  if (BVN && Subtarget->hasNEON() &&
8526  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
8527  if (SplatBitSize <= 64) {
8528  EVT VorrVT;
8529  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
8530  SplatUndef.getZExtValue(), SplatBitSize,
8531  DAG, dl, VorrVT, VT.is128BitVector(),
8532  OtherModImm);
8533  if (Val.getNode()) {
8534  SDValue Input =
8535  DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
8536  SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
8537  return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
8538  }
8539  }
8540  }
8541 
8542  if (!Subtarget->isThumb1Only()) {
8543  // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
8544  SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
8545  if (Result.getNode())
8546  return Result;
8547  }
8548 
8549  // The code below optimizes (or (and X, Y), Z).
8550  // The AND operand needs to have a single user to make these optimizations
8551  // profitable.
8552  SDValue N0 = N->getOperand(0);
8553  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
8554  return SDValue();
8555  SDValue N1 = N->getOperand(1);
8556 
8557  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
8558  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
8559  DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
8560  APInt SplatUndef;
8561  unsigned SplatBitSize;
8562  bool HasAnyUndefs;
8563 
8564  APInt SplatBits0, SplatBits1;
8565  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
8566  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
8567  // Ensure that the second operands of both ANDs are constants
8568  if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
8569  HasAnyUndefs) && !HasAnyUndefs) {
8570  if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
8571  HasAnyUndefs) && !HasAnyUndefs) {
8572  // Ensure that the bit widths of the constants are the same and that
8573  // the splat arguments are logical inverses as per the pattern we
8574  // are trying to simplify.
8575  if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
8576  SplatBits0 == ~SplatBits1) {
8577  // Canonicalize the vector type to make instruction selection
8578  // simpler.
8579  EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
8580  SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
8581  N0->getOperand(1),
8582  N0->getOperand(0),
8583  N1->getOperand(0));
8584  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
8585  }
8586  }
8587  }
8588  }
8589 
8590  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
8591  // reasonable.
8592 
8593  // BFI is only available on V6T2+
8594  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
8595  return SDValue();
8596 
8597  SDLoc DL(N);
8598  // 1) or (and A, mask), val => ARMbfi A, val, mask
8599  // iff (val & mask) == val
8600  //
8601  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
8602  // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
8603  // && mask == ~mask2
8604  // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
8605  // && ~mask == mask2
8606  // (i.e., copy a bitfield value into another bitfield of the same width)
8607 
8608  if (VT != MVT::i32)
8609  return SDValue();
8610 
8611  SDValue N00 = N0.getOperand(0);
8612 
8613  // The value and the mask need to be constants so we can verify this is
8614  // actually a bitfield set. If the mask is 0xffff, we can do better
8615  // via a movt instruction, so don't use BFI in that case.
8616  SDValue MaskOp = N0.getOperand(1);
8617  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
8618  if (!MaskC)
8619  return SDValue();
8620  unsigned Mask = MaskC->getZExtValue();
8621  if (Mask == 0xffff)
8622  return SDValue();
8623  SDValue Res;
8624  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
8625  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8626  if (N1C) {
8627  unsigned Val = N1C->getZExtValue();
8628  if ((Val & ~Mask) != Val)
8629  return SDValue();
8630 
8631  if (ARM::isBitFieldInvertedMask(Mask)) {
8632  Val >>= countTrailingZeros(~Mask);
8633 
8634  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
8635  DAG.getConstant(Val, DL, MVT::i32),
8636  DAG.getConstant(Mask, DL, MVT::i32));
8637 
8638  // Do not add new nodes to DAG combiner worklist.
8639  DCI.CombineTo(N, Res, false);
8640  return SDValue();
8641  }
8642  } else if (N1.getOpcode() == ISD::AND) {
8643  // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
8644  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8645  if (!N11C)
8646  return SDValue();
8647  unsigned Mask2 = N11C->getZExtValue();
8648 
8649  // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
8650  // pattern to match as-is.
8651  if (ARM::isBitFieldInvertedMask(Mask) &&
8652  (Mask == ~Mask2)) {
8653  // The pack halfword instruction works better for masks that fit it,
8654  // so use that when it's available.
8655  if (Subtarget->hasT2ExtractPack() &&
8656  (Mask == 0xffff || Mask == 0xffff0000))
8657  return SDValue();
8658  // 2a
8659  unsigned amt = countTrailingZeros(Mask2);
8660  Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
8661  DAG.getConstant(amt, DL, MVT::i32));
8662  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
8663  DAG.getConstant(Mask, DL, MVT::i32));
8664  // Do not add new nodes to DAG combiner worklist.
8665  DCI.CombineTo(N, Res, false);
8666  return SDValue();
8667  } else if (ARM::isBitFieldInvertedMask(~Mask) &&
8668  (~Mask == Mask2)) {
8669  // The pack halfword instruction works better for masks that fit it,
8670  // so use that when it's available.
8671  if (Subtarget->hasT2ExtractPack() &&
8672  (Mask2 == 0xffff || Mask2 == 0xffff0000))
8673  return SDValue();
8674  // 2b
8675  unsigned lsb = countTrailingZeros(Mask);
8676  Res = DAG.getNode(ISD::SRL, DL, VT, N00,
8677  DAG.getConstant(lsb, DL, MVT::i32));
8678  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
8679  DAG.getConstant(Mask2, DL, MVT::i32));
8680  // Do not add new nodes to DAG combiner worklist.
8681  DCI.CombineTo(N, Res, false);
8682  return SDValue();
8683  }
8684  }
8685 
8686  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
8687  N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
8688  ARM::isBitFieldInvertedMask(~Mask)) {
8689  // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
8690  // where lsb(mask) == #shamt and masked bits of B are known zero.
8691  SDValue ShAmt = N00.getOperand(1);
8692  unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8693  unsigned LSB = countTrailingZeros(Mask);
8694  if (ShAmtC != LSB)
8695  return SDValue();
8696 
8697  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
8698  DAG.getConstant(~Mask, DL, MVT::i32));
8699 
8700  // Do not add new nodes to DAG combiner worklist.
8701  DCI.CombineTo(N, Res, false);
8702  }
8703 
8704  return SDValue();
8705 }
8706 
8707 static SDValue PerformXORCombine(SDNode *N,
8708  TargetLowering::DAGCombinerInfo &DCI,
8709  const ARMSubtarget *Subtarget) {
8710  EVT VT = N->getValueType(0);
8711  SelectionDAG &DAG = DCI.DAG;
8712 
8713  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8714  return SDValue();
8715 
8716  if (!Subtarget->isThumb1Only()) {
8717  // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
8718  SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
8719  if (Result.getNode())
8720  return Result;
8721  }
8722 
8723  return SDValue();
8724 }
8725 
8726 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
8727 /// the bits being cleared by the AND are not demanded by the BFI.
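/// For instance (an illustrative case, not exhaustive): if the BFI's mask
/// operand is 0xffff00ff, only bits [7:0] of the inserted value are consumed
/// (LSB = 8, Width = 8), so an (and B, 0xff) feeding it clears no bit the BFI
/// demands and can be dropped.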
8728 static SDValue PerformBFICombine(SDNode *N,
8729  TargetLowering::DAGCombinerInfo &DCI) {
8730  SDValue N1 = N->getOperand(1);
8731  if (N1.getOpcode() == ISD::AND) {
8732  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8733  if (!N11C)
8734  return SDValue();
8735  unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
8736  unsigned LSB = countTrailingZeros(~InvMask);
8737  unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
8738  assert(Width <
8739  static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
8740  "undefined behavior");
8741  unsigned Mask = (1u << Width) - 1;
8742  unsigned Mask2 = N11C->getZExtValue();
8743  if ((Mask & (~Mask2)) == 0)
8744  return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
8745  N->getOperand(0), N1.getOperand(0),
8746  N->getOperand(2));
8747  }
8748  return SDValue();
8749 }
8750 
8751 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
8752 /// ARMISD::VMOVRRD.
8753 static SDValue PerformVMOVRRDCombine(SDNode *N,
8754  TargetLowering::DAGCombinerInfo &DCI,
8755  const ARMSubtarget *Subtarget) {
8756  // vmovrrd(vmovdrr x, y) -> x,y
8757  SDValue InDouble = N->getOperand(0);
8758  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
8759  return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
8760 
8761  // vmovrrd(load f64) -> (load i32), (load i32)
8762  SDNode *InNode = InDouble.getNode();
8763  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
8764  InNode->getValueType(0) == MVT::f64 &&
8765  InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
8766  !cast<LoadSDNode>(InNode)->isVolatile()) {
8767  // TODO: Should this be done for non-FrameIndex operands?
8768  LoadSDNode *LD = cast<LoadSDNode>(InNode);
8769 
8770  SelectionDAG &DAG = DCI.DAG;
8771  SDLoc DL(LD);
8772  SDValue BasePtr = LD->getBasePtr();
8773  SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
8774  LD->getPointerInfo(), LD->isVolatile(),
8775  LD->isNonTemporal(), LD->isInvariant(),
8776  LD->getAlignment());
8777 
8778  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
8779  DAG.getConstant(4, DL, MVT::i32));
8780  SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
8781  LD->getPointerInfo(), LD->isVolatile(),
8782  LD->isNonTemporal(), LD->isInvariant(),
8783  std::min(4U, LD->getAlignment() / 2));
8784 
8785  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
8786  if (DCI.DAG.getDataLayout().isBigEndian())
8787  std::swap (NewLD1, NewLD2);
8788  SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
8789  return Result;
8790  }
8791 
8792  return SDValue();
8793 }
8794 
8795 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
8796 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
8797 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
8798  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
8799  SDValue Op0 = N->getOperand(0);
8800  SDValue Op1 = N->getOperand(1);
8801  if (Op0.getOpcode() == ISD::BITCAST)
8802  Op0 = Op0.getOperand(0);
8803  if (Op1.getOpcode() == ISD::BITCAST)
8804  Op1 = Op1.getOperand(0);
8805  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
8806  Op0.getNode() == Op1.getNode() &&
8807  Op0.getResNo() == 0 && Op1.getResNo() == 1)
8808  return DAG.getNode(ISD::BITCAST, SDLoc(N),
8809  N->getValueType(0), Op0.getOperand(0));
8810  return SDValue();
8811 }
8812 
8813 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
8814 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
8815 /// i64 vector to have f64 elements, since the value can then be loaded
8816 /// directly into a VFP register.
8817 static bool hasNormalLoadOperand(SDNode *N) {
8818  unsigned NumElts = N->getValueType(0).getVectorNumElements();
8819  for (unsigned i = 0; i < NumElts; ++i) {
8820  SDNode *Elt = N->getOperand(i).getNode();
8821  if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
8822  return true;
8823  }
8824  return false;
8825 }
8826 
8827 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
8828 /// ISD::BUILD_VECTOR.
8829 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
8830  TargetLowering::DAGCombinerInfo &DCI,
8831  const ARMSubtarget *Subtarget) {
8832  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
8833  // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
8834  // into a pair of GPRs, which is fine when the value is used as a scalar,
8835  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
8836  SelectionDAG &DAG = DCI.DAG;
8837  if (N->getNumOperands() == 2) {
8838  SDValue RV = PerformVMOVDRRCombine(N, DAG);
8839  if (RV.getNode())
8840  return RV;
8841  }
8842 
8843  // Load i64 elements as f64 values so that type legalization does not split
8844  // them up into i32 values.
8845  EVT VT = N->getValueType(0);
8846  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
8847  return SDValue();
8848  SDLoc dl(N);
8849  SmallVector<SDValue, 8> Ops;
8850  unsigned NumElts = VT.getVectorNumElements();
8851  for (unsigned i = 0; i < NumElts; ++i) {
8852  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
8853  Ops.push_back(V);
8854  // Make the DAGCombiner fold the bitcast.
8855  DCI.AddToWorklist(V.getNode());
8856  }
8857  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
8858  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
8859  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
8860 }
8861 
8862 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
8863 static SDValue
8864 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
8865  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
8866  // At that time, we may have inserted bitcasts from integer to float.
8867  // If these bitcasts have survived DAGCombine, change the lowering of this
8868  // BUILD_VECTOR into something more vector friendly, i.e., one that does
8869  // not force the use of floating point types.
8870 
8871  // Make sure we can change the type of the vector.
8872  // This is possible iff:
8873  // 1. The vector is only used in a bitcast to an integer type. I.e.,
8874  // 1.1. Vector is used only once.
8875  // 1.2. Use is a bit convert to an integer type.
8876  // 2. The size of its operands is 32 bits (64 bits is not legal).
8877  EVT VT = N->getValueType(0);
8878  EVT EltVT = VT.getVectorElementType();
8879 
8880  // Check 1.1. and 2.
8881  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
8882  return SDValue();
8883 
8884  // By construction, the input type must be float.
8885  assert(EltVT == MVT::f32 && "Unexpected type!");
8886 
8887  // Check 1.2.
8888  SDNode *Use = *N->use_begin();
8889  if (Use->getOpcode() != ISD::BITCAST ||
8890  Use->getValueType(0).isFloatingPoint())
8891  return SDValue();
8892 
8893  // Check profitability.
8894  // Model is, if more than half of the relevant operands are bitcast from
8895  // i32, turn the build_vector into a sequence of insert_vector_elt.
8896  // Relevant operands are everything that is not statically
8897  // (i.e., at compile time) bitcasted.
8898  unsigned NumOfBitCastedElts = 0;
8899  unsigned NumElts = VT.getVectorNumElements();
8900  unsigned NumOfRelevantElts = NumElts;
8901  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
8902  SDValue Elt = N->getOperand(Idx);
8903  if (Elt->getOpcode() == ISD::BITCAST) {
8904  // Assume only bit cast to i32 will go away.
8905  if (Elt->getOperand(0).getValueType() == MVT::i32)
8906  ++NumOfBitCastedElts;
8907  } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
8908  // Constants are statically casted, thus do not count them as
8909  // relevant operands.
8910  --NumOfRelevantElts;
8911  }
8912 
8913  // Check if more than half of the elements require a non-free bitcast.
8914  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
8915  return SDValue();
8916 
8917  SelectionDAG &DAG = DCI.DAG;
8918  // Create the new vector type.
8919  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
8920  // Check if the type is legal.
8921  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8922  if (!TLI.isTypeLegal(VecVT))
8923  return SDValue();
8924 
8925  // Combine:
8926  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
8927  // => BITCAST INSERT_VECTOR_ELT
8928  // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
8929  // (BITCAST EN), N.
8930  SDValue Vec = DAG.getUNDEF(VecVT);
8931  SDLoc dl(N);
8932  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
8933  SDValue V = N->getOperand(Idx);
8934  if (V.getOpcode() == ISD::UNDEF)
8935  continue;
8936  if (V.getOpcode() == ISD::BITCAST &&
8937  V->getOperand(0).getValueType() == MVT::i32)
8938  // Fold obvious case.
8939  V = V.getOperand(0);
8940  else {
8941  V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
8942  // Make the DAGCombiner fold the bitcasts.
8943  DCI.AddToWorklist(V.getNode());
8944  }
8945  SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
8946  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
8947  }
8948  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
8949  // Make the DAGCombiner fold the bitcasts.
8950  DCI.AddToWorklist(Vec.getNode());
8951  return Vec;
8952 }
8953 
8954 /// PerformInsertEltCombine - Target-specific dag combine xforms for
8955 /// ISD::INSERT_VECTOR_ELT.
8956 static SDValue PerformInsertEltCombine(SDNode *N,
8957  TargetLowering::DAGCombinerInfo &DCI) {
8958  // Bitcast an i64 load inserted into a vector to f64.
8959  // Otherwise, the i64 value will be legalized to a pair of i32 values.
8960  EVT VT = N->getValueType(0);
8961  SDNode *Elt = N->getOperand(1).getNode();
8962  if (VT.getVectorElementType() != MVT::i64 ||
8963  !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
8964  return SDValue();
8965 
8966  SelectionDAG &DAG = DCI.DAG;
8967  SDLoc dl(N);
8968  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
8969  VT.getVectorNumElements());
8970  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
8971  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
8972  // Make the DAGCombiner fold the bitcasts.
8973  DCI.AddToWorklist(Vec.getNode());
8974  DCI.AddToWorklist(V.getNode());
8975  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
8976  Vec, V, N->getOperand(2));
8977  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
8978 }
8979 
8980 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
8981 /// ISD::VECTOR_SHUFFLE.
8982 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
8983  // The LLVM shufflevector instruction does not require the shuffle mask
8984  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
8985  // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
8986  // operands do not match the mask length, they are extended by concatenating
8987  // them with undef vectors. That is probably the right thing for other
8988  // targets, but for NEON it is better to concatenate two double-register
8989  // size vector operands into a single quad-register size vector. Do that
8990  // transformation here:
8991  // shuffle(concat(v1, undef), concat(v2, undef)) ->
8992  // shuffle(concat(v1, v2), undef)
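  //
  // Mask translation example (illustrative): with 4-element result vectors
  // (HalfElts = 2), an old mask index 4 (the first live lane of
  // concat(v2, undef)) becomes 2 (that lane's position in concat(v1, v2)),
  // while indices that pointed into the undef halves become -1.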
8993  SDValue Op0 = N->getOperand(0);
8994  SDValue Op1 = N->getOperand(1);
8995  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
8996  Op1.getOpcode() != ISD::CONCAT_VECTORS ||
8997  Op0.getNumOperands() != 2 ||
8998  Op1.getNumOperands() != 2)
8999  return SDValue();
9000  SDValue Concat0Op1 = Op0.getOperand(1);
9001  SDValue Concat1Op1 = Op1.getOperand(1);
9002  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
9003  Concat1Op1.getOpcode() != ISD::UNDEF)
9004  return SDValue();
9005  // Skip the transformation if any of the types are illegal.
9006  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9007  EVT VT = N->getValueType(0);
9008  if (!TLI.isTypeLegal(VT) ||
9009  !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
9010  !TLI.isTypeLegal(Concat1Op1.getValueType()))
9011  return SDValue();
9012 
9013  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
9014  Op0.getOperand(0), Op1.getOperand(0));
9015  // Translate the shuffle mask.
9016  SmallVector<int, 16> NewMask;
9017  unsigned NumElts = VT.getVectorNumElements();
9018  unsigned HalfElts = NumElts/2;
9019  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
9020  for (unsigned n = 0; n < NumElts; ++n) {
9021  int MaskElt = SVN->getMaskElt(n);
9022  int NewElt = -1;
9023  if (MaskElt < (int)HalfElts)
9024  NewElt = MaskElt;
9025  else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
9026  NewElt = HalfElts + MaskElt - NumElts;
9027  NewMask.push_back(NewElt);
9028  }
9029  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
9030  DAG.getUNDEF(VT), NewMask.data());
9031 }
9032 
9033 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
9034 /// NEON load/store intrinsics, and generic vector load/stores, to merge
9035 /// base address updates.
9036 /// For generic load/stores, the memory type is assumed to be a vector.
9037 /// The caller is assumed to have checked legality.
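/// For example (roughly): a vld1 whose address register is also fed to an
/// (add rN, #size-of-access) can be merged into a single post-incrementing
/// vld1 with writeback, and the ADD's users are rewired to the updated base.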
9038 static SDValue CombineBaseUpdate(SDNode *N,
9039  TargetLowering::DAGCombinerInfo &DCI) {
9040  SelectionDAG &DAG = DCI.DAG;
9041  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
9042  N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
9043  const bool isStore = N->getOpcode() == ISD::STORE;
9044  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
9045  SDValue Addr = N->getOperand(AddrOpIdx);
9046  MemSDNode *MemN = cast<MemSDNode>(N);
9047  SDLoc dl(N);
9048 
9049  // Search for a use of the address operand that is an increment.
9050  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
9051  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
9052  SDNode *User = *UI;
9053  if (User->getOpcode() != ISD::ADD ||
9054  UI.getUse().getResNo() != Addr.getResNo())
9055  continue;
9056 
9057  // Check that the add is independent of the load/store. Otherwise, folding
9058  // it would create a cycle.
9059  if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
9060  continue;
9061 
9062  // Find the new opcode for the updating load/store.
9063  bool isLoadOp = true;
9064  bool isLaneOp = false;
9065  unsigned NewOpc = 0;
9066  unsigned NumVecs = 0;
9067  if (isIntrinsic) {
9068  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9069  switch (IntNo) {
9070  default: llvm_unreachable("unexpected intrinsic for Neon base update");
9071  case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
9072  NumVecs = 1; break;
9073  case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
9074  NumVecs = 2; break;
9075  case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
9076  NumVecs = 3; break;
9077  case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
9078  NumVecs = 4; break;
9079  case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
9080  NumVecs = 2; isLaneOp = true; break;
9081  case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
9082  NumVecs = 3; isLaneOp = true; break;
9083  case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
9084  NumVecs = 4; isLaneOp = true; break;
9085  case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
9086  NumVecs = 1; isLoadOp = false; break;
9087  case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
9088  NumVecs = 2; isLoadOp = false; break;
9089  case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
9090  NumVecs = 3; isLoadOp = false; break;
9091  case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
9092  NumVecs = 4; isLoadOp = false; break;
9093  case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
9094  NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
9095  case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
9096  NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
9097  case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
9098  NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
9099  }
9100  } else {
9101  isLaneOp = true;
9102  switch (N->getOpcode()) {
9103  default: llvm_unreachable("unexpected opcode for Neon base update");
9104  case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
9105  case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
9106  case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
9107  case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
9108  NumVecs = 1; isLaneOp = false; break;
9109  case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
9110  NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
9111  }
9112  }
9113 
9114  // Find the size of memory referenced by the load/store.
9115  EVT VecTy;
9116  if (isLoadOp) {
9117  VecTy = N->getValueType(0);
9118  } else if (isIntrinsic) {
9119  VecTy = N->getOperand(AddrOpIdx+1).getValueType();
9120  } else {
9121  assert(isStore && "Node has to be a load, a store, or an intrinsic!");
9122  VecTy = N->getOperand(1).getValueType();
9123  }
9124 
9125  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
9126  if (isLaneOp)
9127  NumBytes /= VecTy.getVectorNumElements();
9128 
9129  // If the increment is a constant, it must match the memory ref size.
9130  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
9131  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
9132  uint64_t IncVal = CInc->getZExtValue();
9133  if (IncVal != NumBytes)
9134  continue;
9135  } else if (NumBytes >= 3 * 16) {
9136  // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
9137  // separate instructions that make it harder to use a non-constant update.
9138  continue;
9139  }
9140 
9141  // OK, we found an ADD we can fold into the base update.
9142  // Now, create a _UPD node, taking care of not breaking alignment.
9143 
9144  EVT AlignedVecTy = VecTy;
9145  unsigned Alignment = MemN->getAlignment();
9146 
9147  // If this is a less-than-standard-aligned load/store, change the type to
9148  // match the standard alignment.
9149  // The alignment is overlooked when selecting _UPD variants; and it's
9150  // easier to introduce bitcasts here than fix that.
9151  // There are 3 ways to get to this base-update combine:
9152  // - intrinsics: they are assumed to be properly aligned (to the standard
9153  // alignment of the memory type), so we don't need to do anything.
9154  // - ARMISD::VLDx nodes: they are only generated from the aforementioned
9155  // intrinsics, so, likewise, there's nothing to do.
9156  // - generic load/store instructions: the alignment is specified as an
9157  // explicit operand, rather than implicitly as the standard alignment
9158  // of the memory type (like the intrinsics). We need to change the
9159  // memory type to match the explicit alignment. That way, we don't
9160  // generate non-standard-aligned ARMISD::VLDx nodes.
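  // For instance (illustrative): a v2i64 store with only 4-byte alignment is
  // rebuilt below as a v4i32 access (EltTy = i32, 16 bytes / 4 = 4 elements),
  // with bitcasts added around the stored/loaded value.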
9161  if (isa<LSBaseSDNode>(N)) {
9162  if (Alignment == 0)
9163  Alignment = 1;
9164  if (Alignment < VecTy.getScalarSizeInBits() / 8) {
9165  MVT EltTy = MVT::getIntegerVT(Alignment * 8);
9166  assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
9167  assert(!isLaneOp && "Unexpected generic load/store lane.");
9168  unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
9169  AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
9170  }
9171  // Don't set an explicit alignment on regular load/stores that we want
9172  // to transform to VLD/VST 1_UPD nodes.
9173  // This matches the behavior of regular load/stores, which only get an
9174  // explicit alignment if the MMO alignment is larger than the standard
9175  // alignment of the memory type.
9176  // Intrinsics, however, always get an explicit alignment, set to the
9177  // alignment of the MMO.
9178  Alignment = 1;
9179  }
9180 
9181  // Create the new updating load/store node.
9182  // First, create an SDVTList for the new updating node's results.
9183  EVT Tys[6];
9184  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
9185  unsigned n;
9186  for (n = 0; n < NumResultVecs; ++n)
9187  Tys[n] = AlignedVecTy;
9188  Tys[n++] = MVT::i32;
9189  Tys[n] = MVT::Other;
9190  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
9191 
9192  // Then, gather the new node's operands.
9193  SmallVector<SDValue, 8> Ops;
9194  Ops.push_back(N->getOperand(0)); // incoming chain
9195  Ops.push_back(N->getOperand(AddrOpIdx));
9196  Ops.push_back(Inc);
9197 
9198  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
9199  // Try to match the intrinsic's signature
9200  Ops.push_back(StN->getValue());
9201  } else {
9202  // Loads (and of course intrinsics) match the intrinsics' signature,
9203  // so just add all but the alignment operand.
9204  for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
9205  Ops.push_back(N->getOperand(i));
9206  }
9207 
9208  // For all node types, the alignment operand is always the last one.
9209  Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
9210 
9211  // If this is a non-standard-aligned STORE, the penultimate operand is the
9212  // stored value. Bitcast it to the aligned type.
9213  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
9214  SDValue &StVal = Ops[Ops.size()-2];
9215  StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
9216  }
9217 
9218  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys,
9219  Ops, AlignedVecTy,
9220  MemN->getMemOperand());
9221 
9222  // Update the uses.
9223  SmallVector<SDValue, 5> NewResults;
9224  for (unsigned i = 0; i < NumResultVecs; ++i)
9225  NewResults.push_back(SDValue(UpdN.getNode(), i));
9226 
9227  // If this is a non-standard-aligned LOAD, the first result is the loaded
9228  // value. Bitcast it to the expected result type.
9229  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
9230  SDValue &LdVal = NewResults[0];
9231  LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
9232  }
9233 
9234  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
9235  DCI.CombineTo(N, NewResults);
9236  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
9237 
9238  break;
9239  }
9240  return SDValue();
9241 }
9242 
9243 static SDValue PerformVLDCombine(SDNode *N,
9244  TargetLowering::DAGCombinerInfo &DCI) {
9245  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
9246  return SDValue();
9247 
9248  return CombineBaseUpdate(N, DCI);
9249 }
9250 
9251 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
9252 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
9253 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
9254 /// return true.
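/// E.g. (a sketch): if every vector result of a vld2lane intrinsic is only
/// used by VDUPLANEs of the same lane that was loaded, the whole group can be
/// rewritten as a single VLD2DUP that loads and duplicates the element.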
9255 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
9256  SelectionDAG &DAG = DCI.DAG;
9257  EVT VT = N->getValueType(0);
9258  // vldN-dup instructions only support 64-bit vectors for N > 1.
9259  if (!VT.is64BitVector())
9260  return false;
9261 
9262  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
9263  SDNode *VLD = N->getOperand(0).getNode();
9264  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
9265  return false;
9266  unsigned NumVecs = 0;
9267  unsigned NewOpc = 0;
9268  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
9269  if (IntNo == Intrinsic::arm_neon_vld2lane) {
9270  NumVecs = 2;
9271  NewOpc = ARMISD::VLD2DUP;
9272  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
9273  NumVecs = 3;
9274  NewOpc = ARMISD::VLD3DUP;
9275  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
9276  NumVecs = 4;
9277  NewOpc = ARMISD::VLD4DUP;
9278  } else {
9279  return false;
9280  }
9281 
9282  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
9283  // numbers match the load.
9284  unsigned VLDLaneNo =
9285  cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
9286  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
9287  UI != UE; ++UI) {
9288  // Ignore uses of the chain result.
9289  if (UI.getUse().getResNo() == NumVecs)
9290  continue;
9291  SDNode *User = *UI;
9292  if (User->getOpcode() != ARMISD::VDUPLANE ||
9293  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
9294  return false;
9295  }
9296 
9297  // Create the vldN-dup node.
9298  EVT Tys[5];
9299  unsigned n;
9300  for (n = 0; n < NumVecs; ++n)
9301  Tys[n] = VT;
9302  Tys[n] = MVT::Other;
9303  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
9304  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
9305  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
9306  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
9307  Ops, VLDMemInt->getMemoryVT(),
9308  VLDMemInt->getMemOperand());
9309 
9310  // Update the uses.
9311  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
9312  UI != UE; ++UI) {
9313  unsigned ResNo = UI.getUse().getResNo();
9314  // Ignore uses of the chain result.
9315  if (ResNo == NumVecs)
9316  continue;
9317  SDNode *User = *UI;
9318  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
9319  }
9320 
9321  // Now the vldN-lane intrinsic is dead except for its chain result.
9322  // Update uses of the chain.
9323  std::vector<SDValue> VLDDupResults;
9324  for (unsigned n = 0; n < NumVecs; ++n)
9325  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
9326  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
9327  DCI.CombineTo(VLD, VLDDupResults);
9328 
9329  return true;
9330 }
9331 
9332 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
9333 /// ARMISD::VDUPLANE.
9334 static SDValue PerformVDUPLANECombine(SDNode *N,
9335  TargetLowering::DAGCombinerInfo &DCI) {
9336  SDValue Op = N->getOperand(0);
9337 
9338  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
9339  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
9340  if (CombineVLDDUP(N, DCI))
9341  return SDValue(N, 0);
9342 
9343  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
9344  // redundant. Ignore bit_converts for now; element sizes are checked below.
9345  while (Op.getOpcode() == ISD::BITCAST)
9346  Op = Op.getOperand(0);
9347  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
9348  return SDValue();
9349 
9350  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
9351  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
9352  // The canonical VMOV for a zero vector uses a 32-bit element size.
9353  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9354  unsigned EltBits;
9355  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
9356  EltSize = 8;
9357  EVT VT = N->getValueType(0);
9358  if (EltSize > VT.getVectorElementType().getSizeInBits())
9359  return SDValue();
9360 
9361  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
9362 }
9363 
9364 static SDValue PerformLOADCombine(SDNode *N,
9365  TargetLowering::DAGCombinerInfo &DCI) {
9366  EVT VT = N->getValueType(0);
9367 
9368  // If this is a legal vector load, try to combine it into a VLD1_UPD.
9369  if (ISD::isNormalLoad(N) && VT.isVector() &&
9370  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
9371  return CombineBaseUpdate(N, DCI);
9372 
9373  return SDValue();
9374 }
9375 
9376 /// PerformSTORECombine - Target-specific dag combine xforms for
9377 /// ISD::STORE.
9378 static SDValue PerformSTORECombine(SDNode *N,
9379  TargetLowering::DAGCombinerInfo &DCI) {
9380  StoreSDNode *St = cast<StoreSDNode>(N);
9381  if (St->isVolatile())
9382  return SDValue();
9383 
9384  // Optimize trunc store (of multiple scalars) to shuffle and store. First,
9385  // pack all of the elements in one place. Next, store to memory in fewer
9386  // chunks.
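  // For example (roughly): a truncating store of <4 x i32> to <4 x i8> is
  // shuffled so the four byte lanes end up contiguous at the bottom of the
  // wide register, and is then written out as a single i32 store.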
9387  SDValue StVal = St->getValue();
9388  EVT VT = StVal.getValueType();
9389  if (St->isTruncatingStore() && VT.isVector()) {
9390  SelectionDAG &DAG = DCI.DAG;
9391  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9392  EVT StVT = St->getMemoryVT();
9393  unsigned NumElems = VT.getVectorNumElements();
9394  assert(StVT != VT && "Cannot truncate to the same type");
9395  unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
9396  unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
9397 
9398  // The From and To element sizes and ElemCount must be powers of two.
9399  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
9400 
9401  // We are going to use the original vector elt for storing.
9402  // Accumulated smaller vector elements must be a multiple of the store size.
9403  if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
9404 
9405  unsigned SizeRatio = FromEltSz / ToEltSz;
9406  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
9407 
9408  // Create a type on which we perform the shuffle.
9409  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
9410  NumElems*SizeRatio);
9411  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
9412 
9413  SDLoc DL(St);
9414  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
9415  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
9416  for (unsigned i = 0; i < NumElems; ++i)
9417  ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
9418  ? (i + 1) * SizeRatio - 1
9419  : i * SizeRatio;
9420 
9421  // Can't shuffle using an illegal type.
9422  if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
9423 
9424  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
9425  DAG.getUNDEF(WideVec.getValueType()),
9426  ShuffleVec.data());
9427  // At this point all of the data is stored at the bottom of the
9428  // register. We now need to save it to mem.
9429 
9430  // Find the largest store unit
9431  MVT StoreType = MVT::i8;
9432  for (MVT Tp : MVT::integer_valuetypes()) {
9433  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
9434  StoreType = Tp;
9435  }
9436  // Didn't find a legal store type.
9437  if (!TLI.isTypeLegal(StoreType))
9438  return SDValue();
9439 
9440  // Bitcast the original vector into a vector of store-size units
9441  EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
9442  StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
9443  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
9444  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
9445  SmallVector<SDValue, 8> Chains;
9446  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
9447  TLI.getPointerTy(DAG.getDataLayout()));
9448  SDValue BasePtr = St->getBasePtr();
9449 
9450  // Perform one or more big stores into memory.
9451  unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
9452  for (unsigned I = 0; I < E; I++) {
9453  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
9454  StoreType, ShuffWide,
9455  DAG.getIntPtrConstant(I, DL));
9456  SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
9457  St->getPointerInfo(), St->isVolatile(),
9458  St->isNonTemporal(), St->getAlignment());
9459  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9460  Increment);
9461  Chains.push_back(Ch);
9462  }
9463  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9464  }
9465 
9466  if (!ISD::isNormalStore(St))
9467  return SDValue();
9468 
9469  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
9470  // ARM stores of arguments in the same cache line.
9471  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
9472  StVal.getNode()->hasOneUse()) {
9473  SelectionDAG &DAG = DCI.DAG;
9474  bool isBigEndian = DAG.getDataLayout().isBigEndian();
9475  SDLoc DL(St);
9476  SDValue BasePtr = St->getBasePtr();
9477  SDValue NewST1 = DAG.getStore(St->getChain(), DL,
9478  StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
9479  BasePtr, St->getPointerInfo(), St->isVolatile(),
9480  St->isNonTemporal(), St->getAlignment());
9481 
9482  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
9483  DAG.getConstant(4, DL, MVT::i32));
9484  return DAG.getStore(NewST1.getValue(0), DL,
9485  StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
9486  OffsetPtr, St->getPointerInfo(), St->isVolatile(),
9487  St->isNonTemporal(),
9488  std::min(4U, St->getAlignment() / 2));
9489  }
9490 
9491  if (StVal.getValueType() == MVT::i64 &&
9492  StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9493 
9494  // Bitcast an i64 store extracted from a vector to f64.
9495  // Otherwise, the i64 value will be legalized to a pair of i32 values.
9496  SelectionDAG &DAG = DCI.DAG;
9497  SDLoc dl(StVal);
9498  SDValue IntVec = StVal.getOperand(0);
9499  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
9500  IntVec.getValueType().getVectorNumElements());
9501  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
9502  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
9503  Vec, StVal.getOperand(1));
9504  dl = SDLoc(N);
9505  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
9506  // Make the DAGCombiner fold the bitcasts.
9507  DCI.AddToWorklist(Vec.getNode());
9508  DCI.AddToWorklist(ExtElt.getNode());
9509  DCI.AddToWorklist(V.getNode());
9510  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
9511  St->getPointerInfo(), St->isVolatile(),
9512  St->isNonTemporal(), St->getAlignment(),
9513  St->getAAInfo());
9514  }
9515 
9516  // If this is a legal vector store, try to combine it into a VST1_UPD.
9517  if (ISD::isNormalStore(N) && VT.isVector() &&
9518  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
9519  return CombineBaseUpdate(N, DCI);
9520 
9521  return SDValue();
9522 }
9523 
9524 // isConstVecPow2 - Return true if each vector element is a power of 2, all
9525 // elements are the same constant, C, and Log2(C) ranges from 1 to 32.
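 // For example, a <4 x float> splat of 8.0 satisfies this and sets C = 8
 // (Log2(C) = 3).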
9526 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
9527 {
9528  integerPart cN;
9529  integerPart c0 = 0;
9530  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
9531  I != E; I++) {
9532  ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
9533  if (!C)
9534  return false;
9535 
9536  bool isExact;
9537  APFloat APF = C->getValueAPF();
9538  if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
9539  != APFloat::opOK || !isExact)
9540  return false;
9541 
9542  c0 = (I == 0) ? cN : c0;
9543  if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
9544  return false;
9545  }
9546  C = c0;
9547  return true;
9548 }
9549 
9550 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
9551 /// can replace combinations of VMUL and VCVT (floating-point to integer)
9552 /// when the VMUL has a constant operand that is a power of 2.
9553 ///
9554 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
9555 /// vmul.f32 d16, d17, d16
9556 /// vcvt.s32.f32 d16, d16
9557 /// becomes:
9558 /// vcvt.s32.f32 d16, d16, #3
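/// The "#3" fractional-bits operand is Log2 of the constant multiplier
/// (8.0 = 2^3): multiplying by 2^n before a float-to-int conversion is
/// equivalent to a fixed-point conversion with n fraction bits.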
9559 static SDValue PerformVCVTCombine(SDNode *N,
9560  TargetLowering::DAGCombinerInfo &DCI,
9561  const ARMSubtarget *Subtarget) {
9562  SelectionDAG &DAG = DCI.DAG;
9563  SDValue Op = N->getOperand(0);
9564 
9565  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
9566  Op.getOpcode() != ISD::FMUL)
9567  return SDValue();
9568 
9569  uint64_t C;
9570  SDValue N0 = Op->getOperand(0);
9571  SDValue ConstVec = Op->getOperand(1);
9572  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
9573 
9574  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
9575  !isConstVecPow2(ConstVec, isSigned, C))
9576  return SDValue();
9577 
9578  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
9579  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
9580  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9581  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
9582  NumLanes > 4) {
9583  // These instructions only exist converting from f32 to i32. We can handle
9584  // smaller integers by generating an extra truncate, but larger ones would
9585  // be lossy. We also can't handle more than 4 lanes, since these instructions
9586  // only support v2i32/v4i32 types.
9587  return SDValue();
9588  }
9589 
9590  SDLoc dl(N);
9591  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
9592  Intrinsic::arm_neon_vcvtfp2fxu;
9593  SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
9594  NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9595  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
9596  N0,
9597  DAG.getConstant(Log2_64(C), dl, MVT::i32));
9598 
9599  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
9600  FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
9601 
9602  return FixConv;
9603 }
9604 
9605 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
9606 /// can replace combinations of VCVT (integer to floating-point) and VDIV
9607 /// when the VDIV has a constant operand that is a power of 2.
9608 ///
9609 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
9610 /// vcvt.f32.s32 d16, d16
9611 /// vdiv.f32 d16, d17, d16
9612 /// becomes:
9613 /// vcvt.f32.s32 d16, d16, #3
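/// The "#3" fractional-bits operand is Log2 of the constant divisor
/// (8.0 = 2^3): dividing by 2^n after an int-to-float conversion is
/// equivalent to a fixed-point conversion with n fraction bits.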
9614 static SDValue PerformVDIVCombine(SDNode *N,
9615  TargetLowering::DAGCombinerInfo &DCI,
9616  const ARMSubtarget *Subtarget) {
9617  SelectionDAG &DAG = DCI.DAG;
9618  SDValue Op = N->getOperand(0);
9619  unsigned OpOpcode = Op.getNode()->getOpcode();
9620 
9621  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
9622  (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
9623  return SDValue();
9624 
9625  uint64_t C;
9626  SDValue ConstVec = N->getOperand(1);
9627  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
9628 
9629  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
9630  !isConstVecPow2(ConstVec, isSigned, C))
9631  return SDValue();
9632 
9633  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
9634  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
9635  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
9636  // These instructions only exist converting from i32 to f32. We can handle
9637  // smaller integers by generating an extra extend, but larger ones would
9638  // be lossy.
9639  return SDValue();
9640  }
9641 
9642  SDLoc dl(N);
9643  SDValue ConvInput = Op.getOperand(0);
9644  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9645  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
9646  ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
9647  dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9648  ConvInput);
9649 
9650  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
9651  Intrinsic::arm_neon_vcvtfxu2fp;
9652  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
9653  Op.getValueType(),
9654  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
9655  ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
9656 }
9657 
9658 /// getVShiftImm - Check if this is a valid build_vector for the immediate
9659 /// operand of a vector shift operation, where all the elements of the
9660 /// build_vector must have the same constant integer value.
9661 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
9662  // Ignore bit_converts.
9663  while (Op.getOpcode() == ISD::BITCAST)
9664  Op = Op.getOperand(0);
9665  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9666  APInt SplatBits, SplatUndef;
9667  unsigned SplatBitSize;
9668  bool HasAnyUndefs;
9669  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
9670  HasAnyUndefs, ElementBits) ||
9671  SplatBitSize > ElementBits)
9672  return false;
9673  Cnt = SplatBits.getSExtValue();
9674  return true;
9675 }
9676 
9677 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
9678 /// operand of a vector shift left operation. That value must be in the range:
9679 /// 0 <= Value < ElementBits for a left shift; or
9680 /// 0 <= Value <= ElementBits for a long left shift.
9681 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
9682  assert(VT.isVector() && "vector shift count is not a vector type");
9683  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9684  if (! getVShiftImm(Op, ElementBits, Cnt))
9685  return false;
9686  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
9687 }
9688 
9689 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
9690 /// operand of a vector shift right operation. For a shift opcode, the value
9691 /// is positive, but for an intrinsic the shift count must be negative. The
9692 /// absolute value must be in the range:
9693 /// 1 <= |Value| <= ElementBits for a right shift; or
9694 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
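/// For example, with v8i16 operands (ElementBits == 16) the shift amount must
/// be in [1, 16], or in [1, 8] for a narrowing right shift.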
9695 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
9696  int64_t &Cnt) {
9697  assert(VT.isVector() && "vector shift count is not a vector type");
9698  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9699  if (! getVShiftImm(Op, ElementBits, Cnt))
9700  return false;
9701  if (isIntrinsic)
9702  Cnt = -Cnt;
9703  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
9704 }
9705 
9706 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
9707 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
9708  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9709  switch (IntNo) {
9710  default:
9711  // Don't do anything for most intrinsics.
9712  break;
9713 
9714  // Vector shifts: check for immediate versions and lower them.
9715  // Note: This is done during DAG combining instead of DAG legalizing because
9716  // the build_vectors for 64-bit vector element shift counts are generally
9717  // not legal, and it is hard to see their values after they get legalized to
9718  // loads from a constant pool.
9719  case Intrinsic::arm_neon_vshifts:
9720  case Intrinsic::arm_neon_vshiftu:
9721  case Intrinsic::arm_neon_vrshifts:
9722  case Intrinsic::arm_neon_vrshiftu:
9723  case Intrinsic::arm_neon_vrshiftn:
9724  case Intrinsic::arm_neon_vqshifts:
9725  case Intrinsic::arm_neon_vqshiftu:
9726  case Intrinsic::arm_neon_vqshiftsu:
9727  case Intrinsic::arm_neon_vqshiftns:
9728  case Intrinsic::arm_neon_vqshiftnu:
9729  case Intrinsic::arm_neon_vqshiftnsu:
9730  case Intrinsic::arm_neon_vqrshiftns:
9731  case Intrinsic::arm_neon_vqrshiftnu:
9732  case Intrinsic::arm_neon_vqrshiftnsu: {
9733  EVT VT = N->getOperand(1).getValueType();
9734  int64_t Cnt;
9735  unsigned VShiftOpc = 0;
9736 
9737  switch (IntNo) {
9738  case Intrinsic::arm_neon_vshifts:
9739  case Intrinsic::arm_neon_vshiftu:
9740  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
9741  VShiftOpc = ARMISD::VSHL;
9742  break;
9743  }
9744  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
9745  VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
9746  ARMISD::VSHRs : ARMISD::VSHRu);
9747  break;
9748  }
9749  return SDValue();
9750 
9751  case Intrinsic::arm_neon_vrshifts:
9752  case Intrinsic::arm_neon_vrshiftu:
9753  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
9754  break;
9755  return SDValue();
9756 
9757  case Intrinsic::arm_neon_vqshifts:
9758  case Intrinsic::arm_neon_vqshiftu:
9759  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
9760  break;
9761  return SDValue();
9762 
9763  case Intrinsic::arm_neon_vqshiftsu:
9764  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
9765  break;
9766  llvm_unreachable("invalid shift count for vqshlu intrinsic");
9767 
9768  case Intrinsic::arm_neon_vrshiftn:
9769  case Intrinsic::arm_neon_vqshiftns:
9770  case Intrinsic::arm_neon_vqshiftnu:
9771  case Intrinsic::arm_neon_vqshiftnsu:
9772  case Intrinsic::arm_neon_vqrshiftns:
9773  case Intrinsic::arm_neon_vqrshiftnu:
9774  case Intrinsic::arm_neon_vqrshiftnsu:
9775  // Narrowing shifts require an immediate right shift.
9776  if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
9777  break;
9778  llvm_unreachable("invalid shift count for narrowing vector shift "
9779  "intrinsic");
9780 
9781  default:
9782  llvm_unreachable("unhandled vector shift");
9783  }
9784 
9785  switch (IntNo) {
9786  case Intrinsic::arm_neon_vshifts:
9787  case Intrinsic::arm_neon_vshiftu:
9788  // Opcode already set above.
9789  break;
9790  case Intrinsic::arm_neon_vrshifts:
9791  VShiftOpc = ARMISD::VRSHRs; break;
9792  case Intrinsic::arm_neon_vrshiftu:
9793  VShiftOpc = ARMISD::VRSHRu; break;
9794  case Intrinsic::arm_neon_vrshiftn:
9795  VShiftOpc = ARMISD::VRSHRN; break;
9796  case Intrinsic::arm_neon_vqshifts:
9797  VShiftOpc = ARMISD::VQSHLs; break;
9798  case Intrinsic::arm_neon_vqshiftu:
9799  VShiftOpc = ARMISD::VQSHLu; break;
9800  case Intrinsic::arm_neon_vqshiftsu:
9801  VShiftOpc = ARMISD::VQSHLsu; break;
9802  case Intrinsic::arm_neon_vqshiftns:
9803  VShiftOpc = ARMISD::VQSHRNs; break;
9804  case Intrinsic::arm_neon_vqshiftnu:
9805  VShiftOpc = ARMISD::VQSHRNu; break;
9806  case Intrinsic::arm_neon_vqshiftnsu:
9807  VShiftOpc = ARMISD::VQSHRNsu; break;
9808  case Intrinsic::arm_neon_vqrshiftns:
9809  VShiftOpc = ARMISD::VQRSHRNs; break;
9810  case Intrinsic::arm_neon_vqrshiftnu:
9811  VShiftOpc = ARMISD::VQRSHRNu; break;
9812  case Intrinsic::arm_neon_vqrshiftnsu:
9813  VShiftOpc = ARMISD::VQRSHRNsu; break;
9814  }
9815 
9816  SDLoc dl(N);
9817  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
9818  N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
9819  }
9820 
9821  case Intrinsic::arm_neon_vshiftins: {
9822  EVT VT = N->getOperand(1).getValueType();
9823  int64_t Cnt;
9824  unsigned VShiftOpc = 0;
9825 
9826  if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
9827  VShiftOpc = ARMISD::VSLI;
9828  else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
9829  VShiftOpc = ARMISD::VSRI;
9830  else {
9831  llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
9832  }
9833 
9834  SDLoc dl(N);
9835  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
9836  N->getOperand(1), N->getOperand(2),
9837  DAG.getConstant(Cnt, dl, MVT::i32));
9838  }
9839 
9840  case Intrinsic::arm_neon_vqrshifts:
9841  case Intrinsic::arm_neon_vqrshiftu:
9842  // No immediate versions of these to check for.
9843  break;
9844  }
9845 
9846  return SDValue();
9847 }
9848 
9849 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
9850 /// lowers them. As with the vector shift intrinsics, this is done during DAG
9851 /// combining instead of DAG legalizing because the build_vectors for 64-bit
9852 /// vector element shift counts are generally not legal, and it is hard to see
9853 /// their values after they get legalized to loads from a constant pool.
9854 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
9855  const ARMSubtarget *ST) {
9856  EVT VT = N->getValueType(0);
9857  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
9858  // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
9859  // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
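 // For example, if x = 0x0000ABCD then (bswap x) = 0xCDAB0000, and both
 // (srl (bswap x), 16) and (rotr (bswap x), 16) produce 0x0000CDAB, which is
 // exactly what rev16 computes for x.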
9860  SDValue N1 = N->getOperand(1);
9861  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
9862  SDValue N0 = N->getOperand(0);
9863  if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
9864  DAG.MaskedValueIsZero(N0.getOperand(0),
9865  APInt::getHighBitsSet(32, 16)))
9866  return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
9867  }
9868  }
9869 
9870  // Nothing to be done for scalar shifts.
9871  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9872  if (!VT.isVector() || !TLI.isTypeLegal(VT))
9873  return SDValue();
9874 
9875  assert(ST->hasNEON() && "unexpected vector shift");
9876  int64_t Cnt;
9877 
9878  switch (N->getOpcode()) {
9879  default: llvm_unreachable("unexpected shift opcode");
9880 
9881  case ISD::SHL:
9882  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
9883  SDLoc dl(N);
9884  return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
9885  DAG.getConstant(Cnt, dl, MVT::i32));
9886  }
9887  break;
9888 
9889  case ISD::SRA:
9890  case ISD::SRL:
9891  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
9892  unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
9893  ARMISD::VSHRs : ARMISD::VSHRu);
9894  SDLoc dl(N);
9895  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
9896  DAG.getConstant(Cnt, dl, MVT::i32));
9897  }
9898  }
9899  return SDValue();
9900 }
9901 
9902 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
9903 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
9904 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
9905  const ARMSubtarget *ST) {
9906  SDValue N0 = N->getOperand(0);
9907 
9908  // Check for sign- and zero-extensions of vector extract operations of 8-
9909  // and 16-bit vector elements. NEON supports these directly. They are
9910  // handled during DAG combining because type legalization will promote them
9911  // to 32-bit types and it is messy to recognize the operations after that.
9912  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9913  SDValue Vec = N0.getOperand(0);
9914  SDValue Lane = N0.getOperand(1);
9915  EVT VT = N->getValueType(0);
9916  EVT EltVT = N0.getValueType();
9917  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9918 
9919  if (VT == MVT::i32 &&
9920  (EltVT == MVT::i8 || EltVT == MVT::i16) &&
9921  TLI.isTypeLegal(Vec.getValueType()) &&
9922  isa<ConstantSDNode>(Lane)) {
9923 
9924  unsigned Opc = 0;
9925  switch (N->getOpcode()) {
9926  default: llvm_unreachable("unexpected opcode");
9927  case ISD::SIGN_EXTEND:
9928  Opc = ARMISD::VGETLANEs;
9929  break;
9930  case ISD::ZERO_EXTEND:
9931  case ISD::ANY_EXTEND:
9932  Opc = ARMISD::VGETLANEu;
9933  break;
9934  }
9935  return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
9936  }
9937  }
9938 
9939  return SDValue();
9940 }
9941 
9942 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
9943 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
9944 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
9945  const ARMSubtarget *ST) {
9946  // If the target supports NEON, try to use vmax/vmin instructions for f32
9947  // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
9948  // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
9949  // a NaN; only do the transformation when it matches that behavior.
9950 
9951  // For now only do this when using NEON for FP operations; if using VFP, it
9952  // is not obvious that the benefit outweighs the cost of switching to the
9953  // NEON pipeline.
9954  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
9955  N->getValueType(0) != MVT::f32)
9956  return SDValue();
9957 
9958  SDValue CondLHS = N->getOperand(0);
9959  SDValue CondRHS = N->getOperand(1);
9960  SDValue LHS = N->getOperand(2);
9961  SDValue RHS = N->getOperand(3);
9962  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
9963 
9964  unsigned Opcode = 0;
9965  bool IsReversed;
9966  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
9967  IsReversed = false; // x CC y ? x : y
9968  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
9969  IsReversed = true ; // x CC y ? y : x
9970  } else {
9971  return SDValue();
9972  }
9973 
9974  bool IsUnordered;
9975  switch (CC) {
9976  default: break;
9977  case ISD::SETOLT:
9978  case ISD::SETOLE:
9979  case ISD::SETLT:
9980  case ISD::SETLE:
9981  case ISD::SETULT:
9982  case ISD::SETULE:
9983  // If LHS is NaN, an ordered comparison will be false and the result will
9984  // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
9985  // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
9986  IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
9987  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
9988  break;
9989  // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
9990  // will return -0, so vmin can only be used for unsafe math or if one of
9991  // the operands is known to be nonzero.
9992  if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
9993  !DAG.getTarget().Options.UnsafeFPMath &&
9994  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
9995  break;
9996  Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
9997  break;
9998 
9999  case ISD::SETOGT:
10000  case ISD::SETOGE:
10001  case ISD::SETGT:
10002  case ISD::SETGE:
10003  case ISD::SETUGT:
10004  case ISD::SETUGE:
10005  // If LHS is NaN, an ordered comparison will be false and the result will
10006  // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
10007  // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
10008  IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
10009  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
10010  break;
10011  // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
10012  // will return +0, so vmax can only be used for unsafe math or if one of
10013  // the operands is known to be nonzero.
10014  if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
10015  !DAG.getTarget().Options.UnsafeFPMath &&
10016  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
10017  break;
10018  Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
10019  break;
10020  }
10021 
10022  if (!Opcode)
10023  return SDValue();
10024  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
10025 }
10026 
10027 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
10028 SDValue
10029 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
10030  SDValue Cmp = N->getOperand(4);
10031  if (Cmp.getOpcode() != ARMISD::CMPZ)
10032  // Only looking at EQ and NE cases.
10033  return SDValue();
10034 
10035  EVT VT = N->getValueType(0);
10036  SDLoc dl(N);
10037  SDValue LHS = Cmp.getOperand(0);
10038  SDValue RHS = Cmp.getOperand(1);
10039  SDValue FalseVal = N->getOperand(0);
10040  SDValue TrueVal = N->getOperand(1);
10041  SDValue ARMcc = N->getOperand(2);
10042  ARMCC::CondCodes CC =
10043  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
10044 
10045  // Simplify
10046  // mov r1, r0
10047  // cmp r1, x
10048  // mov r0, y
10049  // moveq r0, x
10050  // to
10051  // cmp r0, x
10052  // movne r0, y
10053  //
10054  // mov r1, r0
10055  // cmp r1, x
10056  // mov r0, x
10057  // movne r0, y
10058  // to
10059  // cmp r0, x
10060  // movne r0, y
10061  /// FIXME: Turn this into a target neutral optimization?
10062  SDValue Res;
10063  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
10064  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
10065  N->getOperand(3), Cmp);
10066  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
10067  SDValue ARMcc;
10068  SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
10069  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
10070  N->getOperand(3), NewCmp);
10071  }
10072 
10073  if (Res.getNode()) {
10074  APInt KnownZero, KnownOne;
10075  DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
10076  // Capture demanded bits information that would be otherwise lost.
10077  if (KnownZero == 0xfffffffe)
10078  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
10079  DAG.getValueType(MVT::i1));
10080  else if (KnownZero == 0xffffff00)
10081  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
10082  DAG.getValueType(MVT::i8));
10083  else if (KnownZero == 0xffff0000)
10084  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
10085  DAG.getValueType(MVT::i16));
10086  }
10087 
10088  return Res;
10089 }
10090 
10091 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
10092  DAGCombinerInfo &DCI) const {
10093  switch (N->getOpcode()) {
10094  default: break;
10095  case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
10096  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
10097  case ISD::SUB: return PerformSUBCombine(N, DCI);
10098  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
10099  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
10100  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
10101  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
10102  case ARMISD::BFI: return PerformBFICombine(N, DCI);
10103  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
10104  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
10105  case ISD::STORE: return PerformSTORECombine(N, DCI);
10106  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
10107  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
10108  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
10109  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
10110  case ISD::FP_TO_SINT:
10111  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
10112  case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
10113  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
10114  case ISD::SHL:
10115  case ISD::SRA:
10116  case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
10117  case ISD::SIGN_EXTEND:
10118  case ISD::ZERO_EXTEND:
10119  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
10120  case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
10121  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
10122  case ISD::LOAD: return PerformLOADCombine(N, DCI);
10123  case ARMISD::VLD2DUP:
10124  case ARMISD::VLD3DUP:
10125  case ARMISD::VLD4DUP:
10126  return PerformVLDCombine(N, DCI);
10127  case ARMISD::BUILD_VECTOR:
10128  return PerformARMBUILD_VECTORCombine(N, DCI);
10129  case ISD::INTRINSIC_VOID:
10130  case ISD::INTRINSIC_W_CHAIN:
10131  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10132  case Intrinsic::arm_neon_vld1:
10133  case Intrinsic::arm_neon_vld2:
10134  case Intrinsic::arm_neon_vld3:
10135  case Intrinsic::arm_neon_vld4:
10136  case Intrinsic::arm_neon_vld2lane:
10137  case Intrinsic::arm_neon_vld3lane:
10138  case Intrinsic::arm_neon_vld4lane:
10139  case Intrinsic::arm_neon_vst1:
10140  case Intrinsic::arm_neon_vst2:
10141  case Intrinsic::arm_neon_vst3:
10142  case Intrinsic::arm_neon_vst4:
10143  case Intrinsic::arm_neon_vst2lane:
10144  case Intrinsic::arm_neon_vst3lane:
10145  case Intrinsic::arm_neon_vst4lane:
10146  return PerformVLDCombine(N, DCI);
10147  default: break;
10148  }
10149  break;
10150  }
10151  return SDValue();
10152 }
10153 
10154 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
10155  EVT VT) const {
10156  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
10157 }
10158 
10159 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
10160  unsigned,
10161  unsigned,
10162  bool *Fast) const {
10163  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
10164  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
10165 
10166  switch (VT.getSimpleVT().SimpleTy) {
10167  default:
10168  return false;
10169  case MVT::i8:
10170  case MVT::i16:
10171  case MVT::i32: {
10172  // Unaligned access can use (for example) LDRB, LDRH, LDR
10173  if (AllowsUnaligned) {
10174  if (Fast)
10175  *Fast = Subtarget->hasV7Ops();
10176  return true;
10177  }
10178  return false;
10179  }
10180  case MVT::f64:
10181  case MVT::v2f64: {
10182  // For any little-endian targets with NEON, we can support unaligned ld/st
10183  // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
10184  // A big-endian target may also explicitly support unaligned accesses.
10185  if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
10186  if (Fast)
10187  *Fast = true;
10188  return true;
10189  }
10190  return false;
10191  }
10192  }
10193 }
10194 
10195 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
10196  unsigned AlignCheck) {
10197  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
10198  (DstAlign == 0 || DstAlign % AlignCheck == 0));
10199 }
10200 
10201 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
10202  unsigned DstAlign, unsigned SrcAlign,
10203  bool IsMemset, bool ZeroMemset,
10204  bool MemcpyStrSrc,
10205  MachineFunction &MF) const {
10206  const Function *F = MF.getFunction();
10207 
10208  // See if we can use NEON instructions for this...
10209  if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
10210  !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
10211  bool Fast;
10212  if (Size >= 16 &&
10213  (memOpAlign(SrcAlign, DstAlign, 16) ||
10214  (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
10215  return MVT::v2f64;
10216  } else if (Size >= 8 &&
10217  (memOpAlign(SrcAlign, DstAlign, 8) ||
10218  (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
10219  Fast))) {
10220  return MVT::f64;
10221  }
10222  }
10223 
10224  // Lowering to i32/i16 if the size permits.
10225  if (Size >= 4)
10226  return MVT::i32;
10227  else if (Size >= 2)
10228  return MVT::i16;
10229 
10230  // Let the target-independent logic figure it out.
10231  return MVT::Other;
10232 }
10233 
10234 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
10235  if (Val.getOpcode() != ISD::LOAD)
10236  return false;
10237 
10238  EVT VT1 = Val.getValueType();
10239  if (!VT1.isSimple() || !VT1.isInteger() ||
10240  !VT2.isSimple() || !VT2.isInteger())
10241  return false;
10242 
10243  switch (VT1.getSimpleVT().SimpleTy) {
10244  default: break;
10245  case MVT::i1:
10246  case MVT::i8:
10247  case MVT::i16:
10248  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
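 // (LDRB / LDRH already write zeros into the upper bits of the destination
 // register, so no separate extend instruction is needed.)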
10249  return true;
10250  }
10251 
10252  return false;
10253 }
10254 
10255 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
10256  EVT VT = ExtVal.getValueType();
10257 
10258  if (!isTypeLegal(VT))
10259  return false;
10260 
10261  // Don't create a loadext if we can fold the extension into a wide/long
10262  // instruction.
10263  // If there's more than one user instruction, the loadext is desirable no
10264  // matter what. There can be two uses by the same instruction.
10265  if (ExtVal->use_empty() ||
10266  !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
10267  return true;
10268 
10269  SDNode *U = *ExtVal->use_begin();
10270  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
10271  U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
10272  return false;
10273 
10274  return true;
10275 }
10276 
10277 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
10278  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
10279  return false;
10280 
10281  if (!isTypeLegal(EVT::getEVT(Ty1)))
10282  return false;
10283 
10284  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
10285 
10286  // Assuming the caller doesn't have a zeroext or signext return parameter,
10287  // truncation all the way down to i1 is valid.
10288  return true;
10289 }
10290 
10291 
10292 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
10293  if (V < 0)
10294  return false;
10295 
10296  unsigned Scale = 1;
10297  switch (VT.getSimpleVT().SimpleTy) {
10298  default: return false;
10299  case MVT::i1:
10300  case MVT::i8:
10301  // Scale == 1;
10302  break;
10303  case MVT::i16:
10304  // Scale == 2;
10305  Scale = 2;
10306  break;
10307  case MVT::i32:
10308  // Scale == 4;
10309  Scale = 4;
10310  break;
10311  }
10312 
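 // After scaling, the offset must fit in an unsigned 5-bit field, so the
 // legal byte offsets are, e.g., 0..31 for i8, 0..62 (even) for i16, and
 // 0..124 (multiples of 4) for i32.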
10313  if ((V & (Scale - 1)) != 0)
10314  return false;
10315  V /= Scale;
10316  return V == (V & ((1LL << 5) - 1));
10317 }
10318 
10319 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
10320  const ARMSubtarget *Subtarget) {
10321  bool isNeg = false;
10322  if (V < 0) {
10323  isNeg = true;
10324  V = - V;
10325  }
10326 
10327  switch (VT.getSimpleVT().SimpleTy) {
10328  default: return false;
10329  case MVT::i1:
10330  case MVT::i8:
10331  case MVT::i16:
10332  case MVT::i32:
10333  // + imm12 or - imm8
10334  if (isNeg)
10335  return V == (V & ((1LL << 8) - 1));
10336  return V == (V & ((1LL << 12) - 1));
10337  case MVT::f32:
10338  case MVT::f64:
10339  // Same as ARM mode. FIXME: NEON?
10340  if (!Subtarget->hasVFP2())
10341  return false;
10342  if ((V & 3) != 0)
10343  return false;
10344  V >>= 2;
10345  return V == (V & ((1LL << 8) - 1));
10346  }
10347 }
10348 
10349 /// isLegalAddressImmediate - Return true if the integer value can be used
10350 /// as the offset of the target addressing mode for load / store of the
10351 /// given type.
10352 static bool isLegalAddressImmediate(int64_t V, EVT VT,
10353  const ARMSubtarget *Subtarget) {
10354  if (V == 0)
10355  return true;
10356 
10357  if (!VT.isSimple())
10358  return false;
10359 
10360  if (Subtarget->isThumb1Only())
10361  return isLegalT1AddressImmediate(V, VT);
10362  else if (Subtarget->isThumb2())
10363  return isLegalT2AddressImmediate(V, VT, Subtarget);
10364 
10365  // ARM mode.
10366  if (V < 0)
10367  V = - V;
10368  switch (VT.getSimpleVT().SimpleTy) {
10369  default: return false;
10370  case MVT::i1:
10371  case MVT::i8:
10372  case MVT::i32:
10373  // +- imm12
10374  return V == (V & ((1LL << 12) - 1));
10375  case MVT::i16:
10376  // +- imm8
10377  return V == (V & ((1LL << 8) - 1));
10378  case MVT::f32:
10379  case MVT::f64:
10380  if (!Subtarget->hasVFP2()) // FIXME: NEON?
10381  return false;
10382  if ((V & 3) != 0)
10383  return false;
10384  V >>= 2;
10385  return V == (V & ((1LL << 8) - 1));
10386  }
10387 }
10388 
10389 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
10390  EVT VT) const {
10391  int Scale = AM.Scale;
10392  if (Scale < 0)
10393  return false;
10394 
10395  switch (VT.getSimpleVT().SimpleTy) {
10396  default: return false;
10397  case MVT::i1:
10398  case MVT::i8:
10399  case MVT::i16:
10400  case MVT::i32:
10401  if (Scale == 1)
10402  return true;
10403  // r + r << imm
10404  Scale = Scale & ~1;
10405  return Scale == 2 || Scale == 4 || Scale == 8;
10406  case MVT::i64:
10407  // r + r
10408  if (((unsigned)AM.HasBaseReg + Scale) <= 2)
10409  return true;
10410  return false;
10411  case MVT::isVoid:
10412  // Note, we allow "void" uses (basically, uses that aren't loads or
10413  // stores), because arm allows folding a scale into many arithmetic
10414  // operations. This should be made more precise and revisited later.
10415 
10416  // Allow r << imm, but the imm has to be a multiple of two.
10417  if (Scale & 1) return false;
10418  return isPowerOf2_32(Scale);
10419  }
10420 }
10421 
10422 /// isLegalAddressingMode - Return true if the addressing mode represented
10423 /// by AM is legal for this target, for a load/store of the specified type.
10424 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
10425  const AddrMode &AM, Type *Ty,
10426  unsigned AS) const {
10427  EVT VT = getValueType(DL, Ty, true);
10428  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
10429  return false;
10430 
10431  // Can never fold addr of global into load/store.
10432  if (AM.BaseGV)
10433  return false;
10434 
10435  switch (AM.Scale) {
10436  case 0: // no scale reg, must be "r+i" or "r", or "i".
10437  break;
10438  case 1:
10439  if (Subtarget->isThumb1Only())
10440  return false;
10441  // FALL THROUGH.
10442  default:
10443  // ARM doesn't support any R+R*scale+imm addr modes.
10444  if (AM.BaseOffs)
10445  return false;
10446 
10447  if (!VT.isSimple())
10448  return false;
10449 
10450  if (Subtarget->isThumb2())
10451  return isLegalT2ScaledAddressingMode(AM, VT);
10452 
10453  int Scale = AM.Scale;
10454  switch (VT.getSimpleVT().SimpleTy) {
10455  default: return false;
10456  case MVT::i1:
10457  case MVT::i8:
10458  case MVT::i32:
10459  if (Scale < 0) Scale = -Scale;
10460  if (Scale == 1)
10461  return true;
10462  // r + r << imm
10463  return isPowerOf2_32(Scale & ~1);
10464  case MVT::i16:
10465  case MVT::i64:
10466  // r + r
10467  if (((unsigned)AM.HasBaseReg + Scale) <= 2)
10468  return true;
10469  return false;
10470 
10471  case MVT::isVoid:
10472  // Note, we allow "void" uses (basically, uses that aren't loads or
10473  // stores), because arm allows folding a scale into many arithmetic
10474  // operations. This should be made more precise and revisited later.
10475 
10476  // Allow r << imm, but the imm has to be a multiple of two.
10477  if (Scale & 1) return false;
10478  return isPowerOf2_32(Scale);
10479  }
10480  }
10481  return true;
10482 }
10483 
10484 /// isLegalICmpImmediate - Return true if the specified immediate is a legal
10485 /// icmp immediate, that is, the target has icmp instructions which can compare
10486 /// a register against the immediate without having to materialize the
10487 /// immediate into a register.
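/// For example, ARM and Thumb2 can compare a register against -255 with
/// "cmn rN, #255", so such negative immediates are accepted below.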
10488 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
10489  // Thumb2 and ARM modes can use cmn for negative immediates.
10490  if (!Subtarget->isThumb())
10491  return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
10492  if (Subtarget->isThumb2())
10493  return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
10494  // Thumb1 doesn't have cmn and only has 8-bit immediates.
10495  return Imm >= 0 && Imm <= 255;
10496 }
10497 
10498 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
10499 /// *or sub* immediate, that is, the target has add or sub instructions which can
10500 /// add a register with the immediate without having to materialize the
10501 /// immediate into a register.
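/// For example, an add of -8 can be selected as a sub of 8, so only the
/// absolute value of the immediate needs to be encodable.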
10502 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
10503  // Same encoding for add/sub, just flip the sign.
10504  int64_t AbsImm = std::abs(Imm);
10505  if (!Subtarget->isThumb())
10506  return ARM_AM::getSOImmVal(AbsImm) != -1;
10507  if (Subtarget->isThumb2())
10508  return ARM_AM::getT2SOImmVal(AbsImm) != -1;
10509  // Thumb1 only has 8-bit unsigned immediates.
10510  return AbsImm >= 0 && AbsImm <= 255;
10511 }
10512 
10513 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
10514  bool isSEXTLoad, SDValue &Base,
10515  SDValue &Offset, bool &isInc,
10516  SelectionDAG &DAG) {
10517  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
10518  return false;
10519 
10520  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
10521  // AddressingMode 3
10522  Base = Ptr->getOperand(0);
10523  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10524  int RHSC = (int)RHS->getZExtValue();
10525  if (RHSC < 0 && RHSC > -256) {
10526  assert(Ptr->getOpcode() == ISD::ADD);
10527  isInc = false;
10528  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
10529  return true;
10530  }
10531  }
10532  isInc = (Ptr->getOpcode() == ISD::ADD);
10533  Offset = Ptr->getOperand(1);
10534  return true;
10535  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
10536  // AddressingMode 2
10537  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10538  int RHSC = (int)RHS->getZExtValue();
10539  if (RHSC < 0 && RHSC > -0x1000) {
10540  assert(Ptr->getOpcode() == ISD::ADD);
10541  isInc = false;
10542  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
10543  Base = Ptr->getOperand(0);
10544  return true;
10545  }
10546  }
10547 
10548  if (Ptr->getOpcode() == ISD::ADD) {
10549  isInc = true;
10550  ARM_AM::ShiftOpc ShOpcVal=
10551  ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
10552  if (ShOpcVal != ARM_AM::no_shift) {
10553  Base = Ptr->getOperand(1);
10554  Offset = Ptr->getOperand(0);
10555  } else {
10556  Base = Ptr->getOperand(0);
10557  Offset = Ptr->getOperand(1);
10558  }
10559  return true;
10560  }
10561 
10562  isInc = (Ptr->getOpcode() == ISD::ADD);
10563  Base = Ptr->getOperand(0);
10564  Offset = Ptr->getOperand(1);
10565  return true;
10566  }
10567 
10568  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
10569  return false;
10570 }
10571 
10572 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
10573  bool isSEXTLoad, SDValue &Base,
10574  SDValue &Offset, bool &isInc,
10575  SelectionDAG &DAG) {
10576  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
10577  return false;
10578 
10579  Base = Ptr->getOperand(0);
10580  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10581  int RHSC = (int)RHS->getZExtValue();
10582  if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
10583  assert(Ptr->getOpcode() == ISD::ADD);
10584  isInc = false;
10585  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
10586  return true;
10587  } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
10588  isInc = Ptr->getOpcode() == ISD::ADD;
10589  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
10590  return true;
10591  }
10592  }
10593 
10594  return false;
10595 }
10596 
10597 /// getPreIndexedAddressParts - returns true by value, base pointer and
10598 /// offset pointer and addressing mode by reference if the node's address
10599 /// can be legally represented as pre-indexed load / store address.
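/// For example, a load whose address is (add rN, #4) can use the pre-indexed
/// form "ldr rD, [rN, #4]!", updating rN as a side effect.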
10600 bool
10601 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
10602  SDValue &Offset,
10603  ISD::MemIndexedMode &AM,
10604  SelectionDAG &DAG) const {
10605  if (Subtarget->isThumb1Only())
10606  return false;
10607 
10608  EVT VT;
10609  SDValue Ptr;
10610  bool isSEXTLoad = false;
10611  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10612  Ptr = LD->getBasePtr();
10613  VT = LD->getMemoryVT();
10614  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
10615  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10616  Ptr = ST->getBasePtr();
10617  VT = ST->getMemoryVT();
10618  } else
10619  return false;
10620 
10621  bool isInc;
10622  bool isLegal = false;
10623  if (Subtarget->isThumb2())
10624  isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
10625  Offset, isInc, DAG);
10626  else
10627  isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
10628  Offset, isInc, DAG);
10629  if (!isLegal)
10630  return false;
10631 
10632  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
10633  return true;
10634 }
10635 
10636 /// getPostIndexedAddressParts - returns true by value, base pointer and
10637 /// offset pointer and addressing mode by reference if this node can be
10638 /// combined with a load / store to form a post-indexed load / store.
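/// For example, a load from rN followed by an increment of rN by #4 can use
/// the post-indexed form "ldr rD, [rN], #4".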
10639 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
10640  SDValue &Base,
10641  SDValue &Offset,
10642  ISD::MemIndexedMode &AM,
10643  SelectionDAG &DAG) const {
10644  if (Subtarget->isThumb1Only())
10645  return false;
10646 
10647  EVT VT;
10648  SDValue Ptr;
10649  bool isSEXTLoad = false;
10650  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10651  VT = LD->getMemoryVT();
10652  Ptr = LD->getBasePtr();
10653  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
10654  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10655  VT = ST->getMemoryVT();
10656  Ptr = ST->getBasePtr();
10657  } else
10658  return false;
10659 
10660  bool isInc;
10661  bool isLegal = false;
10662  if (Subtarget->isThumb2())
10663  isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
10664  isInc, DAG);
10665  else
10666  isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
10667  isInc, DAG);
10668  if (!isLegal)
10669  return false;
10670 
10671  if (Ptr != Base) {
10672  // Swap base ptr and offset to catch more post-index load / store when
10673  // it's legal. In Thumb2 mode, offset must be an immediate.
10674  if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
10675  !Subtarget->isThumb2())
10676  std::swap(Base, Offset);
10677 
10678  // Post-indexed load / store update the base pointer.
10679  if (Ptr != Base)
10680  return false;
10681  }
10682 
10683  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
10684  return true;
10685 }
10686 
10687 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
10688  APInt &KnownZero,
10689  APInt &KnownOne,
10690  const SelectionDAG &DAG,
10691  unsigned Depth) const {
10692  unsigned BitWidth = KnownOne.getBitWidth();
10693  KnownZero = KnownOne = APInt(BitWidth, 0);
10694  switch (Op.getOpcode()) {
10695  default: break;
10696  case ARMISD::ADDC:
10697  case ARMISD::ADDE:
10698  case ARMISD::SUBC:
10699  case ARMISD::SUBE:
10700  // These nodes' second result is a boolean
10701  if (Op.getResNo() == 0)
10702  break;
10703  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
10704  break;
10705  case ARMISD::CMOV: {
10706  // Bits are known zero/one if known on the LHS and RHS.
10707  DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
10708  if (KnownZero == 0 && KnownOne == 0) return;
10709 
10710  APInt KnownZeroRHS, KnownOneRHS;
10711  DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
10712  KnownZero &= KnownZeroRHS;
10713  KnownOne &= KnownOneRHS;
10714  return;
10715  }
10716  case ISD::INTRINSIC_W_CHAIN: {
10717  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
10718  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
10719  switch (IntID) {
10720  default: return;
10721  case Intrinsic::arm_ldaex:
10722  case Intrinsic::arm_ldrex: {
10723  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
10724  unsigned MemBits = VT.getScalarType().getSizeInBits();
10725  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
10726  return;
10727  }
10728  }
10729  }
10730  }
10731 }
10732 
10733 //===----------------------------------------------------------------------===//
10734 // ARM Inline Assembly Support
10735 //===----------------------------------------------------------------------===//
10736 
10737 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
10738  // Looking for "rev" which is V6+.
10739  if (!Subtarget->hasV6Ops())
10740  return false;
10741 
10742  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
10743  std::string AsmStr = IA->getAsmString();
10744  SmallVector<StringRef, 4> AsmPieces;
10745  SplitString(AsmStr, AsmPieces, ";\n");
10746 
10747  switch (AsmPieces.size()) {
10748  default: return false;
10749  case 1:
10750  AsmStr = AsmPieces[0];
10751  AsmPieces.clear();
10752  SplitString(AsmStr, AsmPieces, " \t,");
10753 
10754  // rev $0, $1
10755  if (AsmPieces.size() == 3 &&
10756  AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
10757  IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
10758  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
10759  if (Ty && Ty->getBitWidth() == 32)
10760  return IntrinsicLowering::LowerToByteSwap(CI);
10761  }
10762  break;
10763  }
10764 
10765  return false;
10766 }
10767 
10768 /// getConstraintType - Given a constraint letter, return the type of
10769 /// constraint it is for this target.
10770 ARMTargetLowering::ConstraintType
10771 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
10772  if (Constraint.size() == 1) {
10773  switch (Constraint[0]) {
10774  default: break;
10775  case 'l': return C_RegisterClass;
10776  case 'w': return C_RegisterClass;
10777  case 'h': return C_RegisterClass;
10778  case 'x': return C_RegisterClass;
10779  case 't': return C_RegisterClass;
10780  case 'j': return C_Other; // Constant for movw.
10781  // An address with a single base register. Due to the way we
10782  // currently handle addresses it is the same as an 'r' memory constraint.
10783  case 'Q': return C_Memory;
10784  }
10785  } else if (Constraint.size() == 2) {
10786  switch (Constraint[0]) {
10787  default: break;
10788  // All 'U+' constraints are addresses.
10789  case 'U': return C_Memory;
10790  }
10791  }
10792  return TargetLowering::getConstraintType(Constraint);
10793 }
10794 
10795 /// Examine constraint type and operand type and determine a weight value.
10796 /// This object must already have been set up with the operand type
10797 /// and the current alternative constraint selected.
10798 TargetLowering::ConstraintWeight
10799 ARMTargetLowering::getSingleConstraintMatchWeight(
10800  AsmOperandInfo &info, const char *constraint) const {
10801  ConstraintWeight weight = CW_Invalid;
10802  Value *CallOperandVal = info.CallOperandVal;
10803  // If we don't have a value, we can't do a match,
10804  // but allow it at the lowest weight.
10805  if (!CallOperandVal)
10806  return CW_Default;
10807  Type *type = CallOperandVal->getType();
10808  // Look at the constraint type.
10809  switch (*constraint) {
10810  default:
10811  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
10812  break;
10813  case 'l':
10814  if (type->isIntegerTy()) {
10815  if (Subtarget->isThumb())
10816  weight = CW_SpecificReg;
10817  else
10818  weight = CW_Register;
10819  }
10820  break;
10821  case 'w':
10822  if (type->isFloatingPointTy())
10823  weight = CW_Register;
10824  break;
10825  }
10826  return weight;
10827 }
10828 
10829 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
10830 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
10831  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
10832  if (Constraint.size() == 1) {
10833  // GCC ARM Constraint Letters
10834  switch (Constraint[0]) {
10835  case 'l': // Low regs or general regs.
10836  if (Subtarget->isThumb())
10837  return RCPair(0U, &ARM::tGPRRegClass);
10838  return RCPair(0U, &ARM::GPRRegClass);
10839  case 'h': // High regs or no regs.
10840  if (Subtarget->isThumb())
10841  return RCPair(0U, &ARM::hGPRRegClass);
10842  break;
10843  case 'r':
10844  if (Subtarget->isThumb1Only())
10845  return RCPair(0U, &ARM::tGPRRegClass);
10846  return RCPair(0U, &ARM::GPRRegClass);
10847  case 'w':
10848  if (VT == MVT::Other)
10849  break;
10850  if (VT == MVT::f32)
10851  return RCPair(0U, &ARM::SPRRegClass);
10852  if (VT.getSizeInBits() == 64)
10853  return RCPair(0U, &ARM::DPRRegClass);
10854  if (VT.getSizeInBits() == 128)
10855  return RCPair(0U, &ARM::QPRRegClass);
10856  break;
10857  case 'x':
10858  if (VT == MVT::Other)
10859  break;
10860  if (VT == MVT::f32)
10861  return RCPair(0U, &ARM::SPR_8RegClass);
10862  if (VT.getSizeInBits() == 64)
10863  return RCPair(0U, &ARM::DPR_8RegClass);
10864  if (VT.getSizeInBits() == 128)
10865  return RCPair(0U, &ARM::QPR_8RegClass);
10866  break;
10867  case 't':
10868  if (VT == MVT::f32)
10869  return RCPair(0U, &ARM::SPRRegClass);
10870  break;
10871  }
10872  }
10873  if (StringRef("{cc}").equals_lower(Constraint))
10874  return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
10875 
10876  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10877 }
10878 
10879 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
10880 /// vector. If it is invalid, don't add anything to Ops.
10881 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
10882  std::string &Constraint,
10883  std::vector<SDValue>&Ops,
10884  SelectionDAG &DAG) const {
10885  SDValue Result;
10886 
10887  // Currently only support length 1 constraints.
10888  if (Constraint.length() != 1) return;
10889 
10890  char ConstraintLetter = Constraint[0];
10891  switch (ConstraintLetter) {
10892  default: break;
10893  case 'j':
10894  case 'I': case 'J': case 'K': case 'L':
10895  case 'M': case 'N': case 'O':
10896  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
10897  if (!C)
10898  return;
10899 
10900  int64_t CVal64 = C->getSExtValue();
10901  int CVal = (int) CVal64;
10902  // None of these constraints allow values larger than 32 bits. Check
10903  // that the value fits in an int.
10904  if (CVal != CVal64)
10905  return;
10906 
10907  switch (ConstraintLetter) {
10908  case 'j':
10909  // Constant suitable for movw, must be between 0 and
10910  // 65535.
10911  if (Subtarget->hasV6T2Ops())
10912  if (CVal >= 0 && CVal <= 65535)
10913  break;
10914  return;
10915  case 'I':
10916  if (Subtarget->isThumb1Only()) {
10917  // This must be a constant between 0 and 255, for ADD
10918  // immediates.
10919  if (CVal >= 0 && CVal <= 255)
10920  break;
10921  } else if (Subtarget->isThumb2()) {
10922  // A constant that can be used as an immediate value in a
10923  // data-processing instruction.
10924  if (ARM_AM::getT2SOImmVal(CVal) != -1)
10925  break;
10926  } else {
10927  // A constant that can be used as an immediate value in a
10928  // data-processing instruction.
10929  if (ARM_AM::getSOImmVal(CVal) != -1)
10930  break;
10931  }
10932  return;
10933 
10934  case 'J':
10935  if (Subtarget->isThumb()) { // FIXME thumb2
10936  // This must be a constant between -255 and -1, for negated ADD
10937  // immediates. This can be used in GCC with an "n" modifier that
10938  // prints the negated value, for use with SUB instructions. It is
10939  // not useful otherwise but is implemented for compatibility.
10940  if (CVal >= -255 && CVal <= -1)
10941  break;
10942  } else {
10943  // This must be a constant between -4095 and 4095. It is not clear
10944  // what this constraint is intended for. Implemented for
10945  // compatibility with GCC.
10946  if (CVal >= -4095 && CVal <= 4095)
10947  break;
10948  }
10949  return;
10950 
10951  case 'K':
10952  if (Subtarget->isThumb1Only()) {
10953  // A 32-bit value where only one byte has a nonzero value. Exclude
10954  // zero to match GCC. This constraint is used by GCC internally for
10955  // constants that can be loaded with a move/shift combination.
10956  // It is not useful otherwise but is implemented for compatibility.
10957  if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
10958  break;
10959  } else if (Subtarget->isThumb2()) {
10960  // A constant whose bitwise inverse can be used as an immediate
10961  // value in a data-processing instruction. This can be used in GCC
10962  // with a "B" modifier that prints the inverted value, for use with
10963  // BIC and MVN instructions. It is not useful otherwise but is
10964  // implemented for compatibility.
10965  if (ARM_AM::getT2SOImmVal(~CVal) != -1)
10966  break;
10967  } else {
10968  // A constant whose bitwise inverse can be used as an immediate
10969  // value in a data-processing instruction. This can be used in GCC
10970  // with a "B" modifier that prints the inverted value, for use with
10971  // BIC and MVN instructions. It is not useful otherwise but is
10972  // implemented for compatibility.
10973  if (ARM_AM::getSOImmVal(~CVal) != -1)
10974  break;
10975  }
10976  return;
10977 
10978  case 'L':
10979  if (Subtarget->isThumb1Only()) {
10980  // This must be a constant between -7 and 7,
10981  // for 3-operand ADD/SUB immediate instructions.
10982  if (CVal >= -7 && CVal < 7)
10983  break;
10984  } else if (Subtarget->isThumb2()) {
10985  // A constant whose negation can be used as an immediate value in a
10986  // data-processing instruction. This can be used in GCC with an "n"
10987  // modifier that prints the negated value, for use with SUB
10988  // instructions. It is not useful otherwise but is implemented for
10989  // compatibility.
10990  if (ARM_AM::getT2SOImmVal(-CVal) != -1)
10991  break;
10992  } else {
10993  // A constant whose negation can be used as an immediate value in a
10994  // data-processing instruction. This can be used in GCC with an "n"
10995  // modifier that prints the negated value, for use with SUB
10996  // instructions. It is not useful otherwise but is implemented for
10997  // compatibility.
10998  if (ARM_AM::getSOImmVal(-CVal) != -1)
10999  break;
11000  }
11001  return;
11002 
11003  case 'M':
11004  if (Subtarget->isThumb()) { // FIXME thumb2
11005  // This must be a multiple of 4 between 0 and 1020, for
11006  // ADD sp + immediate.
11007  if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
11008  break;
11009  } else {
11010  // A power of two or a constant between 0 and 32. This is used in
11011  // GCC for the shift amount on shifted register operands, but it is
11012  // useful in general for any shift amounts.
11013  if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
11014  break;
11015  }
11016  return;
11017 
11018  case 'N':
11019  if (Subtarget->isThumb()) { // FIXME thumb2
11020  // This must be a constant between 0 and 31, for shift amounts.
11021  if (CVal >= 0 && CVal <= 31)
11022  break;
11023  }
11024  return;
11025 
11026  case 'O':
11027  if (Subtarget->isThumb()) { // FIXME thumb2
11028  // This must be a multiple of 4 between -508 and 508, for
11029  // ADD/SUB sp = sp + immediate.
11030  if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
11031  break;
11032  }
11033  return;
11034  }
11035  Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
11036  break;
11037  }
11038 
11039  if (Result.getNode()) {
11040  Ops.push_back(Result);
11041  return;
11042  }
11043  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
11044 }
11045 
11046 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
11047  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
11048  unsigned Opcode = Op->getOpcode();
11049  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
11050  "Invalid opcode for Div/Rem lowering");
11051  bool isSigned = (Opcode == ISD::SDIVREM);
11052  EVT VT = Op->getValueType(0);
11053  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
11054 
11055  RTLIB::Libcall LC;
11056  switch (VT.getSimpleVT().SimpleTy) {
11057  default: llvm_unreachable("Unexpected request for libcall!");
11058  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
11059  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
11060  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
11061  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
11062  }
11063 
11064  SDValue InChain = DAG.getEntryNode();
11065 
11066  TargetLowering::ArgListTy Args;
11067  TargetLowering::ArgListEntry Entry;
11068  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
11069  EVT ArgVT = Op->getOperand(i).getValueType();
11070  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
11071  Entry.Node = Op->getOperand(i);
11072  Entry.Ty = ArgTy;
11073  Entry.isSExt = isSigned;
11074  Entry.isZExt = !isSigned;
11075  Args.push_back(Entry);
11076  }
11077 
11078  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
11079  getPointerTy(DAG.getDataLayout()));
11080 
11081  Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
11082 
11083  SDLoc dl(Op);
11084  TargetLowering::CallLoweringInfo CLI(DAG);
11085  CLI.setDebugLoc(dl).setChain(InChain)
11086  .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
11087  .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
11088 
11089  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
11090  return CallInfo.first;
11091 }
11092 
11093 SDValue
11094 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
11095  assert(Subtarget->isTargetWindows() && "unsupported target platform");
11096  SDLoc DL(Op);
11097 
11098  // Get the inputs.
11099  SDValue Chain = Op.getOperand(0);
11100  SDValue Size = Op.getOperand(1);
11101 
11102  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
11103  DAG.getConstant(2, DL, MVT::i32));
11104 
11105  SDValue Flag;
11106  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
11107  Flag = Chain.getValue(1);
11108 
11109  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
11110  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
11111 
11112  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
11113  Chain = NewSP.getValue(1);
11114 
11115  SDValue Ops[2] = { NewSP, Chain };
11116  return DAG.getMergeValues(Ops, DL);
11117 }
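A minimal example of what reaches this hook (illustrative only; _alloca is the MSVC-style intrinsic from <malloc.h>): a variable-sized stack allocation becomes an ISD::DYNAMIC_STACKALLOC node, whose size in 4-byte words is handed to the __chkstk helper through R4 as above.

#include <malloc.h>

void consume(void *);

void demo(unsigned n) {
  void *p = _alloca(n); // dynamic stack allocation -> WIN__CHKSTK sequence
  consume(p);
}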
11118 
11119 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11120  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
11121  "Unexpected type for custom-lowering FP_EXTEND");
11122 
11123  RTLIB::Libcall LC;
11124  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
11125 
11126  SDValue SrcVal = Op.getOperand(0);
11127  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
11128  /*isSigned*/ false, SDLoc(Op)).first;
11129 }
11130 
11131 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11132  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
11133  Subtarget->isFPOnlySP() &&
11134  "Unexpected type for custom-lowering FP_ROUND");
11135 
11136  RTLIB::Libcall LC;
11137  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
11138 
11139  SDValue SrcVal = Op.getOperand(0);
11140  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
11141  /*isSigned*/ false, SDLoc(Op)).first;
11142 }
11143 
11144 bool
11145 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
11146  // The ARM target isn't yet aware of offsets.
11147  return false;
11148 }
11149 
11150 bool ARM::isBitFieldInvertedMask(unsigned v) {
11151  if (v == 0xffffffff)
11152  return false;
11153 
11154  // There can be 1's on either or both "outsides"; all the "inside"
11155  // bits must be 0's.
11156  return isShiftedMask_32(~v);
11157 }
11158 
11159 /// isFPImmLegal - Returns true if the target can instruction select the
11160 /// specified FP immediate natively. If false, the legalizer will
11161 /// materialize the FP immediate as a load from a constant pool.
11162 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
11163  if (!Subtarget->hasVFP3())
11164  return false;
11165  if (VT == MVT::f32)
11166  return ARM_AM::getFP32Imm(Imm) != -1;
11167  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
11168  return ARM_AM::getFP64Imm(Imm) != -1;
11169  return false;
11170 }
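Illustrative effect, with example values only (assumes a VFP3 subtarget that is not FP-only-SP for the f64 case): immediates that fit the VMOV encoding are kept as immediates, while others are materialized as constant-pool loads.

double encodable()     { return 1.0; } // representable as a VMOV immediate
double not_encodable() { return 0.1; } // not encodable -> literal-pool load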
11171 
11172 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
11173 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
11174 /// specified in the intrinsic calls.
11175 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11176  const CallInst &I,
11177  unsigned Intrinsic) const {
11178  switch (Intrinsic) {
11179  case Intrinsic::arm_neon_vld1:
11180  case Intrinsic::arm_neon_vld2:
11181  case Intrinsic::arm_neon_vld3:
11182  case Intrinsic::arm_neon_vld4:
11183  case Intrinsic::arm_neon_vld2lane:
11184  case Intrinsic::arm_neon_vld3lane:
11185  case Intrinsic::arm_neon_vld4lane: {
11186  Info.opc = ISD::INTRINSIC_W_CHAIN;
11187  // Conservatively set memVT to the entire set of vectors loaded.
11188  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
11189  uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
11190  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11191  Info.ptrVal = I.getArgOperand(0);
11192  Info.offset = 0;
11193  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
11194  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
11195  Info.vol = false; // volatile loads with NEON intrinsics not supported
11196  Info.readMem = true;
11197  Info.writeMem = false;
11198  return true;
11199  }
11200  case Intrinsic::arm_neon_vst1:
11201  case Intrinsic::arm_neon_vst2:
11202  case Intrinsic::arm_neon_vst3:
11203  case Intrinsic::arm_neon_vst4:
11204  case Intrinsic::arm_neon_vst2lane:
11205  case Intrinsic::arm_neon_vst3lane:
11206  case Intrinsic::arm_neon_vst4lane: {
11207  Info.opc = ISD::INTRINSIC_VOID;
11208  // Conservatively set memVT to the entire set of vectors stored.
11209  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
11210  unsigned NumElts = 0;
11211  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
11212  Type *ArgTy = I.getArgOperand(ArgI)->getType();
11213  if (!ArgTy->isVectorTy())
11214  break;
11215  NumElts += DL.getTypeAllocSize(ArgTy) / 8;
11216  }
11217  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11218  Info.ptrVal = I.getArgOperand(0);
11219  Info.offset = 0;
11220  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
11221  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
11222  Info.vol = false; // volatile stores with NEON intrinsics not supported
11223  Info.readMem = false;
11224  Info.writeMem = true;
11225  return true;
11226  }
11227  case Intrinsic::arm_ldaex:
11228  case Intrinsic::arm_ldrex: {
11229  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
11230  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
11231  Info.opc = ISD::INTRINSIC_W_CHAIN;
11232  Info.memVT = MVT::getVT(PtrTy->getElementType());
11233  Info.ptrVal = I.getArgOperand(0);
11234  Info.offset = 0;
11235  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
11236  Info.vol = true;
11237  Info.readMem = true;
11238  Info.writeMem = false;
11239  return true;
11240  }
11241  case Intrinsic::arm_stlex:
11242  case Intrinsic::arm_strex: {
11243  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
11244  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11245  Info.opc = ISD::INTRINSIC_W_CHAIN;
11246  Info.memVT = MVT::getVT(PtrTy->getElementType());
11247  Info.ptrVal = I.getArgOperand(1);
11248  Info.offset = 0;
11249  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
11250  Info.vol = true;
11251  Info.readMem = false;
11252  Info.writeMem = true;
11253  return true;
11254  }
11255  case Intrinsic::arm_stlexd:
11256  case Intrinsic::arm_strexd: {
11257  Info.opc = ISD::INTRINSIC_W_CHAIN;
11258  Info.memVT = MVT::i64;
11259  Info.ptrVal = I.getArgOperand(2);
11260  Info.offset = 0;
11261  Info.align = 8;
11262  Info.vol = true;
11263  Info.readMem = false;
11264  Info.writeMem = true;
11265  return true;
11266  }
11267  case Intrinsic::arm_ldaexd:
11268  case Intrinsic::arm_ldrexd: {
11269  Info.opc = ISD::INTRINSIC_W_CHAIN;
11270  Info.memVT = MVT::i64;
11271  Info.ptrVal = I.getArgOperand(0);
11272  Info.offset = 0;
11273  Info.align = 8;
11274  Info.vol = true;
11275  Info.readMem = true;
11276  Info.writeMem = false;
11277  return true;
11278  }
11279  default:
11280  break;
11281  }
11282 
11283  return false;
11284 }
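For reference, one way these intrinsics arise from user code (illustrative, assuming <arm_neon.h> is available): the vld2q_s32 intrinsic maps onto llvm.arm.neon.vld2, which the hook above models as a single chained memory intrinsic whose memVT conservatively covers all returned vectors.

#include <arm_neon.h>

int32x4x2_t load_two_vectors(const int32_t *p) {
  return vld2q_s32(p); // becomes llvm.arm.neon.vld2 with the alignment operand
}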
11285 
11286 /// \brief Returns true if it is beneficial to convert a load of a constant
11287 /// to just the constant itself.
11288 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
11289  Type *Ty) const {
11290  assert(Ty->isIntegerTy());
11291 
11292  unsigned Bits = Ty->getPrimitiveSizeInBits();
11293  if (Bits == 0 || Bits > 32)
11294  return false;
11295  return true;
11296 }
11297 
11298 bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
11299 
11300 Instruction *ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
11301  ARM_MB::MemBOpt Domain) const {
11302  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11303 
11304  // First, if the target has no DMB, see what fallback we can use.
11305  if (!Subtarget->hasDataBarrier()) {
11306  // Some ARMv6 cpus can support data barriers with an mcr instruction.
11307  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
11308  // here.
11309  if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
11310  Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
11311  Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
11312  Builder.getInt32(0), Builder.getInt32(7),
11313  Builder.getInt32(10), Builder.getInt32(5)};
11314  return Builder.CreateCall(MCR, args);
11315  } else {
11316  // Instead of using barriers, atomic accesses on these subtargets use
11317  // libcalls.
11318  llvm_unreachable("makeDMB on a target so old that it has no barriers");
11319  }
11320  } else {
11321  Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
11322  // Only a full system barrier exists in the M-class architectures.
11323  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
11324  Constant *CDomain = Builder.getInt32(Domain);
11325  return Builder.CreateCall(DMB, CDomain);
11326  }
11327 }
11328 
11329 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11330 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11331  AtomicOrdering Ord, bool IsStore,
11332  bool IsLoad) const {
11333  if (!getInsertFencesForAtomic())
11334  return nullptr;
11335 
11336  switch (Ord) {
11337  case NotAtomic:
11338  case Unordered:
11339  llvm_unreachable("Invalid fence: unordered/non-atomic");
11340  case Monotonic:
11341  case Acquire:
11342  return nullptr; // Nothing to do
11343  case SequentiallyConsistent:
11344  if (!IsStore)
11345  return nullptr; // Nothing to do
11346  /*FALLTHROUGH*/
11347  case Release:
11348  case AcquireRelease:
11349  if (Subtarget->isSwift())
11350  return makeDMB(Builder, ARM_MB::ISHST);
11351  // FIXME: add a comment with a link to documentation justifying this.
11352  else
11353  return makeDMB(Builder, ARM_MB::ISH);
11354  }
11355  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
11356 }
11357 
11358 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11359  AtomicOrdering Ord, bool IsStore,
11360  bool IsLoad) const {
11361  if (!getInsertFencesForAtomic())
11362  return nullptr;
11363 
11364  switch (Ord) {
11365  case NotAtomic:
11366  case Unordered:
11367  llvm_unreachable("Invalid fence: unordered/not-atomic");
11368  case Monotonic:
11369  case Release:
11370  return nullptr; // Nothing to do
11371  case Acquire:
11372  case AcquireRelease:
11373  case SequentiallyConsistent:
11374  return makeDMB(Builder, ARM_MB::ISH);
11375  }
11376  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
11377 }
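Taken together with emitLeadingFence above, and only when getInsertFencesForAtomic() is true, the barrier placement for C++11 atomics comes out as sketched below; the per-line comments describe the expected instruction shape and are illustrative rather than guaranteed.

#include <atomic>

std::atomic<int> flag;

void release_store() { flag.store(1, std::memory_order_release); }    // dmb ish; str
void seq_cst_store() { flag.store(1, std::memory_order_seq_cst); }    // dmb ish; str; dmb ish
int  acquire_load()  { return flag.load(std::memory_order_acquire); } // ldr; dmb ish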
11378 
11379 // Loads and stores less than 64-bits are already atomic; ones above that
11380 // are doomed anyway, so defer to the default libcall and blame the OS when
11381 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
11382 // anything for those.
11383 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
11384  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
11385  return (Size == 64) && !Subtarget->isMClass();
11386 }
11387 
11388 // Loads and stores less than 64-bits are already atomic; ones above that
11389 // are doomed anyway, so defer to the default libcall and blame the OS when
11390 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
11391 // anything for those.
11392 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
11393 // guarantee, see DDI0406C ARM architecture reference manual,
11394 // sections A8.8.72-74 LDRD)
11395 bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
11396  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
11397  return (Size == 64) && !Subtarget->isMClass();
11398 }
11399 
11400 // For the real atomic operations, we have ldrex/strex up to 32 bits,
11401 // and up to 64 bits on the non-M profiles
11402 TargetLoweringBase::AtomicRMWExpansionKind
11403 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
11404  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
11405  return (Size <= (Subtarget->isMClass() ? 32U : 64U))
11406  ? AtomicRMWExpansionKind::LLSC
11407  : AtomicRMWExpansionKind::None;
11408 }
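Concretely (illustrative C++; the rewriting itself is done by the target-independent AtomicExpand pass), returning LLSC here means a read-modify-write like the one below is expanded into a ldrex/strex retry loop built from emitLoadLinked and emitStoreConditional further down, rather than being turned into a runtime call.

#include <atomic>

std::atomic<int> counter;

int bump() {
  // A 32-bit RMW is within the LL/SC size limit on every profile.
  return counter.fetch_add(1, std::memory_order_relaxed);
}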
11409 
11410 // This has so far only been implemented for MachO.
11411 bool ARMTargetLowering::useLoadStackGuardNode() const {
11412  return Subtarget->isTargetMachO();
11413 }
11414 
11415 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
11416  unsigned &Cost) const {
11417  // If we do not have NEON, vector types are not natively supported.
11418  if (!Subtarget->hasNEON())
11419  return false;
11420 
11421  // Floating point values and vector values map to the same register file.
11422  // Therefore, although we could do a store + extract of a vector type, it is
11423  // better to leave it as a float, since we have more freedom in the
11424  // addressing modes for those.
11425  if (VectorTy->isFPOrFPVectorTy())
11426  return false;
11427 
11428  // If the index is unknown at compile time, this is very expensive to lower
11429  // and it is not possible to combine the store with the extract.
11430  if (!isa<ConstantInt>(Idx))
11431  return false;
11432 
11433  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
11434  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
11435  // We can do a store + vector extract on any vector that fits perfectly in a D
11436  // or Q register.
11437  if (BitWidth == 64 || BitWidth == 128) {
11438  Cost = 0;
11439  return true;
11440  }
11441  return false;
11442 }
11443 
11444 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
11445  AtomicOrdering Ord) const {
11446  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11447  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
11448  bool IsAcquire = isAtLeastAcquire(Ord);
11449 
11450  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
11451  // intrinsic must return {i32, i32} and we have to recombine them into a
11452  // single i64 here.
11453  if (ValTy->getPrimitiveSizeInBits() == 64) {
11454  Intrinsic::ID Int =
11455  IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
11456  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
11457 
11458  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11459  Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
11460 
11461  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
11462  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
11463  if (!Subtarget->isLittle())
11464  std::swap (Lo, Hi);
11465  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
11466  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
11467  return Builder.CreateOr(
11468  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
11469  }
11470 
11471  Type *Tys[] = { Addr->getType() };
11472  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
11473  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
11474 
11475  return Builder.CreateTruncOrBitCast(
11476  Builder.CreateCall(Ldrex, Addr),
11477  cast<PointerType>(Addr->getType())->getElementType());
11478 }
11479 
11480 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
11481  Value *Addr,
11482  AtomicOrdering Ord) const {
11483  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11484  bool IsRelease = isAtLeastRelease(Ord);
11485 
11486  // Since the intrinsics must have legal type, the i64 intrinsics take two
11487  // parameters: "i32, i32". We must marshal Val into the appropriate form
11488  // before the call.
11489  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
11490  Intrinsic::ID Int =
11491  IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
11492  Function *Strex = Intrinsic::getDeclaration(M, Int);
11493  Type *Int32Ty = Type::getInt32Ty(M->getContext());
11494 
11495  Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
11496  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
11497  if (!Subtarget->isLittle())
11498  std::swap (Lo, Hi);
11499  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11500  return Builder.CreateCall(Strex, {Lo, Hi, Addr});
11501  }
11502 
11503  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
11504  Type *Tys[] = { Addr->getType() };
11505  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
11506 
11507  return Builder.CreateCall(
11508  Strex, {Builder.CreateZExtOrBitCast(
11509  Val, Strex->getFunctionType()->getParamType(0)),
11510  Addr});
11511 }
11512 
11513 /// \brief Lower an interleaved load into a vldN intrinsic.
11514 ///
11515 /// E.g. Lower an interleaved load (Factor = 2):
11516 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
11517 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
11518 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
11519 ///
11520 /// Into:
11521 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
11522 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
11523 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
11524 bool ARMTargetLowering::lowerInterleavedLoad(
11525  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
11526  ArrayRef<unsigned> Indices, unsigned Factor) const {
11527  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11528  "Invalid interleave factor");
11529  assert(!Shuffles.empty() && "Empty shufflevector input");
11530  assert(Shuffles.size() == Indices.size() &&
11531  "Unmatched number of shufflevectors and indices");
11532 
11533  VectorType *VecTy = Shuffles[0]->getType();
11534  Type *EltTy = VecTy->getVectorElementType();
11535 
11536  const DataLayout &DL = LI->getModule()->getDataLayout();
11537  unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
11538  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
11539 
11540  // Skip illegal vector types and vector types with i64/f64 elements (vldN
11541  // doesn't support i64/f64 elements).
11542  if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
11543  return false;
11544 
11545  // A pointer vector can not be the return type of the ldN intrinsics. Need to
11546  // load integer vectors first and then convert to pointer vectors.
11547  if (EltTy->isPointerTy())
11548  VecTy =
11549  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
11550 
11551  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
11552  Intrinsic::arm_neon_vld3,
11553  Intrinsic::arm_neon_vld4};
11554 
11555  Function *VldnFunc =
11556  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
11557 
11558  IRBuilder<> Builder(LI);
11559  SmallVector<Value *, 2> Ops;
11560 
11561  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
11562  Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
11563  Ops.push_back(Builder.getInt32(LI->getAlignment()));
11564 
11565  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
11566 
11567  // Replace uses of each shufflevector with the corresponding vector loaded
11568  // by ldN.
11569  for (unsigned i = 0; i < Shuffles.size(); i++) {
11570  ShuffleVectorInst *SV = Shuffles[i];
11571  unsigned Index = Indices[i];
11572 
11573  Value *SubVec = Builder.CreateExtractValue(VldN, Index);
11574 
11575  // Convert the integer vector to pointer vector if the element is pointer.
11576  if (EltTy->isPointerTy())
11577  SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
11578 
11579  SV->replaceAllUsesWith(SubVec);
11580  }
11581 
11582  return true;
11583 }
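A source-level pattern that can produce the factor-2 case above once the loop vectorizer forms interleaved accesses (illustrative; whether it actually vectorizes depends on the cost model):

struct Point { int x, y; };

int sum_diffs(const Point *p, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += p[i].x - p[i].y; // strided x/y reads -> one vld2 per vector iteration
  return s;
}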
11584 
11585 /// \brief Get a mask consisting of sequential integers starting from \p Start.
11586 ///
11587 /// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
11588 static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
11589  unsigned NumElts) {
11590  SmallVector<Constant *, 16> Mask;
11591  for (unsigned i = 0; i < NumElts; i++)
11592  Mask.push_back(Builder.getInt32(Start + i));
11593 
11594  return ConstantVector::get(Mask);
11595 }
11596 
11597 /// \brief Lower an interleaved store into a vstN intrinsic.
11598 ///
11599 /// E.g. Lower an interleaved store (Factor = 3):
11600 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
11601 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
11602 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
11603 ///
11604 /// Into:
11605 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
11606 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
11607 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
11608 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
11609 ///
11610 /// Note that the new shufflevectors will be removed and we'll only generate one
11611 /// vst3 instruction in CodeGen.
11612 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
11613  ShuffleVectorInst *SVI,
11614  unsigned Factor) const {
11615  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11616  "Invalid interleave factor");
11617 
11618  VectorType *VecTy = SVI->getType();
11619  assert(VecTy->getVectorNumElements() % Factor == 0 &&
11620  "Invalid interleaved store");
11621 
11622  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
11623  Type *EltTy = VecTy->getVectorElementType();
11624  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
11625 
11626  const DataLayout &DL = SI->getModule()->getDataLayout();
11627  unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
11628  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
11629 
11630  // Skip illegal sub-vector types and vector types with i64/f64 elements (vstN
11631  // doesn't support i64/f64 elements).
11632  if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
11633  return false;
11634 
11635  Value *Op0 = SVI->getOperand(0);
11636  Value *Op1 = SVI->getOperand(1);
11637  IRBuilder<> Builder(SI);
11638 
11639  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
11640  // vectors to integer vectors.
11641  if (EltTy->isPointerTy()) {
11642  Type *IntTy = DL.getIntPtrType(EltTy);
11643 
11644  // Convert to the corresponding integer vector.
11645  Type *IntVecTy =
11646  VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
11647  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
11648  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
11649 
11650  SubVecTy = VectorType::get(IntTy, NumSubElts);
11651  }
11652 
11653  static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
11654  Intrinsic::arm_neon_vst3,
11655  Intrinsic::arm_neon_vst4};
11656  Function *VstNFunc = Intrinsic::getDeclaration(
11657  SI->getModule(), StoreInts[Factor - 2], SubVecTy);
11658 
11659  SmallVector<Value *, 6> Ops;
11660 
11661  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
11662  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
11663 
11664  // Split the shufflevector operands into sub vectors for the new vstN call.
11665  for (unsigned i = 0; i < Factor; i++)
11666  Ops.push_back(Builder.CreateShuffleVector(
11667  Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
11668 
11669  Ops.push_back(Builder.getInt32(SI->getAlignment()));
11670  Builder.CreateCall(VstNFunc, Ops);
11671  return true;
11672 }
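The store-side counterpart (again illustrative and subject to the vectorizer's cost model): writing every field of an array of structs can become the single vstN call built above.

struct Pixel { unsigned char r, g, b; };

void fill(Pixel *p, int n, unsigned char r, unsigned char g, unsigned char b) {
  for (int i = 0; i < n; ++i) {
    p[i].r = r; p[i].g = g; p[i].b = b; // interleaved stores -> one vst3
  }
}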
11673 
11674 enum HABaseType {
11675  HA_UNKNOWN = 0,
11676  HA_FLOAT,
11677  HA_DOUBLE,
11678  HA_VECT64,
11679  HA_VECT128
11680 };
11681 
11682 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
11683  uint64_t &Members) {
11684  if (const StructType *ST = dyn_cast<StructType>(Ty)) {
11685  for (unsigned i = 0; i < ST->getNumElements(); ++i) {
11686  uint64_t SubMembers = 0;
11687  if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
11688  return false;
11689  Members += SubMembers;
11690  }
11691  } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
11692  uint64_t SubMembers = 0;
11693  if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
11694  return false;
11695  Members += SubMembers * AT->getNumElements();
11696  } else if (Ty->isFloatTy()) {
11697  if (Base != HA_UNKNOWN && Base != HA_FLOAT)
11698  return false;
11699  Members = 1;
11700  Base = HA_FLOAT;
11701  } else if (Ty->isDoubleTy()) {
11702  if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
11703  return false;
11704  Members = 1;
11705  Base = HA_DOUBLE;
11706  } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
11707  Members = 1;
11708  switch (Base) {
11709  case HA_FLOAT:
11710  case HA_DOUBLE:
11711  return false;
11712  case HA_VECT64:
11713  return VT->getBitWidth() == 64;
11714  case HA_VECT128:
11715  return VT->getBitWidth() == 128;
11716  case HA_UNKNOWN:
11717  switch (VT->getBitWidth()) {
11718  case 64:
11719  Base = HA_VECT64;
11720  return true;
11721  case 128:
11722  Base = HA_VECT128;
11723  return true;
11724  default:
11725  return false;
11726  }
11727  }
11728  }
11729 
11730  return (Members > 0 && Members <= 4);
11731 }
11732 
11733 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
11734 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
11735 /// passing according to AAPCS rules.
11736 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
11737  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
11738  if (getEffectiveCallingConv(CallConv, isVarArg) !=
11739  CallingConv::ARM_AAPCS_VFP)
11740  return false;
11741 
11742  HABaseType Base = HA_UNKNOWN;
11743  uint64_t Members = 0;
11744  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
11745  DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
11746 
11747  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
11748  return IsHA || IsIntArray;
11749 }
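To make the classification concrete (example types only; the four-member limit mirrors the Members <= 4 check above): under AAPCS-VFP a homogeneous aggregate has one to four members of a single base type and is passed in consecutive VFP registers.

#include <arm_neon.h>

struct HFA2  { double d0, d1; };    // HA of two doubles         -> d0, d1
struct HFA4  { float f[4]; };       // HA of four floats         -> s0..s3
struct HVA2  { int32x4_t v0, v1; }; // HA of two 128-bit vectors -> q0, q1
struct NotHA { float f; int i; };   // mixed base types          -> not an HA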
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:842
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG)
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
int getFunctionContextIndex() const
Return the index for the function context object.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
void setFrameAddressIsTaken(bool T)
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:477
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:450
Value * getValueOperand()
Definition: Instructions.h:406
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:303
bool use_empty() const
Return true if there are no uses of this node.
const Value * getCalledValue() const
getCalledValue - Get a pointer to the function that is invoked by this instruction.
static MVT getIntegerVT(unsigned BitWidth)
The memory access reads data.
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC to match f32 max/min patte...
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, unsigned FixedArgs=-1)
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
The memory access writes data.
TargetLoweringBase::AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:175
#define R4(n)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
bool isFPOnlySP() const
Definition: ARMSubtarget.h:337
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:453
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
LLVM Argument representation.
Definition: Argument.h:35
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:489
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:522
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, SDLoc DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:646
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG)
STATISTIC(NumFunctions,"Total number of functions")
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:292
bool isKnownNeverNaN(SDValue Op) const
Test whether the given SDValue is known to never be NaN.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG)
LocInfo getLocInfo() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:278
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
static MachinePointerInfo getJumpTable()
getJumpTable - Return a MachinePointerInfo record that refers to a jump table entry.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
void setIsLandingPad(bool V=true)
setIsLandingPad - Indicates the block is a landing pad.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, but still used on some target...
Definition: CallingConv.h:89
const TargetMachine & getTargetMachine() const
bool isAtLeastAcquire(AtomicOrdering Ord)
Returns true if the ordering is at least as strong as acquire (i.e.
Definition: Instructions.h:56
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:191
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG)
lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the bit-count for each 32-bit eleme...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
CallLoweringInfo & setDebugLoc(SDLoc dl)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
Create a MERGE_VALUES node from the given operands.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:210
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:285
ARMConstantPoolValue - ARM specific constantpool value.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each element has been zero/sign-...
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:684
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:138
iterator end() const
Definition: ArrayRef.h:123
bool isDead() const
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Y = RRC X, rotate right via carry.
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal, non-volatile loads.
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:585
bool hasT2ExtractPack() const
Definition: ARMSubtarget.h:328
CallInst - This class represents a function call, abstracting a target machine's calling convention...
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
EK_Inline - Jump table entries are emitted inline at their point of use.
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:276
const GlobalValue * getGlobal() const
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
const std::string & getAsmString() const
Definition: InlineAsm.h:82
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:228
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:752
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
unsigned getSizeInBits() const
ShuffleVectorInst - This instruction constructs a fixed permutation of two input vectors.
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isDoubleTy() const
isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:146
unsigned getByValSize() const
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Definition: APInt.h:404
unsigned getInRegsParamsCount() const
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending load, or BUILD_VECTOR with extended elements, return the unextended value.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
unsigned getNumOperands() const
Return the number of values used by this operation.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
bool hasV5TEOps() const
Definition: ARMSubtarget.h:292
const std::string & getConstraintString() const
Definition: InlineAsm.h:83
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
unsigned getNumOperands() const
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG)
bool hasV6Ops() const
Definition: ARMSubtarget.h:293
A debug info location.
Definition: DebugLoc.h:34
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB)
transferSuccessorsAndUpdatePHIs - Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks which refer to fromMBB to refer to this.
const SDValue & getOperand(unsigned Num) const
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:225
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
setjmp/longjmp based exceptions
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:61
AtomicRMWInst - an instruction that atomically reads a memory location, combines it with another valu...
Definition: Instructions.h:674
#define R2(n)
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:296
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
MO_PLT - On a symbol operand, this represents an ELF PLT reference on a call operand.
Definition: ARMBaseInfo.h:294
Same for subtraction.
Definition: ISDOpcodes.h:231
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
bool isThumb1Only() const
Definition: ARMSubtarget.h:405
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1522
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics...
unsigned getValNo() const
const SDValue & getBasePtr() const
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
ThreadModel::Model ThreadModel
ThreadModel - This flag specifies the type of threading model to assume for things like atomics...
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
bool hasDivide() const
Definition: ARMSubtarget.h:326
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from Ty1 to Ty2 is permitted when deciding whether a call is in tail posi...
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
getMachineMemOperand - Allocate a new MachineMemOperand.
The address of the GOT.
Definition: ISDOpcodes.h:66
static bool isThumb(const MCSubtargetInfo &STI)
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:658
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:254
bool isRegLoc() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
bool isAllOnesValue() const
SDValue getExternalSymbol(const char *Sym, EVT VT)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:172
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:398
bool isShiftedMask_32(uint32_t Value)
isShiftedMask_32 - This function returns true if the argument contains a non-empty sequence of ones w...
Definition: MathExtras.h:342
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:464
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
static MachinePointerInfo getFixedStack(int FI, int64_t offset=0)
getFixedStack - Return a MachinePointerInfo record that refers to the the specified FrameIndex...
bool hasPerfMon() const
Definition: ARMSubtarget.h:338
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG)
const Triple & getTargetTriple() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to never be positive or negative Zero.
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:109
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
AtomicRMWExpansionKind
Enum that specifies what a AtomicRMWInst is expanded to, if at all.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:169
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:371
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, SDLoc DL, SelectionDAG &DAG)
lazy value info
BlockAddress - The address of a basic block.
Definition: Constants.h:802
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static SDValue PerformVCVTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
bool isTargetAEABI() const
Definition: ARMSubtarget.h:373
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:1541
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
bool isNegative() const
Return true if the value is negative.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool useSoftFloat() const
Definition: ARMSubtarget.h:403
bool hasV8Ops() const
Definition: ARMSubtarget.h:298
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static const MachineInstrBuilder & AddDefaultPred(const MachineInstrBuilder &MIB)
MachineMemOperand - A description of a memory reference used in the backend.
bool hasVFP3() const
Definition: ARMSubtarget.h:315
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
ParmContext getCallOrPrologue() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
const HexagonInstrInfo * TII
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic...
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
bool isTargetELF() const
Definition: ARMSubtarget.h:363
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
Shift and rotation operations.
Definition: ISDOpcodes.h:332
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
bool isTargetDarwin() const
Definition: ARMSubtarget.h:355
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
opStatus convertToInteger(integerPart *, unsigned int, bool, roundingMode, bool *) const
Definition: APFloat.cpp:2191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
Type * getArrayElementType() const
Definition: Type.h:361
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
static SDValue findMUL_LOHI(SDValue V)
static bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v...
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, SDLoc dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
unsigned getFrameRegister(const MachineFunction &MF) const override
static void advance(T &it, size_t Val)
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:181
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getNumArgOperands() const
getNumArgOperands - Return the number of call arguments.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:161
static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCCombine - Target-specific dag combine transform from ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1057
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
unsigned getArgRegsSaveSize() const
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:458
std::vector< MachineBasicBlock * >::iterator succ_iterator
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
bool getInsertFencesForAtomic() const
Return whether the DAG builder should automatically insert fences and reduce ordering for atomics...
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:687
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
ObjectFormatType getObjectFormat() const
getFormat - Get the object format for this triple.
Definition: Triple.h:272
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool isLittle() const
Definition: ARMSubtarget.h:428
bool hasMPExtension() const
Definition: ARMSubtarget.h:345
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:353
int getMaskElt(unsigned Idx) const
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
isZeroExtended - Check if a node is a vector value that is zero-extended or a constant BUILD_VECTOR w...
The memory access is volatile.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
bool isFPBrccSlow() const
Definition: ARMSubtarget.h:336
Type * getVectorElementType() const
Definition: Type.h:364
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isThumb() const
Definition: ARMSubtarget.h:404
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Definition: Instructions.h:38
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:581
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:414
bool hasV7Ops() const
Definition: ARMSubtarget.h:297
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns true if the given (atomic) load should be expanded by the IR-level AtomicExpand pass into a l...
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
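A hedged sketch of how such a splat query is typically used (illustrative only; the helper name and node pointer are assumptions, not code from this file):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Hypothetical helper: true if BVN is a constant splat whose smallest
  // replicated element is no wider than 64 bits.
  static bool isSmallConstantSplat(const BuildVectorSDNode *BVN) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    return BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                HasAnyUndefs) &&
           SplatBitSize <= 64;
  }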
static const unsigned PerfectShuffleTable[6561+1]
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:637
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:30
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:353
LLVMContext & getContext() const
getContext - Return the LLVMContext in which this type was uniqued.
Definition: Type.h:125
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:894
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static unsigned createNEONModImm(unsigned OpCmode, unsigned Val)
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
bool isTargetCOFF() const
Definition: ARMSubtarget.h:362
SDValue getRegisterMask(const uint32_t *RegMask)
bool hasStructRetAttr() const
Determine if the function returns a structure through first pointer argument.
Definition: Function.h:360
bool hasCallSiteLandingPad(MCSymbol *Sym)
hasCallSiteLandingPad - Return true if the landing pad Eh symbol has an associated call site...
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
bool isTargetMachO() const
Definition: ARMSubtarget.h:364
void AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag...
ArrayType - Class to represent array types.
Definition: DerivedTypes.h:336
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
static int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
Return an ISD::VECTOR_SHUFFLE node.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
SmallVector< ISD::OutputArg, 32 > Outs
void dump() const
Definition: AsmWriter.cpp:3357
bool hasFPARMv8() const
Definition: ARMSubtarget.h:317
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
CallLoweringInfo & setZExtResult(bool Value=true)
bool isFloatingPointTy() const
isFloatingPointTy - Return true if this is one of the six floating point types
Definition: Type.h:159
bool hasNEON() const
Definition: ARMSubtarget.h:318
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:330
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:866
const SDValue & getBasePtr() const
MachineConstantPoolValue * getMachineCPVal() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
StoreInst - an instruction for storing to memory.
Definition: Instructions.h:316
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
const APInt & getAPIntValue() const
bool hasVFP4() const
Definition: ARMSubtarget.h:316
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isArrayTy() const
isArrayTy - True if this is an instance of ArrayType.
Definition: Type.h:213
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:116
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
EVT getMemoryVT() const
Return the type of the in-memory value.
int64_t getImm() const
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:624
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
Type * getElementType() const
Definition: DerivedTypes.h:323
bool hasARMOps() const
Definition: ARMSubtarget.h:312
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:134
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
static bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
TargetInstrInfo - Interface to description of machine instruction set.
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
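A minimal illustration (not from this file): a 32-bit mask whose upper half is set, i.e. 0xFFFF0000.

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // 32-bit value with the 16 high bits set: 0xFFFF0000.
  APInt TopHalfMask = APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/16);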
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
static EVT getExtensionTo64Bits(const EVT &OrigVT)
SDNode * getNode() const
get the SDNode which holds the desired result
void setReturnRegsCount(unsigned s)
bundle_iterator< MachineInstr, instr_iterator > iterator
bool isiOS() const
Is this an iOS triple.
Definition: Triple.h:399
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:239
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:131
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
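A hedged sketch of how this query is typically paired with the register table (for instance the GPRArgRegs array on this page) to count the argument registers still free; the helper name and CCState reference are assumptions of the example:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  using namespace llvm;

  // Hypothetical helper: how many of the given argument registers (e.g.
  // r0-r3 from GPRArgRegs) have not been assigned yet.
  static unsigned remainingArgRegs(CCState &CCInfo, ArrayRef<MCPhysReg> ArgRegs) {
    unsigned FirstFree = CCInfo.getFirstUnallocated(ArgRegs);
    return unsigned(ArgRegs.size()) - FirstFree;
  }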
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:157
static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff the bits being cleared by...
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:85
unsigned getAlignment() const
getAlignment - Return the alignment of the access that is being performed
Definition: Instructions.h:365
bool useSoftFloat() const override
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
isSignExtended - Check if a node is a vector value that is sign-extended or a constant BUILD_VECTOR w...
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:109
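For instance (illustrative values only), 0x80 has seven zero bits below its lowest set bit:

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  // 0x80 == 0b10000000, so seven trailing zeros.
  std::size_t TZ = countTrailingZeros(0x80u); // TZ == 7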
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
bool isMachineConstantPoolEntry() const
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
ParmContext
ParmContext - This enum tracks whether calling convention lowering is in the context of prologue or c...
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, SDLoc dl, EVT &VT, bool is128Bits, NEONModImmType type)
isNEONModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
size_type size() const
Definition: SmallPtrSet.h:79
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:96
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isNonTemporal() const
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
Definition: Type.h:226
bool supportsTailCall() const
Definition: ARMSubtarget.h:420
MVT getLocVT() const
bool isOptionalDef() const
Set if this operand is an optional def.
Definition: MCInstrDesc.h:85
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1245
This is an important base class in LLVM.
Definition: Constant.h:41
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:607
bool isMClass() const
Definition: ARMSubtarget.h:408
bool hasHiddenVisibility() const
Definition: GlobalValue.h:141
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:267
const Constant * getConstVal() const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
bool isFloatTy() const
isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:143
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:233
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
static bool isReverseMask(ArrayRef< int > M, EVT VT)
unsigned getInRegsParamsProcessed() const
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
static mvt_range fp_valuetypes()
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl)
getZeroVector - Returns a vector of specified type with all zero elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:225
This class provides iterator support for SDUse operands that use a specific SDNode.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:547
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static EVT getFloatingPointVT(unsigned BitWidth)
getFloatingPointVT - Returns the EVT that represents a floating point type with the given number of b...
Definition: ValueTypes.h:55
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
iterator begin() const
Definition: ArrayRef.h:122
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:644
Value * getOperand(unsigned i) const
Definition: User.h:118
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Value * getPointerOperand()
Definition: Instructions.h:284
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
static mvt_range vector_valuetypes()
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
arg_iterator arg_begin()
Definition: Function.h:472
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG)
getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count for each 16-bit element fr...
bool hasVMLxForwarding() const
Definition: ARMSubtarget.h:335
Class to represent integer types.
Definition: DerivedTypes.h:37
CondCode getSetCCSwappedOperands(CondCode Operation)
getSetCCSwappedOperands - Return the operation corresponding to (Y op X) when given the operation for...
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:249
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:598
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:322
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:129
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE. ...
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
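A minimal, hedged sketch of the builder chain as the custom inserters in this file use it; the opcode, registers, and immediate below are placeholders rather than real ARM operands:

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/Target/TargetInstrInfo.h"
  using namespace llvm;

  // Hypothetical helper: emit "Opc DestReg, SrcReg, #Imm" before Pos.
  static void emitRegRegImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator Pos, DebugLoc dl,
                            const TargetInstrInfo *TII, unsigned Opc,
                            unsigned DestReg, unsigned SrcReg, int64_t Imm) {
    BuildMI(MBB, Pos, dl, TII->get(Opc), DestReg).addReg(SrcReg).addImm(Imm);
  }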
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:673
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:468
void removeSuccessor(MachineBasicBlock *succ)
removeSuccessor - Remove successor from the successors list of this MachineBasicBlock.
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
bool isIntN(unsigned N, int64_t x)
isIntN - Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:321
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
Definition: Type.h:217
std::vector< ArgListEntry > ArgListTy
const APFloat & getValueAPF() const
unsigned getNextStackOffset() const
bool hasSinCos() const
This function returns true if the target has sincos() routine in its compiler runtime or math librari...
bool isEqualTo(SDValue A, SDValue B) const
Test whether two SDValues are known to compare equal.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="")
Definition: IRBuilder.h:1467
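A hedged sketch of the getDeclaration/CreateCall pairing, in the spirit of the makeDMB helper referenced elsewhere on this page; the module pointer, barrier-domain value, and helper name are assumptions of the example rather than code from this file:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical helper: emit a call to the llvm.arm.dmb intrinsic with the
  // given barrier-domain immediate.
  static CallInst *emitBarrier(Module *M, IRBuilder<> &Builder, unsigned Domain) {
    Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
    return Builder.CreateCall(DMB, Builder.getInt32(Domain));
  }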
This structure contains all information that is necessary for lowering calls.
bool isFPOrFPVectorTy() const
isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
Definition: Type.h:183
PointerType * getPointerTo(unsigned AddrSpace=0)
getPointerTo - Return a pointer to the current type.
Definition: Type.cpp:764
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:346
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:283
ARM_AAPCS - ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:93
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
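For example (illustrative only), the 128-bit v4i32 type can be formed from its element type and lane count:

  #include "llvm/CodeGen/ValueTypes.h"
  using namespace llvm;

  // Hypothetical helper: build the 128-bit v4i32 vector type.
  static EVT getV4i32(LLVMContext &Ctx) {
    return EVT::getVectorVT(Ctx, MVT::i32, 4);
  }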
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG)
lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the bit-count for each 16-bit eleme...
static Constant * getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts)
Get a mask consisting of sequential integers starting from Start.
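Such a mask amounts to a constant vector of consecutive i32 values; a hedged sketch of the idea (not necessarily the exact body of the static helper) is:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Sketch: the i32 constants Start, Start+1, ..., Start+NumElts-1, packed
  // into a ConstantVector usable as a shufflevector mask.
  static Constant *sequentialMask(IRBuilder<> &Builder, unsigned Start,
                                  unsigned NumElts) {
    SmallVector<Constant *, 16> Mask;
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(Builder.getInt32(Start + i));
    return ConstantVector::get(Mask);
  }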
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool needsCustom() const
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG)
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero...
unsigned getByValAlign() const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
void SplitString(StringRef Source, SmallVectorImpl< StringRef > &OutFragments, StringRef Delimiters=" \t\n\v\f\r")
SplitString - Split up the specified string according to the specified delimiters, appending the result fragments to the output list.
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
getCallSiteLandingPad - Get the call site indexes for a landing pad EH symbol.
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:177
bool genLongCalls() const
Definition: ARMSubtarget.h:348
ArrayRef< int > getMask() const
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:694
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static AddrOpc getAM2Op(unsigned AM2Opc)
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:484
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:478
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:281
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:167
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:174
static const MachineInstrBuilder & AddDefaultCC(const MachineInstrBuilder &MIB)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
CCState - This class holds information needed while lowering arguments and return values...
bool isSwift() const
Definition: ARMSubtarget.h:306
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1192
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const
GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:388
unsigned getVectorNumElements() const
Definition: Type.cpp:212
void setExceptionPointerRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception address on entry to...
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
StructType::get - This static method is the primary way to create a literal StructType.
Definition: Type.cpp:404
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static unsigned getAM2Offset(unsigned AM2Opc)
bool isInvariant() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned char TargetFlags=0) const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1253
MachineOperand class - Representation of each machine instruction operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:444
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
const uint32_t * getNoPreservedMask() const
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:548
SDNode * getGluedUser() const
If this node has a glue value with a user, return the user (there is at most one).
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:500
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:266
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
bool hasVFP2() const
Definition: ARMSubtarget.h:314
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:79
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
CallLoweringInfo & setSExtResult(bool Value=true)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:582
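For instance (illustrative only), passing a vector type yields a splat constant, here a <4 x i32> with every lane equal to 1:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Hypothetical helper: a <4 x i32> constant with every lane equal to 1.
  static Constant *getSplatOfOne(LLVMContext &Ctx) {
    return ConstantInt::get(VectorType::get(Type::getInt32Ty(Ctx), 4), 1);
  }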
Function * getCalledFunction() const
getCalledFunction - Return the function called, or null if this is an indirect function invocation...
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively...
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, const TargetInstrInfo *TII, DebugLoc dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
getSetCCInverse - Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operat...
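A small illustration (values chosen for the example): inverting an integer equality comparison yields its negation.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // SETEQ inverted over the integers is SETNE.
  ISD::CondCode Inverted = ISD::getSetCCInverse(ISD::SETEQ, /*isInteger=*/true);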
const MachineInstrBuilder & addFrameIndex(int Idx) const
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:289
static MachinePointerInfo getStack(int64_t Offset)
getStack - stack pointer relative access.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG)
ExpandBITCAST - If the target supports VFP, this function is called to expand a bit convert where eit...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
R Default(const T &Value) const
Definition: StringSwitch.h:111
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
static mvt_range integer_valuetypes()
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:468
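For example (illustrative values), 40 lies between 2^5 and 2^6, so its floor log base 2 is 5:

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  unsigned FloorLog = Log2_32(40); // FloorLog == 5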
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, SDLoc dl)
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1261
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to...
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Class for arbitrary precision integers.
Definition: APInt.h:73
static ARMConstantPoolSymbol * Create(LLVMContext &C, const char *s, unsigned ID, unsigned char PCAdj)
void setExceptionSelectorRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception typeid on entry to ...
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
Instruction * emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:360
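For example (illustrative values):

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  bool Yes = isPowerOf2_64(4096); // true:  4096 == 1 << 12
  bool No  = isPowerOf2_64(4095); // false: not a power of two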
bool isMemLoc() const
APInt bitcastToAPInt() const
Definition: APFloat.cpp:3084
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1277
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:481
bool useNaClTrap() const
Definition: ARMSubtarget.h:347
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:250
static int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
void setArgumentStackSize(unsigned size)
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isOSVersionLT - Helper function for doing comparisons against version numbers included in the target ...
Definition: Triple.h:355
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, const TargetInstrInfo *TII, DebugLoc dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
const TargetRegisterClass * getRegClassFor(MVT VT) const override
getRegClassFor - Return the register class that should be used for the specified value type...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:823
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Representation of each machine instruction.
Definition: MachineInstr.h:51
static MachinePointerInfo getGOT()
getGOT - Return a MachinePointerInfo record that refers to a GOT entry.
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:603
uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:552
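For example (illustrative values), a 16-byte-aligned base offset by 4 bytes can only be assumed to be 4-byte aligned:

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  uint64_t Guaranteed = MinAlign(16, 4); // Guaranteed == 4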
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:47
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static bool definesCPSR(const MachineInstr *MI)
void setVarArgsFrameIndex(int Index)
SmallVector< SDValue, 32 > OutVals
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:528
bool isLandingPad() const
isLandingPad - Returns true if the block is a landing pad.
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool LowerToByteSwap(CallInst *CI)
LowerToByteSwap - Replace a call instruction with a call to the bswap intrinsic.
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:479
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:134
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
bool isThumb2() const
Definition: ARMSubtarget.h:406
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v...
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
bool isAAPCS_ABI() const
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
unsigned getAlignment() const
getAlignment - Return the alignment of the access that is being performed
Definition: Instructions.h:243
bool hasV5TOps() const
Definition: ARMSubtarget.h:291
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable...
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static SDValue PerformVDIVCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
void setArgRegsSaveSize(unsigned s)
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
FunctionType * getFunctionType() const
Definition: Function.cpp:227
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:651
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:271
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OpSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
bool isTailCall() const
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:422
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
VectorType * getType() const
getType - Overload to return most specific vector type.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:512
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
getEVT - Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:277
bool isAtLeastRelease(AtomicOrdering Ord)
Returns true if the ordering is at least as strong as release (i.e.
Definition: Instructions.h:64
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:103
static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:166
EVT getValueType() const
Return the ValueType of the referenced return value.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool useMovt(const MachineFunction &MF) const
bool hasLocalLinkage() const
Definition: GlobalValue.h:280
bool all_of(R &&Range, UnaryPredicate &&P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:334
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:135
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
Definition: SelectionDAG.h:664
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
unsigned getReg() const
getReg - Returns the register number.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:140
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:105
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool isLikeA9() const
Definition: ARMSubtarget.h:308
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v...
bool hasLoadLinkedStoreConditional() const override
True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional and expand AtomicCmpXchgInst...
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
unsigned getAlignment() const
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isShuffleMaskLegal(const SmallVectorImpl< int > &M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
CallLoweringInfo & setInRegister(bool Value=true)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
std::reverse_iterator< iterator > reverse_iterator
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1189
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
bool hasRAS() const
Definition: ARMSubtarget.h:344
SDValue getRegister(unsigned Reg, EVT VT)
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
Instruction * makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const
void setInsertFencesForAtomic(bool fence)
Set if the DAG builder should automatically insert fences and reduce the order of atomic memory opera...
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct an VectorType.
Definition: Type.cpp:713
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
bool isTruncatingStore() const
Return true if the op does a truncation before store.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool hasFP16() const
Definition: ARMSubtarget.h:350
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:185
SDValue getValueType(EVT)
Disable implicit floating point insts.
Definition: Attributes.h:87
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:149
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:440
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:653
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which...
Definition: ARMBaseInfo.h:310
bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:298
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
BasicBlockListType::iterator iterator
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
void rewindByValRegsInfo()
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:92
Primary interface to the complete machine description for the target machine.
bool hasDataBarrier() const
Definition: ARMSubtarget.h:329
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:354
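A minimal usage sketch (example values chosen here for illustration):
  #include "llvm/Support/MathExtras.h"
  bool A = llvm::isPowerOf2_32(64); // true: exactly one bit set
  bool B = llvm::isPowerOf2_32(0);  // false: zero is not a power of two > 0
  bool C = llvm::isPowerOf2_32(96); // false: 96 = 0b1100000 has two bits set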
bool hasDivideInARMMode() const
Definition: ARMSubtarget.h:327
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
MachineModuleInfo & getMMI() const
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of stores operations that may be substituted for the call to memset, used for functions with OptSize attribute.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
Definition: MachineInstr.h:389
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:188
unsigned getLocMemOffset() const
MVT getVectorElementType() const
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:666
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:315
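The same check with a bit width chosen at run time, as an illustrative sketch (N is hypothetical, e.g. an immediate field width):
  unsigned N = 12;
  bool Fits = llvm::isUIntN(N, 4095); // true: 4095 = 2^12 - 1
  bool Over = llvm::isUIntN(N, 4096); // false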
unsigned getAlignment() const
bool isBitFieldInvertedMask(unsigned v)
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isBigEndian() const
Definition: DataLayout.h:218
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
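A generic sketch of building an instruction with register operands; MBB, MI, dl, TII, DestReg and SrcReg are hypothetical locals, and TargetOpcode::COPY is used only to keep the example target-neutral:
  BuildMI(*MBB, MI, dl, TII->get(llvm::TargetOpcode::COPY), DestReg)
      .addReg(SrcReg); // DestReg is the explicit def, SrcReg a plain use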
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:130
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
bool isEmpty() const
Returns true if there are no itineraries.
Value * getPointerOperand()
Definition: Instructions.h:409
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
bool hasThumb2DSP() const
Definition: ARMSubtarget.h:346
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:662
bool isTargetWindows() const
Definition: ARMSubtarget.h:360
unsigned Log2_64(uint64_t Value)
Log2_64 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:474
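Illustrative values, assuming the documented floor semantics:
  unsigned A = llvm::Log2_64(4096); // 12
  unsigned B = llvm::Log2_64(4097); // 12: the result is rounded down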
const BasicBlock * getParent() const
Definition: Instruction.h:72
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
static bool isSplatMask(const int *Mask, EVT VT)
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
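A minimal sketch (MF is a hypothetical MachineFunction and C a Constant being placed in the pool):
  llvm::MachineConstantPool *MCP = MF.getConstantPool();
  unsigned Idx = MCP->getConstantPoolIndex(C, /*Alignment=*/4); // reuses an identical existing entry if present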
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
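For example, mapping a floating-point vector type to the integer vector type of the same shape:
  llvm::EVT VT = llvm::MVT::v4f32;
  llvm::EVT IntVT = VT.changeVectorElementTypeToInteger(); // v4i32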
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
Instruction * emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
static const MachineInstrBuilder & AddDefaultT1CC(const MachineInstrBuilder &MIB, bool isDead=false)
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
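A small sketch of typical calls (DAG and dl are hypothetical SelectionDAG and SDLoc values):
  llvm::SDValue Zero = DAG.getIntPtrConstant(0, dl);                    // pointer-sized constant 0
  llvm::SDValue Off  = DAG.getIntPtrConstant(4, dl, /*isTarget=*/true); // target-constant form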
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
MachineModuleInfo - This class contains meta information specific to a module.
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:265
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
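A sketch of how a caller typically queries this hook (TLI is a hypothetical TargetLowering reference):
  bool Fast = false;
  if (TLI.allowsMisalignedMemoryAccesses(llvm::MVT::v2i32, /*AddrSpace=*/0, /*Align=*/1, &Fast) && Fast) {
    // an unaligned 64-bit vector access is both legal and cheap here
  }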
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
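A sketch of how a TargetLowering-derived constructor can use this hook; the exact indexed modes and types a target enables vary, so this line is illustrative rather than a quote of the ARM constructor:
  setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); // allow post-incremented i32 stores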
FloatABI::ABIType FloatABIType
FloatABIType - This setting is set when the -float-abi=xxx option is specified on the command line...
uint64_t getZExtValue() const
static uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits)
decodeNEONModImm - Decode a NEON modified immediate value into the element value and the element size...
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:761
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
uint64_t integerPart
Definition: APInt.h:33
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, SDLoc DL) const
SoftenSetCCOperands - Soften the operands of a comparison.
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:527
Function must be optimized for size first.
Definition: Attributes.h:80