1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMCallingConv.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMPerfectShuffle.h"
20 #include "ARMSubtarget.h"
21 #include "ARMTargetMachine.h"
22 #include "ARMTargetObjectFile.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/IR/CallingConv.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/Function.h"
41 #include "llvm/IR/GlobalValue.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/MC/MCSectionMachO.h"
50 #include "llvm/Support/Debug.h"
55 #include <utility>
56 using namespace llvm;
57 
58 #define DEBUG_TYPE "arm-isel"
59 
60 STATISTIC(NumTailCalls, "Number of tail calls");
61 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
62 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
63 STATISTIC(NumConstpoolPromoted,
64  "Number of constants with their storage promoted into constant pools");
65 
66 static cl::opt<bool>
67 ARMInterworking("arm-interworking", cl::Hidden,
68  cl::desc("Enable / disable ARM interworking (for debugging only)"),
69  cl::init(true));
70 
72  "arm-promote-constant", cl::Hidden,
73  cl::desc("Enable / disable promotion of unnamed_addr constants into "
74  "constant pools"),
75  cl::init(true));
77  "arm-promote-constant-max-size", cl::Hidden,
78  cl::desc("Maximum size of constant to promote into a constant pool"),
79  cl::init(64));
81  "arm-promote-constant-max-total", cl::Hidden,
82  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
83  cl::init(128));
84 
85 namespace {
86  class ARMCCState : public CCState {
87  public:
88  ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
91  : CCState(CC, isVarArg, MF, locs, C) {
92  assert(((PC == Call) || (PC == Prologue)) &&
93  "ARMCCState users must specify whether their context is call"
94  "or prologue generation.");
95  CallOrPrologue = PC;
96  }
97  };
98 }
99 
100 // The APCS parameter registers.
101 static const MCPhysReg GPRArgRegs[] = {
102  ARM::R0, ARM::R1, ARM::R2, ARM::R3
103 };
104 
105 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
106  MVT PromotedBitwiseVT) {
107  if (VT != PromotedLdStVT) {
109  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
110 
112  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
113  }
114 
115  MVT ElemTy = VT.getVectorElementType();
116  if (ElemTy != MVT::f64)
120  if (ElemTy == MVT::i32) {
125  } else {
130  }
139  if (VT.isInteger()) {
143  }
144 
145  // Promote all bit-wise operations.
146  if (VT.isInteger() && VT != PromotedBitwiseVT) {
148  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
150  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
152  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
153  }
154 
155  // Neon does not support vector divide/remainder operations.
162 
163  if (!VT.isFloatingPoint() &&
164  VT != MVT::v2i64 && VT != MVT::v1i64)
165  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
166  setOperationAction(Opcode, VT, Legal);
167 }
168 
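// The two helpers below register a NEON vector type with the appropriate
// register class (64-bit D registers or 128-bit D-pairs) and then call
// addTypeForNEON to set up its promoted load/store and bitwise types.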
169 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
170  addRegisterClass(VT, &ARM::DPRRegClass);
171  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
172 }
173 
174 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
175  addRegisterClass(VT, &ARM::DPairRegClass);
176  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
177 }
178 
180  const ARMSubtarget &STI)
181  : TargetLowering(TM), Subtarget(&STI) {
182  RegInfo = Subtarget->getRegisterInfo();
183  Itins = Subtarget->getInstrItineraryData();
184 
186 
187  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
188  !Subtarget->isTargetWatchOS()) {
189  const auto &E = Subtarget->getTargetTriple().getEnvironment();
190 
191  bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
193  // Windows is a special case. Technically, we will replace all of the "GNU"
194  // calls with calls to MSVCRT if appropriate and adjust the calling
195  // convention then.
196  IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
197 
198  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
199  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
200  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
202  }
203 
204  if (Subtarget->isTargetMachO()) {
205  // Uses VFP for Thumb libfuncs if available.
206  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
207  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
208  static const struct {
209  const RTLIB::Libcall Op;
210  const char * const Name;
211  const ISD::CondCode Cond;
212  } LibraryCalls[] = {
213  // Single-precision floating-point arithmetic.
214  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
215  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
216  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
217  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
218 
219  // Double-precision floating-point arithmetic.
220  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
221  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
222  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
223  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
224 
225  // Single-precision comparisons.
226  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
227  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
228  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
229  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
230  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
231  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
232  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
233  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
234 
235  // Double-precision comparisons.
236  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
237  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
238  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
239  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
240  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
241  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
242  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
243  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
244 
245  // Floating-point to integer conversions.
246  // i64 conversions are done via library routines even when generating VFP
247  // instructions, so use the same ones.
248  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
249  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
250  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
251  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
252 
253  // Conversions between floating types.
254  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
255  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
256 
257  // Integer to floating-point conversions.
258  // i64 conversions are done via library routines even when generating VFP
259  // instructions, so use the same ones.
260  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
261  // e.g., __floatunsidf vs. __floatunssidfvfp.
262  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
263  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
264  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
265  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
266  };
267 
268  for (const auto &LC : LibraryCalls) {
269  setLibcallName(LC.Op, LC.Name);
270  if (LC.Cond != ISD::SETCC_INVALID)
271  setCmpLibcallCC(LC.Op, LC.Cond);
272  }
273  }
274 
275  // Set the correct calling convention for ARMv7k WatchOS. It's just
276  // AAPCS_VFP for functions as simple as libcalls.
277  if (Subtarget->isTargetWatchABI()) {
278  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
280  }
281  }
282 
283  // These libcalls are not available in 32-bit.
287 
288  // RTLIB
289  if (Subtarget->isAAPCS_ABI() &&
290  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
291  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
292  static const struct {
293  const RTLIB::Libcall Op;
294  const char * const Name;
295  const CallingConv::ID CC;
296  const ISD::CondCode Cond;
297  } LibraryCalls[] = {
298  // Double-precision floating-point arithmetic helper functions
299  // RTABI chapter 4.1.2, Table 2
304 
305  // Double-precision floating-point comparison helper functions
306  // RTABI chapter 4.1.2, Table 3
307  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
308  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
309  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
310  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
311  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
312  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
313  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
314  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
315 
316  // Single-precision floating-point arithmetic helper functions
317  // RTABI chapter 4.1.2, Table 4
322 
323  // Single-precision floating-point comparison helper functions
324  // RTABI chapter 4.1.2, Table 5
325  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
326  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
327  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
328  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
329  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
330  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
331  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
332  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
333 
334  // Floating-point to integer conversions.
335  // RTABI chapter 4.1.2, Table 6
344 
345  // Conversions between floating types.
346  // RTABI chapter 4.1.2, Table 7
350 
351  // Integer to floating-point conversions.
352  // RTABI chapter 4.1.2, Table 8
361 
362  // Long long helper functions
363  // RTABI chapter 4.2, Table 9
368 
369  // Integer division functions
370  // RTABI chapter 4.3.1
379  };
380 
381  for (const auto &LC : LibraryCalls) {
382  setLibcallName(LC.Op, LC.Name);
383  setLibcallCallingConv(LC.Op, LC.CC);
384  if (LC.Cond != ISD::SETCC_INVALID)
385  setCmpLibcallCC(LC.Op, LC.Cond);
386  }
387 
388  // EABI dependent RTLIB
389  if (TM.Options.EABIVersion == EABI::EABI4 ||
391  static const struct {
392  const RTLIB::Libcall Op;
393  const char *const Name;
394  const CallingConv::ID CC;
395  const ISD::CondCode Cond;
396  } MemOpsLibraryCalls[] = {
397  // Memory operations
398  // RTABI chapter 4.3.4
402  };
403 
404  for (const auto &LC : MemOpsLibraryCalls) {
405  setLibcallName(LC.Op, LC.Name);
406  setLibcallCallingConv(LC.Op, LC.CC);
407  if (LC.Cond != ISD::SETCC_INVALID)
408  setCmpLibcallCC(LC.Op, LC.Cond);
409  }
410  }
411  }
412 
413  if (Subtarget->isTargetWindows()) {
414  static const struct {
415  const RTLIB::Libcall Op;
416  const char * const Name;
417  const CallingConv::ID CC;
418  } LibraryCalls[] = {
427  };
428 
429  for (const auto &LC : LibraryCalls) {
430  setLibcallName(LC.Op, LC.Name);
431  setLibcallCallingConv(LC.Op, LC.CC);
432  }
433  }
434 
435  // Use divmod compiler-rt calls for iOS 5.0 and later.
436  if (Subtarget->isTargetWatchOS() ||
437  (Subtarget->isTargetIOS() &&
438  !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
439  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
440  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
441  }
442 
443  // The half <-> float conversion functions are always soft-float on
444  // non-watchos platforms, but are needed for some targets which use a
445  // hard-float calling convention by default.
446  if (!Subtarget->isTargetWatchABI()) {
447  if (Subtarget->isAAPCS_ABI()) {
451  } else {
455  }
456  }
457 
458  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
459  // a __gnu_ prefix (which is the default).
460  if (Subtarget->isTargetAEABI()) {
461  static const struct {
462  const RTLIB::Libcall Op;
463  const char * const Name;
464  const CallingConv::ID CC;
465  } LibraryCalls[] = {
468  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
469  };
470 
471  for (const auto &LC : LibraryCalls) {
472  setLibcallName(LC.Op, LC.Name);
473  setLibcallCallingConv(LC.Op, LC.CC);
474  }
475  }
476 
477  if (Subtarget->isThumb1Only())
478  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
479  else
480  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
481 
482  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
483  !Subtarget->isThumb1Only()) {
484  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
485  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
486  }
487 
488  for (MVT VT : MVT::vector_valuetypes()) {
489  for (MVT InnerVT : MVT::vector_valuetypes()) {
490  setTruncStoreAction(VT, InnerVT, Expand);
491  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
492  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
493  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
494  }
495 
500 
502  }
503 
506 
509 
510  if (Subtarget->hasNEON()) {
511  addDRTypeForNEON(MVT::v2f32);
512  addDRTypeForNEON(MVT::v8i8);
513  addDRTypeForNEON(MVT::v4i16);
514  addDRTypeForNEON(MVT::v2i32);
515  addDRTypeForNEON(MVT::v1i64);
516 
517  addQRTypeForNEON(MVT::v4f32);
518  addQRTypeForNEON(MVT::v2f64);
519  addQRTypeForNEON(MVT::v16i8);
520  addQRTypeForNEON(MVT::v8i16);
521  addQRTypeForNEON(MVT::v4i32);
522  addQRTypeForNEON(MVT::v2i64);
523 
524  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
525  // neither Neon nor VFP support any arithmetic operations on it.
526  // The same applies to v4f32, although vadd, vsub and vmul are natively
527  // supported for v4f32.
531  // FIXME: Code duplication: FDIV and FREM are expanded always, see
532  // ARMTargetLowering::addTypeForNEON method for details.
535  // FIXME: Create unittest.
536  // In other words, find a case where "copysign" appears in the DAG with vector
537  // operands.
539  // FIXME: Code duplication: SETCC has custom operation action, see
540  // ARMTargetLowering::addTypeForNEON method for details.
542  // FIXME: Create unittest for FNEG and for FABS.
555  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
562 
578 
579  // Mark v2f32 intrinsics.
595 
596  // Neon does not support some operations on v1i64 and v2i64 types.
598  // Custom handling for some quad-vector types to detect VMULL.
602  // Custom handling for some vector types to avoid expensive expansions
607  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
608  // a destination type that is wider than the source, nor does
609  // it have a FP_TO_[SU]INT instruction with a narrower destination than
610  // source.
615 
618 
619  // NEON does not have single instruction CTPOP for vectors with element
620  // types wider than 8-bits. However, custom lowering can leverage the
621  // v8i8/v16i8 vcnt instruction.
628 
631 
632  // NEON does not have single instruction CTTZ for vectors.
637 
642 
647 
652 
653  // NEON only has FMA instructions as of VFP4.
654  if (!Subtarget->hasVFP4()) {
657  }
658 
676 
677  // It is legal to extload from v4i8 to v4i16 or v4i32.
679  MVT::v2i32}) {
680  for (MVT VT : MVT::integer_vector_valuetypes()) {
684  }
685  }
686  }
687 
688  // ARM and Thumb2 support UMLAL/SMLAL.
689  if (!Subtarget->isThumb1Only())
691 
692  if (Subtarget->isFPOnlySP()) {
693  // When targeting a floating-point unit with only single-precision
694  // operations, f64 is legal for the few double-precision instructions which
695  // are present. However, no double-precision operations other than moves,
696  // loads and stores are provided by the hardware.
730  }
731 
733 
734  // ARM does not have floating-point extending loads.
735  for (MVT VT : MVT::fp_valuetypes()) {
738  }
739 
740  // ... or truncating stores
744 
745  // ARM does not have i1 sign extending load.
746  for (MVT VT : MVT::integer_valuetypes())
748 
749  // ARM supports all 4 flavors of integer indexed load / store.
750  if (!Subtarget->isThumb1Only()) {
751  for (unsigned im = (unsigned)ISD::PRE_INC;
761  }
762  } else {
763  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
766  }
767 
772 
773  // i64 operation support.
776  if (Subtarget->isThumb1Only()) {
779  }
780  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
781  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
783 
789 
790  if (!Subtarget->isThumb1Only()) {
791  // FIXME: We should do this for Thumb1 as well.
796  }
797 
798  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
800 
801  // ARM does not have ROTL.
803  for (MVT VT : MVT::vector_valuetypes()) {
806  }
809  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
811 
812  // @llvm.readcyclecounter requires the Performance Monitors extension.
813  // Default to the 0 expansion on unsupported platforms.
814  // FIXME: Technically there are older ARM CPUs that have
815  // implementation-specific ways of obtaining this information.
816  if (Subtarget->hasPerfMon())
818 
819  // Only ARMv6 has BSWAP.
820  if (!Subtarget->hasV6Ops())
822 
823  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
824  : Subtarget->hasDivideInARMMode();
825  if (!hasDivide) {
826  // These are expanded into libcalls if the cpu doesn't have HW divider.
829  }
830 
831  if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
834 
837  }
838 
841 
842  // Register based DivRem for AEABI (RTABI 4.2)
843  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
844  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
845  Subtarget->isTargetWindows()) {
848  HasStandaloneRem = false;
849 
850  if (Subtarget->isTargetWindows()) {
851  const struct {
852  const RTLIB::Libcall Op;
853  const char * const Name;
854  const CallingConv::ID CC;
855  } LibraryCalls[] = {
856  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
857  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
858  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
859  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
860 
861  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
862  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
863  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
864  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
865  };
866 
867  for (const auto &LC : LibraryCalls) {
868  setLibcallName(LC.Op, LC.Name);
869  setLibcallCallingConv(LC.Op, LC.CC);
870  }
871  } else {
872  const struct {
873  const RTLIB::Libcall Op;
874  const char * const Name;
875  const CallingConv::ID CC;
876  } LibraryCalls[] = {
877  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
878  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
879  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
880  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
881 
882  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
883  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
884  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
885  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
886  };
887 
888  for (const auto &LC : LibraryCalls) {
889  setLibcallName(LC.Op, LC.Name);
890  setLibcallCallingConv(LC.Op, LC.CC);
891  }
892  }
893 
898  } else {
901  }
902 
903  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
904  for (auto &VT : {MVT::f32, MVT::f64})
906 
911 
913 
914  // Use the default implementation.
921 
922  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
924  else
926 
927  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
928  // the default expansion.
929  InsertFencesForAtomic = false;
930  if (Subtarget->hasAnyDataBarrier() &&
931  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
932  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
933  // to ldrex/strex loops already.
935  if (!Subtarget->isThumb() || !Subtarget->isMClass())
937 
938  // On v8, we have particularly efficient implementations of atomic fences
939  // if they can be combined with nearby atomic loads and stores.
940  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
941  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
942  InsertFencesForAtomic = true;
943  }
944  } else {
945  // If there's anything we can use as a barrier, go through custom lowering
946  // for ATOMIC_FENCE.
947  // If the target has DMB in Thumb, fences can be inserted.
948  if (Subtarget->hasDataBarrier())
949  InsertFencesForAtomic = true;
950 
952  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
953 
954  // Set them all for expansion, which will force libcalls.
967  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
968  // Unordered/Monotonic case.
969  if (!InsertFencesForAtomic) {
972  }
973  }
974 
976 
977  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
978  if (!Subtarget->hasV6Ops()) {
981  }
983 
984  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
985  !Subtarget->isThumb1Only()) {
986  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
987  // iff target supports vfp2.
990  }
991 
992  // We want to custom lower some of our intrinsics.
997  if (Subtarget->useSjLjEH())
998  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
999 
1009 
1010  // Thumb-1 cannot currently select ARMISD::SUBE.
1011  if (!Subtarget->isThumb1Only())
1013 
1019 
1020  // We don't support sin/cos/fmod/copysign/pow
1029  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1030  !Subtarget->isThumb1Only()) {
1033  }
1036 
1037  if (!Subtarget->hasVFP4()) {
1040  }
1041 
1042  // Various VFP goodness
1043  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1044  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1045  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1048  }
1049 
1050  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1051  if (!Subtarget->hasFP16()) {
1054  }
1055  }
1056 
1057  // Combine sin / cos into one node or libcall if possible.
1058  if (Subtarget->hasSinCos()) {
1059  setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1060  setLibcallName(RTLIB::SINCOS_F64, "sincos");
1061  if (Subtarget->isTargetWatchABI()) {
1064  }
1065  if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1066  // For iOS, we don't want the normal expansion of a libcall to
1067  // sincos. We want to issue a libcall to __sincos_stret.
1070  }
1071  }
1072 
1073  // FP-ARMv8 implements a lot of rounding-like FP operations.
1074  if (Subtarget->hasFPARMv8()) {
1087 
1088  if (!Subtarget->isFPOnlySP()) {
1097  }
1098  }
1099 
1100  if (Subtarget->hasNEON()) {
1101  // vmin and vmax aren't available in a scalar form, so we use
1102  // a NEON instruction with an undef lane instead.
1109  }
1110 
1111  // We have target-specific dag combine patterns for the following nodes:
1112  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1119 
1120  if (Subtarget->hasV6Ops())
1122 
1124 
1125  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1126  !Subtarget->hasVFP2())
1128  else
1130 
1131  //// temporary - rewrite interface to use type
1132  MaxStoresPerMemset = 8;
1134  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1136  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1138 
1139  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1140  // are at least 4 bytes aligned.
1142 
1143  // Prefer likely predicted branches to selects on out-of-order cores.
1144  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1145 
1146  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1147 }
1148 
1150  return Subtarget->useSoftFloat();
1151 }
1152 
1153 // FIXME: It might make sense to define the representative register class as the
1154 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1155 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1156 // SPR's representative would be DPR_VFP2. This should work well if register
1157 // pressure tracking were modified such that a register use would increment the
1158 // pressure of the register class's representative and all of its super
1159 // classes' representatives transitively. We have not implemented this because
1160 // of the difficulty prior to coalescing of modeling operand register classes
1161 // due to the common occurrence of cross class copies and subregister insertions
1162 // and extractions.
1163 std::pair<const TargetRegisterClass *, uint8_t>
1165  MVT VT) const {
1166  const TargetRegisterClass *RRC = nullptr;
1167  uint8_t Cost = 1;
1168  switch (VT.SimpleTy) {
1169  default:
1171  // Use DPR as representative register class for all floating point
1172  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1173  // the cost is 1 for both f32 and f64.
1174  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1175  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1176  RRC = &ARM::DPRRegClass;
1177  // When NEON is used for SP, only half of the register file is available
1178  // because operations that define both SP and DP results will be constrained
1179  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1180  // coalescing by double-counting the SP regs. See the FIXME above.
1181  if (Subtarget->useNEONForSinglePrecisionFP())
1182  Cost = 2;
1183  break;
1184  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1185  case MVT::v4f32: case MVT::v2f64:
1186  RRC = &ARM::DPRRegClass;
1187  Cost = 2;
1188  break;
1189  case MVT::v4i64:
1190  RRC = &ARM::DPRRegClass;
1191  Cost = 4;
1192  break;
1193  case MVT::v8i64:
1194  RRC = &ARM::DPRRegClass;
1195  Cost = 8;
1196  break;
1197  }
1198  return std::make_pair(RRC, Cost);
1199 }
1200 
1201 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1202  switch ((ARMISD::NodeType)Opcode) {
1203  case ARMISD::FIRST_NUMBER: break;
1204  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1205  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1206  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1207  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1208  case ARMISD::CALL: return "ARMISD::CALL";
1209  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1210  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1211  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1212  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1213  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1214  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1215  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1216  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1217  case ARMISD::CMP: return "ARMISD::CMP";
1218  case ARMISD::CMN: return "ARMISD::CMN";
1219  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1220  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1221  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1222  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1223  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1224 
1225  case ARMISD::CMOV: return "ARMISD::CMOV";
1226 
1227  case ARMISD::SSAT: return "ARMISD::SSAT";
1228 
1229  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1230  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1231  case ARMISD::RRX: return "ARMISD::RRX";
1232 
1233  case ARMISD::ADDC: return "ARMISD::ADDC";
1234  case ARMISD::ADDE: return "ARMISD::ADDE";
1235  case ARMISD::SUBC: return "ARMISD::SUBC";
1236  case ARMISD::SUBE: return "ARMISD::SUBE";
1237 
1238  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1239  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1240 
1241  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1242  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1243  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1244 
1245  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1246 
1247  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1248 
1249  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1250 
1251  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1252 
1253  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1254 
1255  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1256  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1257 
1258  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1259  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1260  case ARMISD::VCGE: return "ARMISD::VCGE";
1261  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1262  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1263  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1264  case ARMISD::VCGT: return "ARMISD::VCGT";
1265  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1266  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1267  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1268  case ARMISD::VTST: return "ARMISD::VTST";
1269 
1270  case ARMISD::VSHL: return "ARMISD::VSHL";
1271  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1272  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1273  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1274  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1275  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1276  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1277  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1278  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1279  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1280  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1281  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1282  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1283  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1284  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1285  case ARMISD::VSLI: return "ARMISD::VSLI";
1286  case ARMISD::VSRI: return "ARMISD::VSRI";
1287  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1288  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1289  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1290  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1291  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1292  case ARMISD::VDUP: return "ARMISD::VDUP";
1293  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1294  case ARMISD::VEXT: return "ARMISD::VEXT";
1295  case ARMISD::VREV64: return "ARMISD::VREV64";
1296  case ARMISD::VREV32: return "ARMISD::VREV32";
1297  case ARMISD::VREV16: return "ARMISD::VREV16";
1298  case ARMISD::VZIP: return "ARMISD::VZIP";
1299  case ARMISD::VUZP: return "ARMISD::VUZP";
1300  case ARMISD::VTRN: return "ARMISD::VTRN";
1301  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1302  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1303  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1304  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1305  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1306  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1307  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1308  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1309  case ARMISD::BFI: return "ARMISD::BFI";
1310  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1311  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1312  case ARMISD::VBSL: return "ARMISD::VBSL";
1313  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1314  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1315  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1316  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1317  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1318  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1319  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1320  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1321  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1322  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1323  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1324  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1325  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1326  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1327  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1328  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1329  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1330  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1331  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1332  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1333  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1334  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1335  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1336  }
1337  return nullptr;
1338 }
1339 
1341  EVT VT) const {
1342  if (!VT.isVector())
1343  return getPointerTy(DL);
1345 }
1346 
1347 /// getRegClassFor - Return the register class that should be used for the
1348 /// specified value type.
1350  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1351  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1352  // load / store 4 to 8 consecutive D registers.
1353  if (Subtarget->hasNEON()) {
1354  if (VT == MVT::v4i64)
1355  return &ARM::QQPRRegClass;
1356  if (VT == MVT::v8i64)
1357  return &ARM::QQQQPRRegClass;
1358  }
1359  return TargetLowering::getRegClassFor(VT);
1360 }
1361 
1362 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1363 // source/dest is aligned and the copy size is large enough. We therefore want
1364 // to align such objects passed to memory intrinsics.
1366  unsigned &PrefAlign) const {
1367  if (!isa<MemIntrinsic>(CI))
1368  return false;
1369  MinSize = 8;
1370  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1371  // cycle faster than 4-byte aligned LDM.
1372  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1373  return true;
1374 }
1375 
1376 // Create a fast isel object.
1377 FastISel *
1379  const TargetLibraryInfo *libInfo) const {
1380  return ARM::createFastISel(funcInfo, libInfo);
1381 }
1382 
1384  unsigned NumVals = N->getNumValues();
1385  if (!NumVals)
1386  return Sched::RegPressure;
1387 
1388  for (unsigned i = 0; i != NumVals; ++i) {
1389  EVT VT = N->getValueType(i);
1390  if (VT == MVT::Glue || VT == MVT::Other)
1391  continue;
1392  if (VT.isFloatingPoint() || VT.isVector())
1393  return Sched::ILP;
1394  }
1395 
1396  if (!N->isMachineOpcode())
1397  return Sched::RegPressure;
1398 
1399  // Loads are scheduled for latency even if the instruction itinerary
1400  // is not available.
1401  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1402  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1403 
1404  if (MCID.getNumDefs() == 0)
1405  return Sched::RegPressure;
1406  if (!Itins->isEmpty() &&
1407  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1408  return Sched::ILP;
1409 
1410  return Sched::RegPressure;
1411 }
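// In short: nodes that produce floating-point or vector values, and machine
// nodes whose first definition has an operand latency above 2 in the
// itinerary, are scheduled for ILP; everything else is scheduled to reduce
// register pressure.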
1412 
1413 //===----------------------------------------------------------------------===//
1414 // Lowering Code
1415 //===----------------------------------------------------------------------===//
1416 
1417 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1419  switch (CC) {
1420  default: llvm_unreachable("Unknown condition code!");
1421  case ISD::SETNE: return ARMCC::NE;
1422  case ISD::SETEQ: return ARMCC::EQ;
1423  case ISD::SETGT: return ARMCC::GT;
1424  case ISD::SETGE: return ARMCC::GE;
1425  case ISD::SETLT: return ARMCC::LT;
1426  case ISD::SETLE: return ARMCC::LE;
1427  case ISD::SETUGT: return ARMCC::HI;
1428  case ISD::SETUGE: return ARMCC::HS;
1429  case ISD::SETULT: return ARMCC::LO;
1430  case ISD::SETULE: return ARMCC::LS;
1431  }
1432 }
1433 
1434 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1436  ARMCC::CondCodes &CondCode2) {
1437  CondCode2 = ARMCC::AL;
1438  switch (CC) {
1439  default: llvm_unreachable("Unknown FP condition!");
1440  case ISD::SETEQ:
1441  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1442  case ISD::SETGT:
1443  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1444  case ISD::SETGE:
1445  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1446  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1447  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1448  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1449  case ISD::SETO: CondCode = ARMCC::VC; break;
1450  case ISD::SETUO: CondCode = ARMCC::VS; break;
1451  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1452  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1453  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1454  case ISD::SETLT:
1455  case ISD::SETULT: CondCode = ARMCC::LT; break;
1456  case ISD::SETLE:
1457  case ISD::SETULE: CondCode = ARMCC::LE; break;
1458  case ISD::SETNE:
1459  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1460  }
1461 }
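// For example, SETONE (ordered and not equal) has no single ARM condition:
// the mapping above returns MI with a secondary GT, so callers that receive a
// CondCode2 other than AL must test both conditions.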
1462 
1463 //===----------------------------------------------------------------------===//
1464 // Calling Convention Implementation
1465 //===----------------------------------------------------------------------===//
1466 
1467 #include "ARMGenCallingConv.inc"
1468 
1469 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1470 /// account presence of floating point hardware and calling convention
1471 /// limitations, such as support for variadic functions.
1473 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1474  bool isVarArg) const {
1475  switch (CC) {
1476  default:
1477  llvm_unreachable("Unsupported calling convention");
1479  case CallingConv::ARM_APCS:
1480  case CallingConv::GHC:
1481  return CC;
1485  case CallingConv::Swift:
1487  case CallingConv::C:
1488  if (!Subtarget->isAAPCS_ABI())
1489  return CallingConv::ARM_APCS;
1490  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1492  !isVarArg)
1494  else
1495  return CallingConv::ARM_AAPCS;
1496  case CallingConv::Fast:
1498  if (!Subtarget->isAAPCS_ABI()) {
1499  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1500  return CallingConv::Fast;
1501  return CallingConv::ARM_APCS;
1502  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1504  else
1505  return CallingConv::ARM_AAPCS;
1506  }
1507 }
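// For example, a plain C call on an AAPCS target with VFP2 hardware and a
// non-variadic signature takes the hard-float branch above, while the same
// call with varargs falls back to ARM_AAPCS, and APCS targets always use
// ARM_APCS.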
1508 
1510  bool isVarArg) const {
1511  return CCAssignFnForNode(CC, false, isVarArg);
1512 }
1513 
1515  bool isVarArg) const {
1516  return CCAssignFnForNode(CC, true, isVarArg);
1517 }
1518 
1519 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1520 /// CallingConvention.
1521 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1522  bool Return,
1523  bool isVarArg) const {
1524  switch (getEffectiveCallingConv(CC, isVarArg)) {
1525  default:
1526  llvm_unreachable("Unsupported calling convention");
1527  case CallingConv::ARM_APCS:
1528  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1530  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1532  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1533  case CallingConv::Fast:
1534  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1535  case CallingConv::GHC:
1536  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1538  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1539  }
1540 }
1541 
1542 /// LowerCallResult - Lower the result values of a call into the
1543 /// appropriate copies out of appropriate physical registers.
1544 SDValue ARMTargetLowering::LowerCallResult(
1545  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1546  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1547  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1548  SDValue ThisVal) const {
1549 
1550  // Assign locations to each value returned by this call.
1552  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1553  *DAG.getContext(), Call);
1554  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1555 
1556  // Copy all of the result registers out of their specified physreg.
1557  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1558  CCValAssign VA = RVLocs[i];
1559 
1560  // Pass 'this' value directly from the argument to return value, to avoid
1561  // reg unit interference
1562  if (i == 0 && isThisReturn) {
1563  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1564  "unexpected return calling convention register assignment");
1565  InVals.push_back(ThisVal);
1566  continue;
1567  }
1568 
1569  SDValue Val;
1570  if (VA.needsCustom()) {
1571  // Handle f64 or half of a v2f64.
1572  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1573  InFlag);
1574  Chain = Lo.getValue(1);
1575  InFlag = Lo.getValue(2);
1576  VA = RVLocs[++i]; // skip ahead to next loc
1577  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1578  InFlag);
1579  Chain = Hi.getValue(1);
1580  InFlag = Hi.getValue(2);
1581  if (!Subtarget->isLittle())
1582  std::swap (Lo, Hi);
1583  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1584 
1585  if (VA.getLocVT() == MVT::v2f64) {
1586  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1587  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1588  DAG.getConstant(0, dl, MVT::i32));
1589 
1590  VA = RVLocs[++i]; // skip ahead to next loc
1591  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1592  Chain = Lo.getValue(1);
1593  InFlag = Lo.getValue(2);
1594  VA = RVLocs[++i]; // skip ahead to next loc
1595  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1596  Chain = Hi.getValue(1);
1597  InFlag = Hi.getValue(2);
1598  if (!Subtarget->isLittle())
1599  std::swap (Lo, Hi);
1600  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1601  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1602  DAG.getConstant(1, dl, MVT::i32));
1603  }
1604  } else {
1605  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1606  InFlag);
1607  Chain = Val.getValue(1);
1608  InFlag = Val.getValue(2);
1609  }
1610 
1611  switch (VA.getLocInfo()) {
1612  default: llvm_unreachable("Unknown loc info!");
1613  case CCValAssign::Full: break;
1614  case CCValAssign::BCvt:
1615  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1616  break;
1617  }
1618 
1619  InVals.push_back(Val);
1620  }
1621 
1622  return Chain;
1623 }
1624 
1625 /// LowerMemOpCallTo - Store the argument to the stack.
1626 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1627  SDValue Arg, const SDLoc &dl,
1628  SelectionDAG &DAG,
1629  const CCValAssign &VA,
1630  ISD::ArgFlagsTy Flags) const {
1631  unsigned LocMemOffset = VA.getLocMemOffset();
1632  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1633  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1634  StackPtr, PtrOff);
1635  return DAG.getStore(
1636  Chain, dl, Arg, PtrOff,
1637  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1638 }
1639 
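/// PassF64ArgInRegs - Split an f64 argument into two i32 halves with VMOVRRD:
/// the first half goes into its assigned register, and the second goes into
/// the next register if one is assigned, otherwise it is stored to its stack
/// slot via LowerMemOpCallTo.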
1640 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1641  SDValue Chain, SDValue &Arg,
1642  RegsToPassVector &RegsToPass,
1643  CCValAssign &VA, CCValAssign &NextVA,
1644  SDValue &StackPtr,
1645  SmallVectorImpl<SDValue> &MemOpChains,
1646  ISD::ArgFlagsTy Flags) const {
1647 
1648  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1649  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1650  unsigned id = Subtarget->isLittle() ? 0 : 1;
1651  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1652 
1653  if (NextVA.isRegLoc())
1654  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1655  else {
1656  assert(NextVA.isMemLoc());
1657  if (!StackPtr.getNode())
1658  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1659  getPointerTy(DAG.getDataLayout()));
1660 
1661  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1662  dl, DAG, NextVA,
1663  Flags));
1664  }
1665 }
1666 
1667 /// LowerCall - Lowering a call into a callseq_start <-
1668 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1669 /// nodes.
1670 SDValue
1671 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1672  SmallVectorImpl<SDValue> &InVals) const {
1673  SelectionDAG &DAG = CLI.DAG;
1674  SDLoc &dl = CLI.DL;
1676  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1678  SDValue Chain = CLI.Chain;
1679  SDValue Callee = CLI.Callee;
1680  bool &isTailCall = CLI.IsTailCall;
1681  CallingConv::ID CallConv = CLI.CallConv;
1682  bool doesNotRet = CLI.DoesNotReturn;
1683  bool isVarArg = CLI.IsVarArg;
1684 
1685  MachineFunction &MF = DAG.getMachineFunction();
1686  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1687  bool isThisReturn = false;
1688  bool isSibCall = false;
1689  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1690 
1691  // Disable tail calls if they're not supported.
1692  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1693  isTailCall = false;
1694 
1695  if (isTailCall) {
1696  // Check if it's really possible to do a tail call.
1697  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1698  isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1699  Outs, OutVals, Ins, DAG);
1700  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1701  report_fatal_error("failed to perform tail call elimination on a call "
1702  "site marked musttail");
1703  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1704  // detected sibcalls.
1705  if (isTailCall) {
1706  ++NumTailCalls;
1707  isSibCall = true;
1708  }
1709  }
1710 
1711  // Analyze operands of the call, assigning locations to each operand.
1713  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1714  *DAG.getContext(), Call);
1715  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1716 
1717  // Get a count of how many bytes are to be pushed on the stack.
1718  unsigned NumBytes = CCInfo.getNextStackOffset();
1719 
1720  // For tail calls, memory operands are available in our caller's stack.
1721  if (isSibCall)
1722  NumBytes = 0;
1723 
1724  // Adjust the stack pointer for the new arguments...
1725  // These operations are automatically eliminated by the prolog/epilog pass
1726  if (!isSibCall)
1727  Chain = DAG.getCALLSEQ_START(Chain,
1728  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
1729 
1730  SDValue StackPtr =
1731  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1732 
1733  RegsToPassVector RegsToPass;
1734  SmallVector<SDValue, 8> MemOpChains;
1735 
1736  // Walk the register/memloc assignments, inserting copies/loads. In the case
1737  // of tail call optimization, arguments are handled later.
1738  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1739  i != e;
1740  ++i, ++realArgIdx) {
1741  CCValAssign &VA = ArgLocs[i];
1742  SDValue Arg = OutVals[realArgIdx];
1743  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1744  bool isByVal = Flags.isByVal();
1745 
1746  // Promote the value if needed.
1747  switch (VA.getLocInfo()) {
1748  default: llvm_unreachable("Unknown loc info!");
1749  case CCValAssign::Full: break;
1750  case CCValAssign::SExt:
1751  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1752  break;
1753  case CCValAssign::ZExt:
1754  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1755  break;
1756  case CCValAssign::AExt:
1757  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1758  break;
1759  case CCValAssign::BCvt:
1760  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1761  break;
1762  }
1763 
1764  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1765  if (VA.needsCustom()) {
1766  if (VA.getLocVT() == MVT::v2f64) {
1767  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1768  DAG.getConstant(0, dl, MVT::i32));
1769  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1770  DAG.getConstant(1, dl, MVT::i32));
1771 
1772  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1773  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1774 
1775  VA = ArgLocs[++i]; // skip ahead to next loc
1776  if (VA.isRegLoc()) {
1777  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1778  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1779  } else {
1780  assert(VA.isMemLoc());
1781 
1782  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1783  dl, DAG, VA, Flags));
1784  }
1785  } else {
1786  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1787  StackPtr, MemOpChains, Flags);
1788  }
1789  } else if (VA.isRegLoc()) {
1790  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1791  Outs[0].VT == MVT::i32) {
1792  assert(VA.getLocVT() == MVT::i32 &&
1793  "unexpected calling convention register assignment");
1794  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1795  "unexpected use of 'returned'");
1796  isThisReturn = true;
1797  }
1798  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1799  } else if (isByVal) {
1800  assert(VA.isMemLoc());
1801  unsigned offset = 0;
1802 
1803  // True if this byval aggregate will be split between registers
1804  // and memory.
1805  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1806  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1807 
1808  if (CurByValIdx < ByValArgsCount) {
1809 
1810  unsigned RegBegin, RegEnd;
1811  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1812 
1813  EVT PtrVT =
1815  unsigned int i, j;
1816  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1817  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1818  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1819  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1821  DAG.InferPtrAlignment(AddArg));
1822  MemOpChains.push_back(Load.getValue(1));
1823  RegsToPass.push_back(std::make_pair(j, Load));
1824  }
1825 
1826  // If the parameter size exceeds the register area, the "offset" value
1827  // helps us calculate the stack slot for the remaining part properly.
1828  offset = RegEnd - RegBegin;
1829 
1830  CCInfo.nextInRegsParam();
1831  }
1832 
1833  if (Flags.getByValSize() > 4*offset) {
1834  auto PtrVT = getPointerTy(DAG.getDataLayout());
1835  unsigned LocMemOffset = VA.getLocMemOffset();
1836  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1837  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1838  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1839  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1840  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1841  MVT::i32);
1842  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1843  MVT::i32);
1844 
1845  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1846  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1847  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1848  Ops));
1849  }
1850  } else if (!isSibCall) {
1851  assert(VA.isMemLoc());
1852 
1853  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1854  dl, DAG, VA, Flags));
1855  }
1856  }
1857 
1858  if (!MemOpChains.empty())
1859  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1860 
1861  // Build a sequence of copy-to-reg nodes chained together with token chain
1862  // and flag operands which copy the outgoing args into the appropriate regs.
1863  SDValue InFlag;
1864  // Tail call byval lowering might overwrite argument registers so in case of
1865  // tail call optimization the copies to registers are lowered later.
1866  if (!isTailCall)
1867  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1868  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1869  RegsToPass[i].second, InFlag);
1870  InFlag = Chain.getValue(1);
1871  }
1872 
1873  // For tail calls lower the arguments to the 'real' stack slot.
1874  if (isTailCall) {
1875  // Force all the incoming stack arguments to be loaded from the stack
1876  // before any new outgoing arguments are stored to the stack, because the
1877  // outgoing stack slots may alias the incoming argument stack slots, and
1878  // the alias isn't otherwise explicit. This is slightly more conservative
1879  // than necessary, because it means that each store effectively depends
1880  // on every argument instead of just those arguments it would clobber.
1881 
1882  // Do not flag preceding copytoreg stuff together with the following stuff.
1883  InFlag = SDValue();
1884  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1885  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1886  RegsToPass[i].second, InFlag);
1887  InFlag = Chain.getValue(1);
1888  }
1889  InFlag = SDValue();
1890  }
1891 
1892  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1893  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1894  // node so that legalize doesn't hack it.
1895  bool isDirect = false;
1896 
1897  const TargetMachine &TM = getTargetMachine();
1898  const Module *Mod = MF.getFunction()->getParent();
1899  const GlobalValue *GV = nullptr;
1900  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1901  GV = G->getGlobal();
1902  bool isStub =
1903  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1904 
1905  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1906  bool isLocalARMFunc = false;
1908  auto PtrVt = getPointerTy(DAG.getDataLayout());
1909 
1910  if (Subtarget->genLongCalls()) {
1911  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1912  "long-calls codegen is not position independent!");
1913  // Handle a global address or an external symbol. If it's not one of
1914  // those, the target's already in a register, so we don't need to do
1915  // anything extra.
1916  if (isa<GlobalAddressSDNode>(Callee)) {
1917  // Create a constant pool entry for the callee address
1918  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1919  ARMConstantPoolValue *CPV =
1920  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1921 
1922  // Get the address of the callee into a register
1923  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1924  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1925  Callee = DAG.getLoad(
1926  PtrVt, dl, DAG.getEntryNode(), CPAddr,
1928  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1929  const char *Sym = S->getSymbol();
1930 
1931  // Create a constant pool entry for the callee address
1932  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1933  ARMConstantPoolValue *CPV =
1935  ARMPCLabelIndex, 0);
1936  // Get the address of the callee into a register
1937  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1938  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1939  Callee = DAG.getLoad(
1940  PtrVt, dl, DAG.getEntryNode(), CPAddr,
1942  }
1943  } else if (isa<GlobalAddressSDNode>(Callee)) {
1944  // If we're optimizing for minimum size and the function is called three or
1945  // more times in this block, we can improve codesize by calling indirectly
1946  // as BLXr has a 16-bit encoding.
1947  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
1948  auto *BB = CLI.CS->getParent();
1949  bool PreferIndirect =
1950  Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
1951  count_if(GV->users(), [&BB](const User *U) {
1952  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
1953  }) > 2;
1954 
1955  if (!PreferIndirect) {
1956  isDirect = true;
1957  bool isDef = GV->isStrongDefinitionForLinker();
1958 
1959  // ARM call to a local ARM function is predicable.
1960  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
1961  // tBX takes a register source operand.
1962  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1963  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
1964  Callee = DAG.getNode(
1965  ARMISD::WrapperPIC, dl, PtrVt,
1966  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
1967  Callee = DAG.getLoad(
1968  PtrVt, dl, DAG.getEntryNode(), Callee,
1970  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
1972  } else if (Subtarget->isTargetCOFF()) {
1973  assert(Subtarget->isTargetWindows() &&
1974  "Windows is the only supported COFF target");
1975  unsigned TargetFlags = GV->hasDLLImportStorageClass()
1978  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
1979  TargetFlags);
1980  if (GV->hasDLLImportStorageClass())
1981  Callee =
1982  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
1983  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
1985  } else {
1986  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
1987  }
1988  }
1989  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1990  isDirect = true;
1991  // tBX takes a register source operand.
1992  const char *Sym = S->getSymbol();
1993  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1994  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1995  ARMConstantPoolValue *CPV =
1997  ARMPCLabelIndex, 4);
1998  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
1999  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2000  Callee = DAG.getLoad(
2001  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2003  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2004  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2005  } else {
2006  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2007  }
2008  }
2009 
2010  // FIXME: handle tail calls differently.
2011  unsigned CallOpc;
2012  if (Subtarget->isThumb()) {
2013  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2014  CallOpc = ARMISD::CALL_NOLINK;
2015  else
2016  CallOpc = ARMISD::CALL;
2017  } else {
2018  if (!isDirect && !Subtarget->hasV5TOps())
2019  CallOpc = ARMISD::CALL_NOLINK;
2020  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2021  // Emit regular call when code size is the priority
2022  !MF.getFunction()->optForMinSize())
2023  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2024  CallOpc = ARMISD::CALL_NOLINK;
2025  else
2026  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2027  }
2028 
2029  std::vector<SDValue> Ops;
2030  Ops.push_back(Chain);
2031  Ops.push_back(Callee);
2032 
2033  // Add argument registers to the end of the list so that they are known live
2034  // into the call.
2035  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2036  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2037  RegsToPass[i].second.getValueType()));
2038 
2039  // Add a register mask operand representing the call-preserved registers.
2040  if (!isTailCall) {
2041  const uint32_t *Mask;
2042  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2043  if (isThisReturn) {
2044  // For 'this' returns, use the R0-preserving mask if applicable
2045  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2046  if (!Mask) {
2047  // Set isThisReturn to false if the calling convention is not one that
2048  // allows 'returned' to be modeled in this way, so LowerCallResult does
2049  // not try to pass 'this' straight through
2050  isThisReturn = false;
2051  Mask = ARI->getCallPreservedMask(MF, CallConv);
2052  }
2053  } else
2054  Mask = ARI->getCallPreservedMask(MF, CallConv);
2055 
2056  assert(Mask && "Missing call preserved mask for calling convention");
2057  Ops.push_back(DAG.getRegisterMask(Mask));
2058  }
2059 
2060  if (InFlag.getNode())
2061  Ops.push_back(InFlag);
2062 
2063  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2064  if (isTailCall) {
2066  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2067  }
2068 
2069  // Returns a chain and a flag for retval copy to use.
2070  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2071  InFlag = Chain.getValue(1);
2072 
2073  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2074  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2075  if (!Ins.empty())
2076  InFlag = Chain.getValue(1);
2077 
2078  // Handle result values, copying them out of physregs into vregs that we
2079  // return.
2080  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2081  InVals, isThisReturn,
2082  isThisReturn ? OutVals[0] : SDValue());
2083 }
2084 
2085 /// HandleByVal - Every parameter *after* a byval parameter is passed
2086 /// on the stack. Remember the next parameter register to allocate,
2087 /// and then confiscate the rest of the parameter registers to ensure
2088 /// this.
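///
/// For example (illustrative): a 20-byte byval argument with 8-byte alignment
/// arriving when r1 is the next free GPR wastes r1 to reach an even register,
/// occupies r2-r3 with the first 8 bytes, and leaves the remaining 12 bytes to
/// be passed on the stack (assuming the NSAA still equals SP at that point).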
2089 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2090  unsigned Align) const {
2091  assert((State->getCallOrPrologue() == Prologue ||
2092  State->getCallOrPrologue() == Call) &&
2093  "unhandled ParmContext");
2094 
2095  // Byval (as with any stack) slots are always at least 4-byte aligned.
2096  Align = std::max(Align, 4U);
2097 
2098  unsigned Reg = State->AllocateReg(GPRArgRegs);
2099  if (!Reg)
2100  return;
2101 
2102  unsigned AlignInRegs = Align / 4;
2103  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2104  for (unsigned i = 0; i < Waste; ++i)
2105  Reg = State->AllocateReg(GPRArgRegs);
2106 
2107  if (!Reg)
2108  return;
2109 
2110  unsigned Excess = 4 * (ARM::R4 - Reg);
2111 
2112  // Special case: NSAA != SP and the parameter is larger than the space
2113  // left in the remaining GPRs. In that case we can't split the parameter,
2114  // so we must pass it entirely on the stack. We also set NCRN to R4 by
2115  // wasting all remaining registers.
2116  const unsigned NSAAOffset = State->getNextStackOffset();
2117  if (NSAAOffset != 0 && Size > Excess) {
2118  while (State->AllocateReg(GPRArgRegs))
2119  ;
2120  return;
2121  }
2122 
2123  // The first register for the byval parameter is the first register that
2124  // wasn't allocated before this call, i.e. "Reg".
2125  // If the parameter is small enough to fit in the range [Reg, r4), the end
2126  // (one past the last) register is Reg + size-of-parameter-in-regs;
2127  // otherwise the parameter is split between registers and the stack, and
2128  // the end register is r4.
2129  unsigned ByValRegBegin = Reg;
2130  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2131  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2132  // Note that the first register was already allocated above, so allocate
2133  // only the remaining registers we need.
2134  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2135  State->AllocateReg(GPRArgRegs);
2136  // A byval parameter that is split between registers and memory needs its
2137  // size truncated here.
2138  // In the case where the entire structure fits in registers, we set the
2139  // size in memory to zero.
2140  Size = std::max<int>(Size - Excess, 0);
2141 }
2142 
2143 /// MatchingStackOffset - Return true if the given stack call argument is
2144 /// already available at the same (relative) position in the caller's
2145 /// incoming argument stack.
2146 static
2149  const TargetInstrInfo *TII) {
2150  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2151  int FI = INT_MAX;
2152  if (Arg.getOpcode() == ISD::CopyFromReg) {
2153  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2155  return false;
2156  MachineInstr *Def = MRI->getVRegDef(VR);
2157  if (!Def)
2158  return false;
2159  if (!Flags.isByVal()) {
2160  if (!TII->isLoadFromStackSlot(*Def, FI))
2161  return false;
2162  } else {
2163  return false;
2164  }
2165  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2166  if (Flags.isByVal())
2167  // ByVal argument is passed in as a pointer but it's now being
2168  // dereferenced. e.g.
2169  // define @foo(%struct.X* %A) {
2170  // tail call @bar(%struct.X* byval %A)
2171  // }
2172  return false;
2173  SDValue Ptr = Ld->getBasePtr();
2175  if (!FINode)
2176  return false;
2177  FI = FINode->getIndex();
2178  } else
2179  return false;
2180 
2181  assert(FI != INT_MAX);
2182  if (!MFI.isFixedObjectIndex(FI))
2183  return false;
2184  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2185 }
2186 
2187 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2188 /// for tail call optimization. Targets which want to do tail call
2189 /// optimization should implement this function.
2190 bool
2191 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2192  CallingConv::ID CalleeCC,
2193  bool isVarArg,
2194  bool isCalleeStructRet,
2195  bool isCallerStructRet,
2196  const SmallVectorImpl<ISD::OutputArg> &Outs,
2197  const SmallVectorImpl<SDValue> &OutVals,
2198  const SmallVectorImpl<ISD::InputArg> &Ins,
2199  SelectionDAG& DAG) const {
2200  MachineFunction &MF = DAG.getMachineFunction();
2201  const Function *CallerF = MF.getFunction();
2202  CallingConv::ID CallerCC = CallerF->getCallingConv();
2203 
2204  assert(Subtarget->supportsTailCall());
2205 
2206  // Look for obvious safe cases to perform tail call optimization that do not
2207  // require ABI changes. This is what gcc calls sibcall.
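  // For example (illustrative IR), a call in tail position whose result is
  // returned unchanged qualifies:
  //   define i32 @caller(i32 %x) {
  //     %r = tail call i32 @callee(i32 %x)
  //     ret i32 %r
  //   }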
2208 
2209  // Exception-handling functions need a special set of instructions to indicate
2210  // a return to the hardware. Tail-calling another function would probably
2211  // break this.
2212  if (CallerF->hasFnAttribute("interrupt"))
2213  return false;
2214 
2215  // Also avoid sibcall optimization if either caller or callee uses struct
2216  // return semantics.
2217  if (isCalleeStructRet || isCallerStructRet)
2218  return false;
2219 
2220  // Externally-defined functions with weak linkage should not be
2221  // tail-called on ARM when the OS does not support dynamic
2222  // pre-emption of symbols, as the AAELF spec requires normal calls
2223  // to undefined weak functions to be replaced with a NOP or jump to the
2224  // next instruction. The behaviour of branch instructions in this
2225  // situation (as used for tail calls) is implementation-defined, so we
2226  // cannot rely on the linker replacing the tail call with a return.
2227  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2228  const GlobalValue *GV = G->getGlobal();
2229  const Triple &TT = getTargetMachine().getTargetTriple();
2230  if (GV->hasExternalWeakLinkage() &&
2231  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2232  return false;
2233  }
2234 
2235  // Check that the call results are passed in the same way.
2236  LLVMContext &C = *DAG.getContext();
2237  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2238  CCAssignFnForReturn(CalleeCC, isVarArg),
2239  CCAssignFnForReturn(CallerCC, isVarArg)))
2240  return false;
2241  // The callee has to preserve all registers the caller needs to preserve.
2242  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2243  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2244  if (CalleeCC != CallerCC) {
2245  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2246  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2247  return false;
2248  }
2249 
2250  // If the caller's vararg or byval argument has been split between registers
2251  // and stack, do not perform a tail call, since part of the argument is in
2252  // the caller's local frame.
2253  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2254  if (AFI_Caller->getArgRegsSaveSize())
2255  return false;
2256 
2257  // If the callee takes no arguments then go on to check the results of the
2258  // call.
2259  if (!Outs.empty()) {
2260  // Check if stack adjustment is needed. For now, do not do this if any
2261  // argument is passed on the stack.
2263  ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
2264  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2265  if (CCInfo.getNextStackOffset()) {
2266  // Check if the arguments are already laid out in the right way as
2267  // the caller's fixed stack objects.
2268  MachineFrameInfo &MFI = MF.getFrameInfo();
2269  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2270  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2271  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2272  i != e;
2273  ++i, ++realArgIdx) {
2274  CCValAssign &VA = ArgLocs[i];
2275  EVT RegVT = VA.getLocVT();
2276  SDValue Arg = OutVals[realArgIdx];
2277  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2278  if (VA.getLocInfo() == CCValAssign::Indirect)
2279  return false;
2280  if (VA.needsCustom()) {
2281  // f64 and vector types are split into multiple registers or
2282  // register/stack-slot combinations. The types will not match
2283  // the registers; give up on memory f64 refs until we figure
2284  // out what to do about this.
2285  if (!VA.isRegLoc())
2286  return false;
2287  if (!ArgLocs[++i].isRegLoc())
2288  return false;
2289  if (RegVT == MVT::v2f64) {
2290  if (!ArgLocs[++i].isRegLoc())
2291  return false;
2292  if (!ArgLocs[++i].isRegLoc())
2293  return false;
2294  }
2295  } else if (!VA.isRegLoc()) {
2296  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2297  MFI, MRI, TII))
2298  return false;
2299  }
2300  }
2301  }
2302 
2303  const MachineRegisterInfo &MRI = MF.getRegInfo();
2304  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2305  return false;
2306  }
2307 
2308  return true;
2309 }
2310 
2311 bool
2312 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2313  MachineFunction &MF, bool isVarArg,
2314  const SmallVectorImpl<ISD::OutputArg> &Outs,
2315  LLVMContext &Context) const {
2317  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2318  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2319 }
2320 
2322  const SDLoc &DL, SelectionDAG &DAG) {
2323  const MachineFunction &MF = DAG.getMachineFunction();
2324  const Function *F = MF.getFunction();
2325 
2326  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2327 
2328  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2329  // version of the "preferred return address". These offsets affect the return
2330  // instruction if this is a return from PL1 without hypervisor extensions.
2331  // IRQ/FIQ: +4 "subs pc, lr, #4"
2332  // SWI: 0 "subs pc, lr, #0"
2333  // ABORT: +4 "subs pc, lr, #4"
2334  // UNDEF: +4/+2 "subs pc, lr, #0"
2335  // UNDEF varies depending on whether the exception came from ARM or Thumb
2336  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2337 
2338  int64_t LROffset;
2339  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2340  IntKind == "ABORT")
2341  LROffset = 4;
2342  else if (IntKind == "SWI" || IntKind == "UNDEF")
2343  LROffset = 0;
2344  else
2345  report_fatal_error("Unsupported interrupt attribute. If present, value "
2346  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2347 
2348  RetOps.insert(RetOps.begin() + 1,
2349  DAG.getConstant(LROffset, DL, MVT::i32, false));
2350 
2351  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2352 }
2353 
2354 SDValue
2355 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2356  bool isVarArg,
2357  const SmallVectorImpl<ISD::OutputArg> &Outs,
2358  const SmallVectorImpl<SDValue> &OutVals,
2359  const SDLoc &dl, SelectionDAG &DAG) const {
2360 
2361  // CCValAssign - represents the assignment of the return value to a location.
2363 
2364  // CCState - Info about the registers and stack slots.
2365  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2366  *DAG.getContext(), Call);
2367 
2368  // Analyze outgoing return values.
2369  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2370 
2371  SDValue Flag;
2372  SmallVector<SDValue, 4> RetOps;
2373  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2374  bool isLittleEndian = Subtarget->isLittle();
2375 
2376  MachineFunction &MF = DAG.getMachineFunction();
2378  AFI->setReturnRegsCount(RVLocs.size());
2379 
2380  // Copy the result values into the output registers.
2381  for (unsigned i = 0, realRVLocIdx = 0;
2382  i != RVLocs.size();
2383  ++i, ++realRVLocIdx) {
2384  CCValAssign &VA = RVLocs[i];
2385  assert(VA.isRegLoc() && "Can only return in registers!");
2386 
2387  SDValue Arg = OutVals[realRVLocIdx];
2388 
2389  switch (VA.getLocInfo()) {
2390  default: llvm_unreachable("Unknown loc info!");
2391  case CCValAssign::Full: break;
2392  case CCValAssign::BCvt:
2393  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2394  break;
2395  }
2396 
2397  if (VA.needsCustom()) {
2398  if (VA.getLocVT() == MVT::v2f64) {
2399  // Extract the first half and return it in two registers.
2400  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2401  DAG.getConstant(0, dl, MVT::i32));
2402  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2403  DAG.getVTList(MVT::i32, MVT::i32), Half);
2404 
2405  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2406  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2407  Flag);
2408  Flag = Chain.getValue(1);
2409  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2410  VA = RVLocs[++i]; // skip ahead to next loc
2411  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2412  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2413  Flag);
2414  Flag = Chain.getValue(1);
2415  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2416  VA = RVLocs[++i]; // skip ahead to next loc
2417 
2418  // Extract the 2nd half and fall through to handle it as an f64 value.
2419  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2420  DAG.getConstant(1, dl, MVT::i32));
2421  }
2422  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2423  // available.
2424  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2425  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2426  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2427  fmrrd.getValue(isLittleEndian ? 0 : 1),
2428  Flag);
2429  Flag = Chain.getValue(1);
2430  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2431  VA = RVLocs[++i]; // skip ahead to next loc
2432  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2433  fmrrd.getValue(isLittleEndian ? 1 : 0),
2434  Flag);
2435  } else
2436  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2437 
2438  // Guarantee that all emitted copies are glued together, so the scheduler
2439  // cannot interleave other code between them or reorder them with the return.
2440  Flag = Chain.getValue(1);
2441  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2442  }
2443  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2444  const MCPhysReg *I =
2446  if (I) {
2447  for (; *I; ++I) {
2448  if (ARM::GPRRegClass.contains(*I))
2449  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2450  else if (ARM::DPRRegClass.contains(*I))
2451  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2452  else
2453  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2454  }
2455  }
2456 
2457  // Update chain and glue.
2458  RetOps[0] = Chain;
2459  if (Flag.getNode())
2460  RetOps.push_back(Flag);
2461 
2462  // CPUs which aren't M-class use a special sequence to return from
2463  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2464  // though we use "subs pc, lr, #N").
2465  //
2466  // M-class CPUs actually use a normal return sequence with a special
2467  // (hardware-provided) value in LR, so the normal code path works.
2468  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2469  !Subtarget->isMClass()) {
2470  if (Subtarget->isThumb1Only())
2471  report_fatal_error("interrupt attribute is not supported in Thumb1");
2472  return LowerInterruptReturn(RetOps, dl, DAG);
2473  }
2474 
2475  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2476 }
2477 
2478 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2479  if (N->getNumValues() != 1)
2480  return false;
2481  if (!N->hasNUsesOfValue(1, 0))
2482  return false;
2483 
2484  SDValue TCChain = Chain;
2485  SDNode *Copy = *N->use_begin();
2486  if (Copy->getOpcode() == ISD::CopyToReg) {
2487  // If the copy has a glue operand, we conservatively assume it isn't safe to
2488  // perform a tail call.
2489  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2490  return false;
2491  TCChain = Copy->getOperand(0);
2492  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2493  SDNode *VMov = Copy;
2494  // f64 returned in a pair of GPRs.
2495  SmallPtrSet<SDNode*, 2> Copies;
2496  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2497  UI != UE; ++UI) {
2498  if (UI->getOpcode() != ISD::CopyToReg)
2499  return false;
2500  Copies.insert(*UI);
2501  }
2502  if (Copies.size() > 2)
2503  return false;
2504 
2505  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2506  UI != UE; ++UI) {
2507  SDValue UseChain = UI->getOperand(0);
2508  if (Copies.count(UseChain.getNode()))
2509  // Second CopyToReg
2510  Copy = *UI;
2511  else {
2512  // We are at the top of this chain.
2513  // If the copy has a glue operand, we conservatively assume it
2514  // isn't safe to perform a tail call.
2515  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2516  return false;
2517  // First CopyToReg
2518  TCChain = UseChain;
2519  }
2520  }
2521  } else if (Copy->getOpcode() == ISD::BITCAST) {
2522  // f32 returned in a single GPR.
2523  if (!Copy->hasOneUse())
2524  return false;
2525  Copy = *Copy->use_begin();
2526  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2527  return false;
2528  // If the copy has a glue operand, we conservatively assume it isn't safe to
2529  // perform a tail call.
2530  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2531  return false;
2532  TCChain = Copy->getOperand(0);
2533  } else {
2534  return false;
2535  }
2536 
2537  bool HasRet = false;
2538  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2539  UI != UE; ++UI) {
2540  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2541  UI->getOpcode() != ARMISD::INTRET_FLAG)
2542  return false;
2543  HasRet = true;
2544  }
2545 
2546  if (!HasRet)
2547  return false;
2548 
2549  Chain = TCChain;
2550  return true;
2551 }
2552 
2553 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2554  if (!Subtarget->supportsTailCall())
2555  return false;
2556 
2557  auto Attr =
2558  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2559  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2560  return false;
2561 
2562  return true;
2563 }
2564 
2565 // Writing a 64-bit value, so split it into two 32-bit halves first and pass
2566 // the low and high parts through.
2568  SDLoc DL(Op);
2569  SDValue WriteValue = Op->getOperand(2);
2570 
2571  // This function is only supposed to be called for i64 type argument.
2572  assert(WriteValue.getValueType() == MVT::i64
2573  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2574 
2575  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2576  DAG.getConstant(0, DL, MVT::i32));
2577  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2578  DAG.getConstant(1, DL, MVT::i32));
2579  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2580  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2581 }
2582 
2583 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2584 // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
2585 // one of the above-mentioned nodes. It has to be wrapped because otherwise
2586 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2587 // be used to form an addressing mode. These wrapped nodes will be selected
2588 // into MOVi.
2590  EVT PtrVT = Op.getValueType();
2591  // FIXME there is no actual debug info here
2592  SDLoc dl(Op);
2593  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2594  SDValue Res;
2595  if (CP->isMachineConstantPoolEntry())
2596  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2597  CP->getAlignment());
2598  else
2599  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2600  CP->getAlignment());
2601  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2602 }
2603 
2606 }
2607 
2608 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2609  SelectionDAG &DAG) const {
2610  MachineFunction &MF = DAG.getMachineFunction();
2612  unsigned ARMPCLabelIndex = 0;
2613  SDLoc DL(Op);
2614  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2615  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2616  SDValue CPAddr;
2617  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2618  if (!IsPositionIndependent) {
2619  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2620  } else {
2621  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2622  ARMPCLabelIndex = AFI->createPICLabelUId();
2623  ARMConstantPoolValue *CPV =
2624  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2625  ARMCP::CPBlockAddress, PCAdj);
2626  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2627  }
2628  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2629  SDValue Result = DAG.getLoad(
2630  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2632  if (!IsPositionIndependent)
2633  return Result;
2634  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2635  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2636 }
2637 
2638 /// \brief Convert a TLS address reference into the correct sequence of loads
2639 /// and calls to compute the variable's address for Darwin, and return an
2640 /// SDValue containing the final node.
2641 
2642 /// Darwin only has one TLS scheme which must be capable of dealing with the
2643 /// fully general situation, in the worst case. This means:
2644 /// + "extern __thread" declaration.
2645 /// + Defined in a possibly unknown dynamic library.
2646 ///
2647 /// The general system is that each __thread variable has a [3 x i32] descriptor
2648 /// which contains information used by the runtime to calculate the address. The
2649 /// only part of this the compiler needs to know about is the first word, which
2650 /// contains a function pointer that must be called with the address of the
2651 /// entire descriptor in "r0".
2652 ///
2653 /// Since this descriptor may be in a different unit, in general access must
2654 /// proceed along the usual ARM rules. A common sequence to produce is:
2655 ///
2656 /// movw rT1, :lower16:_var$non_lazy_ptr
2657 /// movt rT1, :upper16:_var$non_lazy_ptr
2658 /// ldr r0, [rT1]
2659 /// ldr rT2, [r0]
2660 /// blx rT2
2661 /// [...address now in r0...]
2662 SDValue
2663 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2664  SelectionDAG &DAG) const {
2665  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2666  SDLoc DL(Op);
2667 
2668  // The first step is to get the address of the actual global symbol. This is where
2669  // the TLS descriptor lives.
2670  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2671 
2672  // The first entry in the descriptor is a function pointer that we must call
2673  // to obtain the address of the variable.
2674  SDValue Chain = DAG.getEntryNode();
2675  SDValue FuncTLVGet = DAG.getLoad(
2676  MVT::i32, DL, Chain, DescAddr,
2678  /* Alignment = */ 4,
2681  Chain = FuncTLVGet.getValue(1);
2682 
2684  MachineFrameInfo &MFI = F.getFrameInfo();
2685  MFI.setAdjustsStack(true);
2686 
2687  // TLS calls preserve all registers except those that absolutely must be
2688  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2689  // silly).
2690  auto TRI =
2691  getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2692  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2693  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2694 
2695  // Finally, we can make the call. This is just a degenerate version of a
2696  // normal ARM call node: r0 takes the address of the descriptor, and the call
2697  // returns the address of the variable in this thread.
2698  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2699  Chain =
2701  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2702  DAG.getRegisterMask(Mask), Chain.getValue(1));
2703  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2704 }
2705 
2706 SDValue
2707 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2708  SelectionDAG &DAG) const {
2709  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2710 
2711  SDValue Chain = DAG.getEntryNode();
2712  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2713  SDLoc DL(Op);
2714 
2715  // Load the current TEB (thread environment block)
2716  SDValue Ops[] = {Chain,
2717  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2718  DAG.getConstant(15, DL, MVT::i32),
2719  DAG.getConstant(0, DL, MVT::i32),
2720  DAG.getConstant(13, DL, MVT::i32),
2721  DAG.getConstant(0, DL, MVT::i32),
2722  DAG.getConstant(2, DL, MVT::i32)};
2723  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2724  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2725 
2726  SDValue TEB = CurrentTEB.getValue(0);
2727  Chain = CurrentTEB.getValue(1);
2728 
2729  // Load the ThreadLocalStoragePointer from the TEB
2730  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2731  SDValue TLSArray =
2732  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2733  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2734 
2735  // The pointer to the thread's TLS data area is located at offset
2736  // TLS-index * 4 into the TLS array.
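  // Roughly (illustrative):
  //   addr = *(*(TEB + 0x2c) + _tls_index * 4) + SECREL32(var)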
2737 
2738  // Load the TLS index from the C runtime
2739  SDValue TLSIndex =
2740  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2741  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2742  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2743 
2744  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2745  DAG.getConstant(2, DL, MVT::i32));
2746  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2747  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2748  MachinePointerInfo());
2749 
2750  // Get the offset of the start of the .tls section (section base)
2751  const auto *GA = cast<GlobalAddressSDNode>(Op);
2752  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2753  SDValue Offset = DAG.getLoad(
2754  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2755  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2757 
2758  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2759 }
2760 
2761 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2762 SDValue
2763 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2764  SelectionDAG &DAG) const {
2765  SDLoc dl(GA);
2766  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2767  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2768  MachineFunction &MF = DAG.getMachineFunction();
2770  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2771  ARMConstantPoolValue *CPV =
2772  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2773  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2774  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2775  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2776  Argument = DAG.getLoad(
2777  PtrVT, dl, DAG.getEntryNode(), Argument,
2779  SDValue Chain = Argument.getValue(1);
2780 
2781  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2782  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2783 
2784  // call __tls_get_addr.
2785  ArgListTy Args;
2786  ArgListEntry Entry;
2787  Entry.Node = Argument;
2788  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2789  Args.push_back(Entry);
2790 
2791  // FIXME: is there useful debug info available here?
2793  CLI.setDebugLoc(dl).setChain(Chain)
2795  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2796 
2797  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2798  return CallResult.first;
2799 }
2800 
2801 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2802 // "local exec" model.
2803 SDValue
2804 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2805  SelectionDAG &DAG,
2806  TLSModel::Model model) const {
2807  const GlobalValue *GV = GA->getGlobal();
2808  SDLoc dl(GA);
2809  SDValue Offset;
2810  SDValue Chain = DAG.getEntryNode();
2811  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2812  // Get the Thread Pointer
2814 
2815  if (model == TLSModel::InitialExec) {
2816  MachineFunction &MF = DAG.getMachineFunction();
2818  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2819  // Initial exec model.
2820  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2821  ARMConstantPoolValue *CPV =
2822  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2824  true);
2825  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2826  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2827  Offset = DAG.getLoad(
2828  PtrVT, dl, Chain, Offset,
2830  Chain = Offset.getValue(1);
2831 
2832  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2833  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2834 
2835  Offset = DAG.getLoad(
2836  PtrVT, dl, Chain, Offset,
2838  } else {
2839  // local exec model
2840  assert(model == TLSModel::LocalExec);
2841  ARMConstantPoolValue *CPV =
2843  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2844  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2845  Offset = DAG.getLoad(
2846  PtrVT, dl, Chain, Offset,
2848  }
2849 
2850  // The address of the thread-local variable is the sum of the thread
2851  // pointer and the offset of the variable.
2852  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2853 }
2854 
2855 SDValue
2856 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2857  if (Subtarget->isTargetDarwin())
2858  return LowerGlobalTLSAddressDarwin(Op, DAG);
2859 
2860  if (Subtarget->isTargetWindows())
2861  return LowerGlobalTLSAddressWindows(Op, DAG);
2862 
2863  // TODO: implement the "local dynamic" model
2864  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2865  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2866  if (DAG.getTarget().Options.EmulatedTLS)
2867  return LowerToTLSEmulatedModel(GA, DAG);
2868 
2869  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2870 
2871  switch (model) {
2874  return LowerToTLSGeneralDynamicModel(GA, DAG);
2875  case TLSModel::InitialExec:
2876  case TLSModel::LocalExec:
2877  return LowerToTLSExecModels(GA, DAG, model);
2878  }
2879  llvm_unreachable("bogus TLS model");
2880 }
2881 
2882 /// Return true if all users of V are within function F, looking through
2883 /// ConstantExprs.
2884 static bool allUsersAreInFunction(const Value *V, const Function *F) {
2885  SmallVector<const User*,4> Worklist;
2886  for (auto *U : V->users())
2887  Worklist.push_back(U);
2888  while (!Worklist.empty()) {
2889  auto *U = Worklist.pop_back_val();
2890  if (isa<ConstantExpr>(U)) {
2891  for (auto *UU : U->users())
2892  Worklist.push_back(UU);
2893  continue;
2894  }
2895 
2896  auto *I = dyn_cast<Instruction>(U);
2897  if (!I || I->getParent()->getParent() != F)
2898  return false;
2899  }
2900  return true;
2901 }
2902 
2903 /// Return true if all users of V are within some (any) function, looking through
2904 /// ConstantExprs. In other words, return false if V has any global constant users.
2905 static bool allUsersAreInFunctions(const Value *V) {
2906  SmallVector<const User*,4> Worklist;
2907  for (auto *U : V->users())
2908  Worklist.push_back(U);
2909  while (!Worklist.empty()) {
2910  auto *U = Worklist.pop_back_val();
2911  if (isa<ConstantExpr>(U)) {
2912  for (auto *UU : U->users())
2913  Worklist.push_back(UU);
2914  continue;
2915  }
2916 
2917  if (!isa<Instruction>(U))
2918  return false;
2919  }
2920  return true;
2921 }
2922 
2923 // Return true if T is an integer, float or an array/vector of either.
2924 static bool isSimpleType(Type *T) {
2925  if (T->isIntegerTy() || T->isFloatingPointTy())
2926  return true;
2927  Type *SubT = nullptr;
2928  if (T->isArrayTy())
2929  SubT = T->getArrayElementType();
2930  else if (T->isVectorTy())
2931  SubT = T->getVectorElementType();
2932  else
2933  return false;
2934  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
2935 }
2936 
2938  EVT PtrVT, SDLoc dl) {
2939  // If we're creating a pool entry for a constant global with unnamed address,
2940  // and the global is small enough, we can emit it inline into the constant pool
2941  // to save ourselves an indirection.
2942  //
2943  // This is a win if the constant is only used in one function (so it doesn't
2944  // need to be duplicated) or duplicating the constant wouldn't increase code
2945  // size (implying the constant is no larger than 4 bytes).
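  //
  // For example (illustrative IR), a small local string such as
  //   @.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
  // that is used only inside this function can be emitted directly into the
  // function's constant pool and reached with a single pc-relative load.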
2946  const Function *F = DAG.getMachineFunction().getFunction();
2947 
2948  // We rely on this decision to inline being idempotent and unrelated to the
2949  // use-site. We know that if we inline a variable at one use site, we'll
2950  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
2951  // doesn't know about this optimization, so bail out if it's enabled, else
2952  // we could decide to inline here (and thus never emit the GV) while
2953  // fast-isel-generated code still requires the GV.
2954  if (!EnableConstpoolPromotion ||
2956  return SDValue();
2957 
2958  auto *GVar = dyn_cast<GlobalVariable>(GV);
2959  if (!GVar || !GVar->hasInitializer() ||
2960  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
2961  !GVar->hasLocalLinkage())
2962  return SDValue();
2963 
2964  // Ensure that we don't try to inline any type that contains pointers. If
2965  // we inline a value that contains relocations, we move the relocations from
2966  // .data to .text, which is not ideal.
2967  auto *Init = GVar->getInitializer();
2968  if (!isSimpleType(Init->getType()))
2969  return SDValue();
2970 
2971  // The constant islands pass can only really deal with alignment requests
2972  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
2973  // any type wanting greater alignment requirements than 4 bytes. We also
2974  // can only promote constants that are multiples of 4 bytes in size or
2975  // are paddable to a multiple of 4. Currently we only try to pad constants
2976  // that are strings, for simplicity.
2977  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
2978  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
2979  unsigned Align = GVar->getAlignment();
2980  unsigned RequiredPadding = 4 - (Size % 4);
2981  bool PaddingPossible =
2982  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
2983  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
2984  return SDValue();
2985 
2986  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
2987  MachineFunction &MF = DAG.getMachineFunction();
2989 
2990  // We can't bloat the constant pool too much, else the ConstantIslands pass
2991  // may fail to converge. If we haven't promoted this global yet (it may have
2992  // multiple uses), and promoting it would increase the constant pool size (Sz
2993  // > 4), ensure we have space to do so up to MaxTotal.
2994  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
2995  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
2997  return SDValue();
2998 
2999  // This is only valid if all users are in a single function, OR it has users
3000  // in multiple functions but is no larger than a pointer. We also check that
3001  // GVar has no constant (non-ConstantExpr) users; if it does, it essentially
3002  // has its address taken.
3003  if (!allUsersAreInFunction(GVar, F) &&
3004  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3005  return SDValue();
3006 
3007  // We're going to inline this global. Pad it out if needed.
3008  if (RequiredPadding != 4) {
3009  StringRef S = CDAInit->getAsString();
3010 
3012  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3013  while (RequiredPadding--)
3014  V.push_back(0);
3015  Init = ConstantDataArray::get(*DAG.getContext(), V);
3016  }
3017 
3018  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3019  SDValue CPAddr =
3020  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3021  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3024  PaddedSize - 4);
3025  }
3026  ++NumConstpoolPromoted;
3027  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3028 }
3029 
3030 static bool isReadOnly(const GlobalValue *GV) {
3031  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3032  GV = GA->getBaseObject();
3033  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3034  isa<Function>(GV);
3035 }
3036 
3037 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3038  SelectionDAG &DAG) const {
3039  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3040  SDLoc dl(Op);
3041  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3042  const TargetMachine &TM = getTargetMachine();
3043  bool IsRO = isReadOnly(GV);
3044 
3045  // Only promote to a constant pool if we're not generating an execute-only (XO) text section.
3046  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3047  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3048  return V;
3049 
3050  if (isPositionIndependent()) {
3051  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3052 
3053  MachineFunction &MF = DAG.getMachineFunction();
3055  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3056  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3057  SDLoc dl(Op);
3058  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3060  GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3061  UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3062  /*AddCurrentAddress=*/UseGOT_PREL);
3063  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3064  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3065  SDValue Result = DAG.getLoad(
3066  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3068  SDValue Chain = Result.getValue(1);
3069  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3070  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3071  if (UseGOT_PREL)
3072  Result =
3073  DAG.getLoad(PtrVT, dl, Chain, Result,
3075  return Result;
3076  } else if (Subtarget->isROPI() && IsRO) {
3077  // PC-relative.
3078  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3079  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3080  return Result;
3081  } else if (Subtarget->isRWPI() && !IsRO) {
3082  // SB-relative.
3083  ARMConstantPoolValue *CPV =
3085  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3086  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3087  SDValue G = DAG.getLoad(
3088  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3090  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3091  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
3092  return Result;
3093  }
3094 
3095  // If we have T2 ops, we can materialize the address directly via movt/movw
3096  // pair. This is always cheaper.
3097  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3098  ++NumMovwMovt;
3099  // FIXME: Once remat is capable of dealing with instructions with register
3100  // operands, expand this into two nodes.
3101  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3102  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3103  } else {
3104  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3105  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3106  return DAG.getLoad(
3107  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3109  }
3110 }
3111 
3112 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3113  SelectionDAG &DAG) const {
3114  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3115  "ROPI/RWPI not currently supported for Darwin");
3116  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3117  SDLoc dl(Op);
3118  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3119 
3120  if (Subtarget->useMovt(DAG.getMachineFunction()))
3121  ++NumMovwMovt;
3122 
3123  // FIXME: Once remat is capable of dealing with instructions with register
3124  // operands, expand this into multiple nodes
3125  unsigned Wrapper =
3127 
3128  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3129  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3130 
3131  if (Subtarget->isGVIndirectSymbol(GV))
3132  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3134  return Result;
3135 }
3136 
3137 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3138  SelectionDAG &DAG) const {
3139  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3140  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3141  "Windows on ARM expects to use movw/movt");
3142  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3143  "ROPI/RWPI not currently supported for Windows");
3144 
3145  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3146  const ARMII::TOF TargetFlags =
3147  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3148  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3149  SDValue Result;
3150  SDLoc DL(Op);
3151 
3152  ++NumMovwMovt;
3153 
3154  // FIXME: Once remat is capable of dealing with instructions with register
3155  // operands, expand this into two nodes.
3156  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3157  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3158  TargetFlags));
3159  if (GV->hasDLLImportStorageClass())
3160  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3162  return Result;
3163 }
3164 
3165 SDValue
3166 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3167  SDLoc dl(Op);
3168  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3169  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3170  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3171  Op.getOperand(1), Val);
3172 }
3173 
3174 SDValue
3175 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3176  SDLoc dl(Op);
3177  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3178  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3179 }
3180 
3181 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3182  SelectionDAG &DAG) const {
3183  SDLoc dl(Op);
3185  Op.getOperand(0));
3186 }
3187 
3188 SDValue
3189 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3190  const ARMSubtarget *Subtarget) const {
3191  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3192  SDLoc dl(Op);
3193  switch (IntNo) {
3194  default: return SDValue(); // Don't custom lower most intrinsics.
3195  case Intrinsic::thread_pointer: {
3196  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3197  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3198  }
3199  case Intrinsic::eh_sjlj_lsda: {
3200  MachineFunction &MF = DAG.getMachineFunction();
3202  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3203  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3204  SDValue CPAddr;
3205  bool IsPositionIndependent = isPositionIndependent();
3206  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3207  ARMConstantPoolValue *CPV =
3208  ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3209  ARMCP::CPLSDA, PCAdj);
3210  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3211  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3212  SDValue Result = DAG.getLoad(
3213  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3215 
3216  if (IsPositionIndependent) {
3217  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3218  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3219  }
3220  return Result;
3221  }
3222  case Intrinsic::arm_neon_vmulls:
3223  case Intrinsic::arm_neon_vmullu: {
3224  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3226  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3227  Op.getOperand(1), Op.getOperand(2));
3228  }
3229  case Intrinsic::arm_neon_vminnm:
3230  case Intrinsic::arm_neon_vmaxnm: {
3231  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3233  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3234  Op.getOperand(1), Op.getOperand(2));
3235  }
3236  case Intrinsic::arm_neon_vminu:
3237  case Intrinsic::arm_neon_vmaxu: {
3238  if (Op.getValueType().isFloatingPoint())
3239  return SDValue();
3240  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3241  ? ISD::UMIN : ISD::UMAX;
3242  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3243  Op.getOperand(1), Op.getOperand(2));
3244  }
3245  case Intrinsic::arm_neon_vmins:
3246  case Intrinsic::arm_neon_vmaxs: {
3247  // v{min,max}s is overloaded between signed integers and floats.
3248  if (!Op.getValueType().isFloatingPoint()) {
3249  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3250  ? ISD::SMIN : ISD::SMAX;
3251  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3252  Op.getOperand(1), Op.getOperand(2));
3253  }
3254  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3256  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3257  Op.getOperand(1), Op.getOperand(2));
3258  }
3259  }
3260 }
3261 
3263  const ARMSubtarget *Subtarget) {
3264  // FIXME: handle "fence singlethread" more efficiently.
3265  SDLoc dl(Op);
3266  if (!Subtarget->hasDataBarrier()) {
3267  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3268  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3269  // here.
3270  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3271  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3272  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3273  DAG.getConstant(0, dl, MVT::i32));
3274  }
3275 
3276  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3277  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3278  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3279  if (Subtarget->isMClass()) {
3280  // Only a full system barrier exists in the M-class architectures.
3281  Domain = ARM_MB::SY;
3282  } else if (Subtarget->preferISHSTBarriers() &&
3283  Ord == AtomicOrdering::Release) {
3284  // Swift happens to implement ISHST barriers in a way that's compatible with
3285  // Release semantics but weaker than ISH so we'd be fools not to use
3286  // it. Beware: other processors probably don't!
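  // For example (illustrative): on such a processor a "fence release" is then
  // lowered to "dmb ishst" here instead of the generic "dmb ish".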
3287  Domain = ARM_MB::ISHST;
3288  }
3289 
3290  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3291  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3292  DAG.getConstant(Domain, dl, MVT::i32));
3293 }
3294 
3296  const ARMSubtarget *Subtarget) {
3297  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3298  if (!(Subtarget->isThumb2() ||
3299  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3300  // Just preserve the chain.
3301  return Op.getOperand(0);
3302 
3303  SDLoc dl(Op);
3304  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
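  // For example (illustrative): a write prefetch such as
  // @llvm.prefetch(p, /*rw=*/1, /*locality=*/3, /*data=*/1) is only kept (as
  // PLDW) when both v7 and the MP extension are available; otherwise it is
  // dropped below.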
3305  if (!isRead &&
3306  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3307  // ARMv7 with MP extension has PLDW.
3308  return Op.getOperand(0);
3309 
3310  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3311  if (Subtarget->isThumb()) {
3312  // Invert the bits.
3313  isRead = ~isRead & 1;
3314  isData = ~isData & 1;
3315  }
3316 
3317  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3318  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3319  DAG.getConstant(isData, dl, MVT::i32));
3320 }
3321 
3323  MachineFunction &MF = DAG.getMachineFunction();
3324  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3325 
3326  // vastart just stores the address of the VarArgsFrameIndex slot into the
3327  // memory location argument.
3328  SDLoc dl(Op);
3329  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3330  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3331  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3332  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3333  MachinePointerInfo(SV));
3334 }
3335 
3336 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3337  CCValAssign &NextVA,
3338  SDValue &Root,
3339  SelectionDAG &DAG,
3340  const SDLoc &dl) const {
3341  MachineFunction &MF = DAG.getMachineFunction();
3343 
3344  const TargetRegisterClass *RC;
3345  if (AFI->isThumb1OnlyFunction())
3346  RC = &ARM::tGPRRegClass;
3347  else
3348  RC = &ARM::GPRRegClass;
3349 
3350  // Transform the arguments stored in physical registers into virtual ones.
3351  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3352  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3353 
3354  SDValue ArgValue2;
3355  if (NextVA.isMemLoc()) {
3356  MachineFrameInfo &MFI = MF.getFrameInfo();
3357  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3358 
3359  // Create load node to retrieve arguments from the stack.
3360  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3361  ArgValue2 = DAG.getLoad(
3362  MVT::i32, dl, Root, FIN,
3364  } else {
3365  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3366  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3367  }
3368  if (!Subtarget->isLittle())
3369  std::swap (ArgValue, ArgValue2);
3370  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3371 }
3372 
3373 // The remaining GPRs hold either the beginning of variable-argument
3374 // data, or the beginning of an aggregate passed by value (usually
3375 // byval). Either way, we allocate stack slots adjacent to the data
3376 // provided by our caller, and store the unallocated registers there.
3377 // If this is a variadic function, the va_list pointer will begin with
3378 // these values; otherwise, this reassembles a (byval) structure that
3379 // was split between registers and memory.
3380 // Return: the frame index the registers were stored into.
3381 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3382  const SDLoc &dl, SDValue &Chain,
3383  const Value *OrigArg,
3384  unsigned InRegsParamRecordIdx,
3385  int ArgOffset, unsigned ArgSize) const {
3386  // Currently, two use-cases are possible:
3387  // Case #1. A non-varargs function where we meet the first byval parameter.
3388  // The first unallocated register becomes the first byval register and
3389  // all remaining registers are consumed
3390  // (these two actions are performed by the HandleByVal method).
3391  // Then, here, we initialize the stack frame with
3392  // "store-reg" instructions.
3393  // Case #2. A varargs function that doesn't contain byval parameters.
3394  // The same: consume all remaining unallocated registers and
3395  // initialize the stack frame.
3396 
3397  MachineFunction &MF = DAG.getMachineFunction();
3398  MachineFrameInfo &MFI = MF.getFrameInfo();
3400  unsigned RBegin, REnd;
3401  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3402  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3403  } else {
3404  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3405  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3406  REnd = ARM::R4;
3407  }
3408 
3409  if (REnd != RBegin)
3410  ArgOffset = -4 * (ARM::R4 - RBegin);
3411 
3412  auto PtrVT = getPointerTy(DAG.getDataLayout());
3413  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3414  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3415 
3416  SmallVector<SDValue, 4> MemOps;
3417  const TargetRegisterClass *RC =
3418  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3419 
3420  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3421  unsigned VReg = MF.addLiveIn(Reg, RC);
3422  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3423  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3424  MachinePointerInfo(OrigArg, 4 * i));
3425  MemOps.push_back(Store);
3426  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3427  }
3428 
3429  if (!MemOps.empty())
3430  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3431  return FrameIndex;
3432 }
3433 
3434 // Set up the stack frame that the va_list pointer will start from.
3435 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3436  const SDLoc &dl, SDValue &Chain,
3437  unsigned ArgOffset,
3438  unsigned TotalArgRegsSaveSize,
3439  bool ForceMutable) const {
3440  MachineFunction &MF = DAG.getMachineFunction();
3441  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3442 
3443  // Try to store any remaining integer argument regs
3444  // to their spots on the stack so that they may be loaded by dereferencing
3445  // the result of va_next.
3446  // If there are no regs to be stored, just point the address past the last
3447  // argument passed via the stack.
3448  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3449  CCInfo.getInRegsParamsCount(),
3450  CCInfo.getNextStackOffset(), 4);
3451  AFI->setVarArgsFrameIndex(FrameIndex);
3452 }
3453 
3454 SDValue ARMTargetLowering::LowerFormalArguments(
3455  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3456  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3457  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3458  MachineFunction &MF = DAG.getMachineFunction();
3459  MachineFrameInfo &MFI = MF.getFrameInfo();
3460 
3461  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3462 
3463  // Assign locations to all of the incoming arguments.
3464  SmallVector<CCValAssign, 16> ArgLocs;
3465  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3466  *DAG.getContext(), Prologue);
3467  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3468 
3469  SmallVector<SDValue, 16> ArgValues;
3470  SDValue ArgValue;
3471  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3472  unsigned CurArgIdx = 0;
3473 
3474  // Initially ArgRegsSaveSize is zero.
3475  // Then we increase this value each time we meet byval parameter.
3476  // We also increase this value in case of varargs function.
3477  AFI->setArgRegsSaveSize(0);
3478 
3479  // Calculate the amount of stack space that we need to allocate to store
3480  // byval and variadic arguments that are passed in registers.
3481  // We need to know this before we allocate the first byval or variadic
3482  // argument, as they will be allocated a stack slot below the CFA (Canonical
3483  // Frame Address, the stack pointer at entry to the function).
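  // Illustrative example: if the lowest register holding byval or variadic
  // data turns out to be R2, then R2 and R3 must be saved below the CFA and
  // TotalArgRegsSaveSize (computed after this loop) is 4 * (R4 - R2) = 8.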
3484  unsigned ArgRegBegin = ARM::R4;
3485  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3486  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3487  break;
3488 
3489  CCValAssign &VA = ArgLocs[i];
3490  unsigned Index = VA.getValNo();
3491  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3492  if (!Flags.isByVal())
3493  continue;
3494 
3495  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3496  unsigned RBegin, REnd;
3497  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3498  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3499 
3500  CCInfo.nextInRegsParam();
3501  }
3502  CCInfo.rewindByValRegsInfo();
3503 
3504  int lastInsIndex = -1;
3505  if (isVarArg && MFI.hasVAStart()) {
3506  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3507  if (RegIdx != array_lengthof(GPRArgRegs))
3508  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3509  }
3510 
3511  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3512  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3513  auto PtrVT = getPointerTy(DAG.getDataLayout());
3514 
3515  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3516  CCValAssign &VA = ArgLocs[i];
3517  if (Ins[VA.getValNo()].isOrigArg()) {
3518  std::advance(CurOrigArg,
3519  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3520  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3521  }
3522  // Arguments stored in registers.
3523  if (VA.isRegLoc()) {
3524  EVT RegVT = VA.getLocVT();
3525 
3526  if (VA.needsCustom()) {
3527  // f64 and vector types are split up into multiple registers or
3528  // combinations of registers and stack slots.
3529  if (VA.getLocVT() == MVT::v2f64) {
3530  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3531  Chain, DAG, dl);
3532  VA = ArgLocs[++i]; // skip ahead to next loc
3533  SDValue ArgValue2;
3534  if (VA.isMemLoc()) {
3535  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3536  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3537  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3538  MachinePointerInfo::getFixedStack(
3539  DAG.getMachineFunction(), FI));
3540  } else {
3541  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3542  Chain, DAG, dl);
3543  }
3544  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3545  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3546  ArgValue, ArgValue1,
3547  DAG.getIntPtrConstant(0, dl));
3548  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3549  ArgValue, ArgValue2,
3550  DAG.getIntPtrConstant(1, dl));
3551  } else
3552  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3553 
3554  } else {
3555  const TargetRegisterClass *RC;
3556 
3557  if (RegVT == MVT::f32)
3558  RC = &ARM::SPRRegClass;
3559  else if (RegVT == MVT::f64)
3560  RC = &ARM::DPRRegClass;
3561  else if (RegVT == MVT::v2f64)
3562  RC = &ARM::QPRRegClass;
3563  else if (RegVT == MVT::i32)
3564  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3565  : &ARM::GPRRegClass;
3566  else
3567  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3568 
3569  // Transform the arguments in physical registers into virtual ones.
3570  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3571  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3572  }
3573 
3574  // If this is an 8 or 16-bit value, it is really passed promoted
3575  // to 32 bits. Insert an assert[sz]ext to capture this, then
3576  // truncate to the right size.
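  // Illustrative example: an i8 argument sign-extended by the caller arrives
  // in a full i32 register; AssertSext with value type i8 records that the
  // upper 24 bits are sign copies, and the TRUNCATE below recovers the i8.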
3577  switch (VA.getLocInfo()) {
3578  default: llvm_unreachable("Unknown loc info!");
3579  case CCValAssign::Full: break;
3580  case CCValAssign::BCvt:
3581  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3582  break;
3583  case CCValAssign::SExt:
3584  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3585  DAG.getValueType(VA.getValVT()));
3586  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3587  break;
3588  case CCValAssign::ZExt:
3589  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3590  DAG.getValueType(VA.getValVT()));
3591  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3592  break;
3593  }
3594 
3595  InVals.push_back(ArgValue);
3596 
3597  } else { // VA.isRegLoc()
3598 
3599  // sanity check
3600  assert(VA.isMemLoc());
3601  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3602 
3603  int index = VA.getValNo();
3604 
3605  // Some Ins[] entries become multiple ArgLoc[] entries.
3606  // Process them only once.
3607  if (index != lastInsIndex)
3608  {
3609  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3610  // FIXME: For now, all byval parameter objects are marked mutable.
3611  // This can be changed with more analysis.
3612  // In the case of tail call optimization, mark all arguments mutable,
3613  // since they could be overwritten by the lowering of arguments in case of
3614  // a tail call.
3615  if (Flags.isByVal()) {
3616  assert(Ins[index].isOrigArg() &&
3617  "Byval arguments cannot be implicit");
3618  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3619 
3620  int FrameIndex = StoreByValRegs(
3621  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3622  VA.getLocMemOffset(), Flags.getByValSize());
3623  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3624  CCInfo.nextInRegsParam();
3625  } else {
3626  unsigned FIOffset = VA.getLocMemOffset();
3627  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3628  FIOffset, true);
3629 
3630  // Create load nodes to retrieve arguments from the stack.
3631  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3632  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3633  MachinePointerInfo::getFixedStack(
3634  DAG.getMachineFunction(), FI)));
3635  }
3636  lastInsIndex = index;
3637  }
3638  }
3639  }
3640 
3641  // varargs
3642  if (isVarArg && MFI.hasVAStart())
3643  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3644  CCInfo.getNextStackOffset(),
3645  TotalArgRegsSaveSize);
3646 
3646 
3647  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3648 
3649  return Chain;
3650 }
3651 
3652 /// isFloatingPointZero - Return true if this is +0.0.
3653 static bool isFloatingPointZero(SDValue Op) {
3654  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3655  return CFP->getValueAPF().isPosZero();
3656  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3657  // Maybe this has already been legalized into the constant pool?
3658  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3659  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3660  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3661  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3662  return CFP->getValueAPF().isPosZero();
3663  }
3664  } else if (Op->getOpcode() == ISD::BITCAST &&
3665  Op->getValueType(0) == MVT::f64) {
3666  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3667  // created by LowerConstantFP().
3668  SDValue BitcastOp = Op->getOperand(0);
3669  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3670  isNullConstant(BitcastOp->getOperand(0)))
3671  return true;
3672  }
3673  return false;
3674 }
3675 
3676 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3677 /// the given operands.
3678 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3679  SDValue &ARMcc, SelectionDAG &DAG,
3680  const SDLoc &dl) const {
3681  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3682  unsigned C = RHSC->getZExtValue();
3683  if (!isLegalICmpImmediate(C)) {
3684  // Constant does not fit, try adjusting it by one?
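  // Illustrative example: "x < 0x101" cannot encode 0x101 as an ARM modified
  // immediate, but it is equivalent to "x <= 0x100", and 0x100 does encode,
  // so SETLT with C is rewritten below as SETLE with C - 1.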
3685  switch (CC) {
3686  default: break;
3687  case ISD::SETLT:
3688  case ISD::SETGE:
3689  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3690  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3691  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3692  }
3693  break;
3694  case ISD::SETULT:
3695  case ISD::SETUGE:
3696  if (C != 0 && isLegalICmpImmediate(C-1)) {
3697  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3698  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3699  }
3700  break;
3701  case ISD::SETLE:
3702  case ISD::SETGT:
3703  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3704  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3705  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3706  }
3707  break;
3708  case ISD::SETULE:
3709  case ISD::SETUGT:
3710  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3711  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3712  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3713  }
3714  break;
3715  }
3716  }
3717  }
3718 
3719  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3720  ARMISD::NodeType CompareType;
3721  switch (CondCode) {
3722  default:
3723  CompareType = ARMISD::CMP;
3724  break;
3725  case ARMCC::EQ:
3726  case ARMCC::NE:
3727  // Uses only Z Flag
3728  CompareType = ARMISD::CMPZ;
3729  break;
3730  }
3731  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3732  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3733 }
3734 
3735 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3736 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3737  SelectionDAG &DAG, const SDLoc &dl) const {
3738  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3739  SDValue Cmp;
3740  if (!isFloatingPointZero(RHS))
3741  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
3742  else
3743  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
3744  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3745 }
3746 
3747 /// duplicateCmp - Glue values can have only one use, so this function
3748 /// duplicates a comparison node.
3749 SDValue
3750 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3751  unsigned Opc = Cmp.getOpcode();
3752  SDLoc DL(Cmp);
3753  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3754  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3755 
3756  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3757  Cmp = Cmp.getOperand(0);
3758  Opc = Cmp.getOpcode();
3759  if (Opc == ARMISD::CMPFP)
3760  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3761  else {
3762  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3763  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
3764  }
3765  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3766 }
3767 
3768 std::pair<SDValue, SDValue>
3769 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3770  SDValue &ARMcc) const {
3771  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3772 
3773  SDValue Value, OverflowCmp;
3774  SDValue LHS = Op.getOperand(0);
3775  SDValue RHS = Op.getOperand(1);
3776  SDLoc dl(Op);
3777 
3778  // FIXME: We are currently always generating CMPs because we don't support
3779  // generating CMN through the backend. This is not as good as the natural
3780  // CMP case because it causes a register dependency and cannot be folded
3781  // later.
3782 
3783  switch (Op.getOpcode()) {
3784  default:
3785  llvm_unreachable("Unknown overflow instruction!");
3786  case ISD::SADDO:
3787  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3788  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3789  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3790  break;
3791  case ISD::UADDO:
3792  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3793  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3794  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3795  break;
3796  case ISD::SSUBO:
3797  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3798  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3799  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3800  break;
3801  case ISD::USUBO:
3802  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3803  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3804  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3805  break;
3806  } // switch (...)
3807 
3808  return std::make_pair(Value, OverflowCmp);
3809 }
3810 
3811 
3812 SDValue
3813 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3814  // Let legalize expand this if it isn't a legal type yet.
3815  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3816  return SDValue();
3817 
3818  SDValue Value, OverflowCmp;
3819  SDValue ARMcc;
3820  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3821  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3822  SDLoc dl(Op);
3823  // We use 0 and 1 as false and true values.
3824  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3825  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3826  EVT VT = Op.getValueType();
3827 
3828  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3829  ARMcc, CCR, OverflowCmp);
3830 
3831  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3832  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3833 }
3834 
3835 
3836 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3837  SDValue Cond = Op.getOperand(0);
3838  SDValue SelectTrue = Op.getOperand(1);
3839  SDValue SelectFalse = Op.getOperand(2);
3840  SDLoc dl(Op);
3841  unsigned Opc = Cond.getOpcode();
3842 
3843  if (Cond.getResNo() == 1 &&
3844  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3845  Opc == ISD::USUBO)) {
3846  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3847  return SDValue();
3848 
3849  SDValue Value, OverflowCmp;
3850  SDValue ARMcc;
3851  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3852  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3853  EVT VT = Op.getValueType();
3854 
3855  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3856  OverflowCmp, DAG);
3857  }
3858 
3859  // Convert:
3860  //
3861  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3862  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3863  //
3864  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3865  const ConstantSDNode *CMOVTrue =
3866  dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3867  const ConstantSDNode *CMOVFalse =
3868  dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3869 
3870  if (CMOVTrue && CMOVFalse) {
3871  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3872  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3873 
3874  SDValue True;
3875  SDValue False;
3876  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3877  True = SelectTrue;
3878  False = SelectFalse;
3879  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3880  True = SelectFalse;
3881  False = SelectTrue;
3882  }
3883 
3884  if (True.getNode() && False.getNode()) {
3885  EVT VT = Op.getValueType();
3886  SDValue ARMcc = Cond.getOperand(2);
3887  SDValue CCR = Cond.getOperand(3);
3888  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3889  assert(True.getValueType() == VT);
3890  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
3891  }
3892  }
3893  }
3894 
3895  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3896  // undefined bits before doing a full-word comparison with zero.
3897  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
3898  DAG.getConstant(1, dl, Cond.getValueType()));
3899 
3900  return DAG.getSelectCC(dl, Cond,
3901  DAG.getConstant(0, dl, Cond.getValueType()),
3902  SelectTrue, SelectFalse, ISD::SETNE);
3903 }
3904 
3905 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
3906  bool &swpCmpOps, bool &swpVselOps) {
3907  // Start by selecting the GE condition code for opcodes that return true for
3908  // 'equality'
3909  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
3910  CC == ISD::SETULE)
3911  CondCode = ARMCC::GE;
3912 
3913  // and GT for opcodes that return false for 'equality'.
3914  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
3915  CC == ISD::SETULT)
3916  CondCode = ARMCC::GT;
3917 
3918  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
3919  // to swap the compare operands.
3920  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
3921  CC == ISD::SETULT)
3922  swpCmpOps = true;
3923 
3924  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
3925  // If we have an unordered opcode, we need to swap the operands to the VSEL
3926  // instruction (effectively negating the condition).
3927  //
3928  // This also has the effect of swapping which one of 'less' or 'greater'
3929  // returns true, so we also swap the compare operands. It also switches
3930  // whether we return true for 'equality', so we compensate by picking the
3931  // opposite condition code to our original choice.
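  // Worked example (illustration): for SETULT we pick GT above and request a
  // compare-operand swap; since SETULT is also an unordered condition, the
  // block below toggles that swap off again, swaps the VSEL operands instead,
  // and flips GT to GE, so "a ult b ? t : f" becomes an ordered GE compare of
  // (a, b) that selects f when a >= b and t otherwise, including the
  // unordered case.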
3932  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
3933  CC == ISD::SETUGT) {
3934  swpCmpOps = !swpCmpOps;
3935  swpVselOps = !swpVselOps;
3936  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
3937  }
3938 
3939  // 'ordered' is 'anything but unordered', so use the VS condition code and
3940  // swap the VSEL operands.
3941  if (CC == ISD::SETO) {
3942  CondCode = ARMCC::VS;
3943  swpVselOps = true;
3944  }
3945 
3946  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
3947  // code and swap the VSEL operands.
3948  if (CC == ISD::SETUNE) {
3949  CondCode = ARMCC::EQ;
3950  swpVselOps = true;
3951  }
3952 }
3953 
3954 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
3955  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
3956  SDValue Cmp, SelectionDAG &DAG) const {
3957  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
3958  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3959  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
3960  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3961  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
3962 
3963  SDValue TrueLow = TrueVal.getValue(0);
3964  SDValue TrueHigh = TrueVal.getValue(1);
3965  SDValue FalseLow = FalseVal.getValue(0);
3966  SDValue FalseHigh = FalseVal.getValue(1);
3967 
3968  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
3969  ARMcc, CCR, Cmp);
3970  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
3971  ARMcc, CCR, duplicateCmp(Cmp, DAG));
3972 
3973  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
3974  } else {
3975  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
3976  Cmp);
3977  }
3978 }
3979 
3980 static bool isGTorGE(ISD::CondCode CC) {
3981  return CC == ISD::SETGT || CC == ISD::SETGE;
3982 }
3983 
3984 static bool isLTorLE(ISD::CondCode CC) {
3985  return CC == ISD::SETLT || CC == ISD::SETLE;
3986 }
3987 
3988 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
3989 // All of these conditions (and their <= and >= counterparts) will do:
3990 // x < k ? k : x
3991 // x > k ? x : k
3992 // k < x ? x : k
3993 // k > x ? k : x
3994 static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
3995  const SDValue TrueVal, const SDValue FalseVal,
3996  const ISD::CondCode CC, const SDValue K) {
3997  return (isGTorGE(CC) &&
3998  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
3999  (isLTorLE(CC) &&
4000  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4001 }
4002 
4003 // Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4004 static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4005  const SDValue TrueVal, const SDValue FalseVal,
4006  const ISD::CondCode CC, const SDValue K) {
4007  return (isGTorGE(CC) &&
4008  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4009  (isLTorLE(CC) &&
4010  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4011 }
4012 
4013 // Check if two chained conditionals could be converted into SSAT.
4014 //
4015 // SSAT can replace a set of two conditional selectors that bound a number to an
4016 // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4017 //
4018 // x < -k ? -k : (x > k ? k : x)
4019 // x < -k ? -k : (x < k ? x : k)
4020 // x > -k ? (x > k ? k : x) : -k
4021 // x < k ? (x < -k ? -k : x) : k
4022 // etc.
4023 //
4024 // It returns true if the conversion can be done, false otherwise.
4025 // Additionally, the variable is returned in parameter V and the constant in K.
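// Worked example (illustration): for "x < -128 ? -128 : (x > 127 ? 127 : x)"
// the two constants satisfy -128 == ~127 and 127 + 1 is a power of two, so
// this returns V = x and K = 127, and LowerSELECT_CC emits ARMISD::SSAT with
// countTrailingOnes(127) == 7 as its immediate, bounding x to [-128, 127].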
4026 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4027  uint64_t &K) {
4028 
4029  SDValue LHS1 = Op.getOperand(0);
4030  SDValue RHS1 = Op.getOperand(1);
4031  SDValue TrueVal1 = Op.getOperand(2);
4032  SDValue FalseVal1 = Op.getOperand(3);
4033  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4034 
4035  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4036  if (Op2.getOpcode() != ISD::SELECT_CC)
4037  return false;
4038 
4039  SDValue LHS2 = Op2.getOperand(0);
4040  SDValue RHS2 = Op2.getOperand(1);
4041  SDValue TrueVal2 = Op2.getOperand(2);
4042  SDValue FalseVal2 = Op2.getOperand(3);
4043  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4044 
4045  // Find out which are the constants and which are the variables
4046  // in each conditional
4047  SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4048  ? &RHS1
4049  : NULL;
4050  SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4051  ? &RHS2
4052  : NULL;
4053  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4054  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4055  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4056  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4057 
4058  // We must detect cases where the original operations worked with 16- or
4059  // 8-bit values. In such a case, V2Tmp != V2 because the comparison operations
4060  // must work with sign-extended values but the select operations return
4061  // the original non-extended value.
4062  SDValue V2TmpReg = V2Tmp;
4063  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4064  V2TmpReg = V2Tmp->getOperand(0);
4065 
4066  // Check that the registers and the constants have the correct values
4067  // in both conditionals
4068  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4069  V2TmpReg != V2)
4070  return false;
4071 
4072  // Figure out which conditional is saturating the lower/upper bound.
4073  const SDValue *LowerCheckOp =
4074  isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4075  ? &Op
4076  : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
4077  : NULL;
4078  const SDValue *UpperCheckOp =
4079  isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4080  ? &Op
4081  : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
4082  : NULL;
4083 
4084  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4085  return false;
4086 
4087  // Check that the constant in the lower-bound check is
4088  // the opposite of the constant in the upper-bound check
4089  // in 1's complement.
4090  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4091  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4092  int64_t PosVal = std::max(Val1, Val2);
4093 
4094  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4095  (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4096  Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4097 
4098  V = V2;
4099  K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4100  return true;
4101  }
4102 
4103  return false;
4104 }
4105 
4106 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4107 
4108  EVT VT = Op.getValueType();
4109  SDLoc dl(Op);
4110 
4111  // Try to convert two saturating conditional selects into a single SSAT
4112  SDValue SatValue;
4113  uint64_t SatConstant;
4114  if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4115  isSaturatingConditional(Op, SatValue, SatConstant))
4116  return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4117  DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4118 
4119  SDValue LHS = Op.getOperand(0);
4120  SDValue RHS = Op.getOperand(1);
4121  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4122  SDValue TrueVal = Op.getOperand(2);
4123  SDValue FalseVal = Op.getOperand(3);
4124 
4125  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4126  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4127  dl);
4128 
4129  // If softenSetCCOperands only returned one value, we should compare it to
4130  // zero.
4131  if (!RHS.getNode()) {
4132  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4133  CC = ISD::SETNE;
4134  }
4135  }
4136 
4137  if (LHS.getValueType() == MVT::i32) {
4138  // Try to generate VSEL on ARMv8.
4139  // The VSEL instruction can't use all the usual ARM condition
4140  // codes: it only has two bits to select the condition code, so it's
4141  // constrained to use only GE, GT, VS and EQ.
4142  //
4143  // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4144  // swap the operands of the previous compare instruction (effectively
4145  // inverting the compare condition, swapping 'less' and 'greater') and
4146  // sometimes need to swap the operands to the VSEL (which inverts the
4147  // condition in the sense of firing whenever the previous condition didn't)
4148  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4149  TrueVal.getValueType() == MVT::f64)) {
4150  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4151  if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4152  CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4153  CC = ISD::getSetCCInverse(CC, true);
4154  std::swap(TrueVal, FalseVal);
4155  }
4156  }
4157 
4158  SDValue ARMcc;
4159  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4160  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4161  return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4162  }
4163 
4164  ARMCC::CondCodes CondCode, CondCode2;
4165  FPCCToARMCC(CC, CondCode, CondCode2);
4166 
4167  // Try to generate VMAXNM/VMINNM on ARMv8.
4168  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4169  TrueVal.getValueType() == MVT::f64)) {
4170  bool swpCmpOps = false;
4171  bool swpVselOps = false;
4172  checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4173 
4174  if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4175  CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4176  if (swpCmpOps)
4177  std::swap(LHS, RHS);
4178  if (swpVselOps)
4179  std::swap(TrueVal, FalseVal);
4180  }
4181  }
4182 
4183  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4184  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
4185  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4186  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4187  if (CondCode2 != ARMCC::AL) {
4188  SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4189  // FIXME: Needs another CMP because flag can have but one use.
4190  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
4191  Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4192  }
4193  return Result;
4194 }
4195 
4196 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
4197 /// to morph to an integer compare sequence.
4198 static bool canChangeToInt(SDValue Op, bool &SeenZero,
4199  const ARMSubtarget *Subtarget) {
4200  SDNode *N = Op.getNode();
4201  if (!N->hasOneUse())
4202  // Otherwise it requires moving the value from fp to integer registers.
4203  return false;
4204  if (!N->getNumValues())
4205  return false;
4206  EVT VT = Op.getValueType();
4207  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4208  // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4209  // vmrs are very slow, e.g. cortex-a8.
4210  return false;
4211 
4212  if (isFloatingPointZero(Op)) {
4213  SeenZero = true;
4214  return true;
4215  }
4216  return ISD::isNormalLoad(N);
4217 }
4218 
4219 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4220  if (isFloatingPointZero(Op))
4221  return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4222 
4223  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4224  return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4225  Ld->getPointerInfo(), Ld->getAlignment(),
4226  Ld->getMemOperand()->getFlags());
4227 
4228  llvm_unreachable("Unknown VFP cmp argument!");
4229 }
4230 
4231 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4232  SDValue &RetVal1, SDValue &RetVal2) {
4233  SDLoc dl(Op);
4234 
4235  if (isFloatingPointZero(Op)) {
4236  RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4237  RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4238  return;
4239  }
4240 
4241  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4242  SDValue Ptr = Ld->getBasePtr();
4243  RetVal1 =
4244  DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4245  Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4246 
4247  EVT PtrType = Ptr.getValueType();
4248  unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4249  SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4250  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4251  RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4252  Ld->getPointerInfo().getWithOffset(4), NewAlign,
4253  Ld->getMemOperand()->getFlags());
4254  return;
4255  }
4256 
4257  llvm_unreachable("Unknown VFP cmp argument!");
4258 }
4259 
4260 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4261 /// f32 and even f64 comparisons to integer ones.
4262 SDValue
4263 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4264  SDValue Chain = Op.getOperand(0);
4265  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4266  SDValue LHS = Op.getOperand(2);
4267  SDValue RHS = Op.getOperand(3);
4268  SDValue Dest = Op.getOperand(4);
4269  SDLoc dl(Op);
4270 
4271  bool LHSSeenZero = false;
4272  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4273  bool RHSSeenZero = false;
4274  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4275  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4276  // If unsafe fp math optimization is enabled and there are no other uses of
4277  // the CMP operands, and the condition code is EQ or NE, we can optimize it
4278  // to an integer comparison.
4279  if (CC == ISD::SETOEQ)
4280  CC = ISD::SETEQ;
4281  else if (CC == ISD::SETUNE)
4282  CC = ISD::SETNE;
4283 
4284  SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4285  SDValue ARMcc;
4286  if (LHS.getValueType() == MVT::f32) {
4287  LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4288  bitcastf32Toi32(LHS, DAG), Mask);
4289  RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4290  bitcastf32Toi32(RHS, DAG), Mask);
4291  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4292  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4293  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4294  Chain, Dest, ARMcc, CCR, Cmp);
4295  }
4296 
4297  SDValue LHS1, LHS2;
4298  SDValue RHS1, RHS2;
4299  expandf64Toi32(LHS, DAG, LHS1, LHS2);
4300  expandf64Toi32(RHS, DAG, RHS1, RHS2);
4301  LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4302  RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4303  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4304  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4305  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4306  SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4307  return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4308  }
4309 
4310  return SDValue();
4311 }
4312 
4313 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4314  SDValue Chain = Op.getOperand(0);
4315  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4316  SDValue LHS = Op.getOperand(2);
4317  SDValue RHS = Op.getOperand(3);
4318  SDValue Dest = Op.getOperand(4);
4319  SDLoc dl(Op);
4320 
4321  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4322  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4323  dl);
4324 
4325  // If softenSetCCOperands only returned one value, we should compare it to
4326  // zero.
4327  if (!RHS.getNode()) {
4328  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4329  CC = ISD::SETNE;
4330  }
4331  }
4332 
4333  if (LHS.getValueType() == MVT::i32) {
4334  SDValue ARMcc;
4335  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4336  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4337  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4338  Chain, Dest, ARMcc, CCR, Cmp);
4339  }
4340 
4341  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4342 
4343  if (getTargetMachine().Options.UnsafeFPMath &&
4344  (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4345  CC == ISD::SETNE || CC == ISD::SETUNE)) {
4346  if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4347  return Result;
4348  }
4349 
4350  ARMCC::CondCodes CondCode, CondCode2;
4351  FPCCToARMCC(CC, CondCode, CondCode2);
4352 
4353  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4354  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
4355  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4356  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4357  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4358  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4359  if (CondCode2 != ARMCC::AL) {
4360  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4361  SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4362  Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4363  }
4364  return Res;
4365 }
4366 
4367 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4368  SDValue Chain = Op.getOperand(0);
4369  SDValue Table = Op.getOperand(1);
4370  SDValue Index = Op.getOperand(2);
4371  SDLoc dl(Op);
4372 
4373  EVT PTy = getPointerTy(DAG.getDataLayout());
4374  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4375  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4376  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4377  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4378  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
4379  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4380  // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
4381  // table, which does another jump to the destination. This also makes it easier
4382  // to translate it to TBB / TBH later (Thumb2 only).
4383  // FIXME: This might not work if the function is extremely large.
4384  return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4385  Addr, Op.getOperand(2), JTI);
4386  }
4387  if (isPositionIndependent() || Subtarget->isROPI()) {
4388  Addr =
4389  DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4390  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4391  Chain = Addr.getValue(1);
4392  Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
4393  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4394  } else {
4395  Addr =
4396  DAG.getLoad(PTy, dl, Chain, Addr,
4397  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4398  Chain = Addr.getValue(1);
4399  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4400  }
4401 }
4402 
4403 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4404  EVT VT = Op.getValueType();
4405  SDLoc dl(Op);
4406 
4407  if (Op.getValueType().getVectorElementType() == MVT::i32) {
4408  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4409  return Op;
4410  return DAG.UnrollVectorOp(Op.getNode());
4411  }
4412 
4413  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4414  "Invalid type for custom lowering!");
4415  if (VT != MVT::v4i16)
4416  return DAG.UnrollVectorOp(Op.getNode());
4417 
4418  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4419  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4420 }
4421 
4422 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4423  EVT VT = Op.getValueType();
4424  if (VT.isVector())
4425  return LowerVectorFP_TO_INT(Op, DAG);
4426  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4427  RTLIB::Libcall LC;
4428  if (Op.getOpcode() == ISD::FP_TO_SINT)
4429  LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4430  Op.getValueType());
4431  else
4432  LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4433  Op.getValueType());
4434  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4435  /*isSigned*/ false, SDLoc(Op)).first;
4436  }
4437 
4438  return Op;
4439 }
4440 
4441 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4442  EVT VT = Op.getValueType();
4443  SDLoc dl(Op);
4444 
4445  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4446  if (VT.getVectorElementType() == MVT::f32)
4447  return Op;
4448  return DAG.UnrollVectorOp(Op.getNode());
4449  }
4450 
4451  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4452  "Invalid type for custom lowering!");
4453  if (VT != MVT::v4f32)
4454  return DAG.UnrollVectorOp(Op.getNode());
4455 
4456  unsigned CastOpc;
4457  unsigned Opc;
4458  switch (Op.getOpcode()) {
4459  default: llvm_unreachable("Invalid opcode!");
4460  case ISD::SINT_TO_FP:
4461  CastOpc = ISD::SIGN_EXTEND;
4462  Opc = ISD::SINT_TO_FP;
4463  break;
4464  case ISD::UINT_TO_FP:
4465  CastOpc = ISD::ZERO_EXTEND;
4466  Opc = ISD::UINT_TO_FP;
4467  break;
4468  }
4469 
4470  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4471  return DAG.getNode(Opc, dl, VT, Op);
4472 }
4473 
4474 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4475  EVT VT = Op.getValueType();
4476  if (VT.isVector())
4477  return LowerVectorINT_TO_FP(Op, DAG);
4478  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4479  RTLIB::Libcall LC;
4480  if (Op.getOpcode() == ISD::SINT_TO_FP)
4481  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4482  Op.getValueType());
4483  else
4484  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4485  Op.getValueType());
4486  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4487  /*isSigned*/ false, SDLoc(Op)).first;
4488  }
4489 
4490  return Op;
4491 }
4492 
4493 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4494  // Implement fcopysign with a fabs and a conditional fneg.
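  // In the DAG built below this is done with bit operations rather than
  // literal fabs/fneg nodes: with NEON, a 0x80000000 sign mask selects the
  // sign bit of Tmp1 and the remaining bits of Tmp0 (the VBSL pattern);
  // without NEON, the values are moved to integer registers and the sign bit
  // of Tmp1 is ORed into the masked magnitude of Tmp0.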
4495  SDValue Tmp0 = Op.getOperand(0);
4496  SDValue Tmp1 = Op.getOperand(1);
4497  SDLoc dl(Op);
4498  EVT VT = Op.getValueType();
4499  EVT SrcVT = Tmp1.getValueType();
4500  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4501  Tmp0.getOpcode() == ARMISD::VMOVDRR;
4502  bool UseNEON = !InGPR && Subtarget->hasNEON();
4503 
4504  if (UseNEON) {
4505  // Use VBSL to copy the sign bit.
4506  unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4507  SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4508  DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4509  EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4510  if (VT == MVT::f64)
4511  Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4512  DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4513  DAG.getConstant(32, dl, MVT::i32));
4514  else /*if (VT == MVT::f32)*/
4515  Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4516  if (SrcVT == MVT::f32) {
4517  Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4518  if (VT == MVT::f64)
4519  Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4520  DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4521  DAG.getConstant(32, dl, MVT::i32));
4522  } else if (VT == MVT::f32)
4523  Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4524  DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4525  DAG.getConstant(32, dl, MVT::i32));
4526  Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4527  Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4528 
4529  SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4530  dl, MVT::i32);
4531  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4532  SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4533  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4534 
4535  SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4536  DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4537  DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4538  if (VT == MVT::f32) {
4539  Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4540  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4541  DAG.getConstant(0, dl, MVT::i32));
4542  } else {
4543  Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4544  }
4545 
4546  return Res;
4547  }
4548 
4549  // Bitcast operand 1 to i32.
4550  if (SrcVT == MVT::f64)
4551  Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4552  Tmp1).getValue(1);
4553  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4554 
4555  // Or in the signbit with integer operations.
4556  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4557  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4558  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4559  if (VT == MVT::f32) {
4560  Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4561  DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4562  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4563  DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4564  }
4565 
4566  // f64: Or the high part with signbit and then combine two parts.
4567  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4568  Tmp0);
4569  SDValue Lo = Tmp0.getValue(0);
4570  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4571  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4572  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4573 }
4574 
4575 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4576  MachineFunction &MF = DAG.getMachineFunction();
4577  MachineFrameInfo &MFI = MF.getFrameInfo();
4578  MFI.setReturnAddressIsTaken(true);
4579 
4580  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4581  return SDValue();
4582 
4583  EVT VT = Op.getValueType();
4584  SDLoc dl(Op);
4585  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4586  if (Depth) {
4587  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4588  SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4589  return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4590  DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4591  MachinePointerInfo());
4592  }
4593 
4594  // Return LR, which contains the return address. Mark it an implicit live-in.
4595  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4596  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4597 }
4598 
4599 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4600  const ARMBaseRegisterInfo &ARI =
4601  *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4602  MachineFunction &MF = DAG.getMachineFunction();
4603  MachineFrameInfo &MFI = MF.getFrameInfo();
4604  MFI.setFrameAddressIsTaken(true);
4605 
4606  EVT VT = Op.getValueType();
4607  SDLoc dl(Op); // FIXME probably not meaningful
4608  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4609  unsigned FrameReg = ARI.getFrameRegister(MF);
4610  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4611  while (Depth--)
4612  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4613  MachinePointerInfo());
4614  return FrameAddr;
4615 }
4616 
4617 // FIXME? Maybe this could be a TableGen attribute on some registers and
4618 // this table could be generated automatically from RegInfo.
4619 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4620  SelectionDAG &DAG) const {
4621  unsigned Reg = StringSwitch<unsigned>(RegName)
4622  .Case("sp", ARM::SP)
4623  .Default(0);
4624  if (Reg)
4625  return Reg;
4626  report_fatal_error(Twine("Invalid register name \""
4627  + StringRef(RegName) + "\"."));
4628 }
4629 
4630 // The result is a 64-bit value, so split it into two 32-bit values and
4631 // return them as a pair of values.
4632 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4633  SelectionDAG &DAG) {
4634  SDLoc DL(N);
4635 
4636  // This function is only supposed to be called for i64 type destination.
4637  assert(N->getValueType(0) == MVT::i64
4638  && "ExpandREAD_REGISTER called for non-i64 type result.");
4639 
4640  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4641  DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4642  N->getOperand(0),
4643  N->getOperand(1));
4644 
4645  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4646  Read.getValue(1)));
4647  Results.push_back(Read.getOperand(0));
4648 }
4649 
4650 /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4651 /// When \p DstVT, the destination type of \p BC, is on the vector
4652 /// register bank and the source of bitcast, \p Op, operates on the same bank,
4653 /// it might be possible to combine them, such that everything stays on the
4654 /// vector register bank.
4655 /// \returns The node that would replace \p BC, if the combine
4656 /// is possible.
4657 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4658  SelectionDAG &DAG) {
4659  SDValue Op = BC->getOperand(0);
4660  EVT DstVT = BC->getValueType(0);
4661 
4662  // The only vector instruction that can produce a scalar (remember,
4663  // since the bitcast was about to be turned into VMOVDRR, the source
4664  // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4665  // Moreover, we can do this combine only if there is one use.
4666  // Finally, if the destination type is not a vector, there is not
4667  // much point in forcing everything onto the vector bank.
4668  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4669  !Op.hasOneUse())
4670  return SDValue();
4671 
4672  // If the index is not constant, we will introduce an additional
4673  // multiply that will stick.
4674  // Give up in that case.
4675  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4676  if (!Index)
4677  return SDValue();
4678  unsigned DstNumElt = DstVT.getVectorNumElements();
4679 
4680  // Compute the new index.
4681  const APInt &APIntIndex = Index->getAPIntValue();
4682  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4683  NewIndex *= APIntIndex;
4684  // Check if the new constant index fits into i32.
4685  if (NewIndex.getBitWidth() > 32)
4686  return SDValue();
4687 
4688  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4689  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
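  // Illustrative example: bitcasting "i64 extractelt <2 x i64> %src, 1" to
  // v2f32 gives DstNumElt == 2, so the combine bitcasts %src to v4f32 and
  // extracts the subvector starting at index 1 * 2 == 2, keeping the value
  // on the vector register bank instead of going through a GPR pair.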
4690  SDLoc dl(Op);
4691  SDValue ExtractSrc = Op.getOperand(0);
4692  EVT VecVT = EVT::getVectorVT(
4693  *DAG.getContext(), DstVT.getScalarType(),
4694  ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4695  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4696  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4697  DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4698 }
4699 
4700 /// ExpandBITCAST - If the target supports VFP, this function is called to
4701 /// expand a bit convert where either the source or destination type is i64 to
4702 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4703 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
4704 /// vectors), since the legalizer won't know what to do with that.
4705 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4706  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4707  SDLoc dl(N);
4708  SDValue Op = N->getOperand(0);
4709 
4710  // This function is only supposed to be called for i64 types, either as the
4711  // source or destination of the bit convert.
4712  EVT SrcVT = Op.getValueType();
4713  EVT DstVT = N->getValueType(0);
4714  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4715  "ExpandBITCAST called for non-i64 type");
4716 
4717  // Turn i64->f64 into VMOVDRR.
4718  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4719  // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4720  // if we can combine the bitcast with its source.
4721  if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4722  return Val;
4723 
4724  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4725  DAG.getConstant(0, dl, MVT::i32));
4726  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4727  DAG.getConstant(1, dl, MVT::i32));
4728  return DAG.getNode(ISD::BITCAST, dl, DstVT,
4729  DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4730  }
4731 
4732  // Turn f64->i64 into VMOVRRD.
4733  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4734  SDValue Cvt;
4735  if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4736  SrcVT.getVectorNumElements() > 1)
4737  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4738  DAG.getVTList(MVT::i32, MVT::i32),
4739  DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4740  else
4741  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4742  DAG.getVTList(MVT::i32, MVT::i32), Op);
4743  // Merge the pieces into a single i64 value.
4744  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4745  }
4746 
4747  return SDValue();
4748 }
4749 
4750 /// getZeroVector - Returns a vector of specified type with all zero elements.
4751 /// Zero vectors are used to represent vector negation and in those cases
4752 /// will be implemented with the NEON VNEG instruction. However, VNEG does
4753 /// not support i64 elements, so sometimes the zero vectors will need to be
4754 /// explicitly constructed. Regardless, use a canonical VMOV to create the
4755 /// zero vector.
4756 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4757  assert(VT.isVector() && "Expected a vector type");
4758  // The canonical modified immediate encoding of a zero vector is....0!
4759  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4760  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4761  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4762  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4763 }
4764 
4765 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4766 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
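// For shift amounts below 32 the low word is (Lo >> amt) | (Hi << (32 - amt))
// and the high word is Hi shifted by amt; for amounts of 32 or more the low
// word is Hi shifted by (amt - 32) and the high word is the sign fill (SRA)
// or zero (SRL). The CMOVs below pick between the two forms by testing
// whether amt - 32 is non-negative.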
4767 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4768  SelectionDAG &DAG) const {
4769  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4770  EVT VT = Op.getValueType();
4771  unsigned VTBits = VT.getSizeInBits();
4772  SDLoc dl(Op);
4773  SDValue ShOpLo = Op.getOperand(0);
4774  SDValue ShOpHi = Op.getOperand(1);
4775  SDValue ShAmt = Op.getOperand(2);
4776  SDValue ARMcc;
4777  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4778  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4779 
4780  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4781 
4782  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4783  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4784  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4785  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4786  DAG.getConstant(VTBits, dl, MVT::i32));
4787  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4788  SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4789  SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4790  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4791  ISD::SETGE, ARMcc, DAG, dl);
4792  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4793  ARMcc, CCR, CmpLo);
4794 
4795 
4796  SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4797  SDValue HiBigShift = Opc == ISD::SRA
4798  ? DAG.getNode(Opc, dl, VT, ShOpHi,
4799  DAG.getConstant(VTBits - 1, dl, VT))
4800  : DAG.getConstant(0, dl, VT);
4801  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4802  ISD::SETGE, ARMcc, DAG, dl);
4803  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4804  ARMcc, CCR, CmpHi);
4805 
4806  SDValue Ops[2] = { Lo, Hi };
4807  return DAG.getMergeValues(Ops, dl);
4808 }
4809 
4810 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4811 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4812 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4813  SelectionDAG &DAG) const {
4814  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4815  EVT VT = Op.getValueType();
4816  unsigned VTBits = VT.getSizeInBits();
4817  SDLoc dl(Op);
4818  SDValue ShOpLo = Op.getOperand(0);
4819  SDValue ShOpHi = Op.getOperand(1);
4820  SDValue ShAmt = Op.getOperand(2);
4821  SDValue ARMcc;
4822  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4823 
4824  assert(Op.getOpcode() == ISD::SHL_PARTS);
4825  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4826  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4827  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4828  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4829  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4830 
4831  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4832  DAG.getConstant(VTBits, dl, MVT::i32));
4833  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4834  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4835  ISD::SETGE, ARMcc, DAG, dl);
4836  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4837  ARMcc, CCR, CmpHi);
4838 
4839  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4840  ISD::SETGE, ARMcc, DAG, dl);
4841  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4842  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4843  DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4844 
4845  SDValue Ops[2] = { Lo, Hi };
4846  return DAG.getMergeValues(Ops, dl);
4847 }
4848 
4849 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4850  SelectionDAG &DAG) const {
4851  // The rounding mode is in bits 23:22 of the FPSCR.
4852  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4853 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
4854  // so that the shift + and get folded into a bitfield extract.
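  // Concretely (illustration): if FPSCR[23:22] is 0b11 (round toward zero),
  // adding 1 << 22 carries those bits to 0b00, and the shift-and-mask yields
  // 0, the FLT_ROUNDS value for round-toward-zero; the other three modes map
  // to 1, 2 and 3 in the same way.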
4855  SDLoc dl(Op);
4856  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4857  DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
4858  MVT::i32));
4859  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4860  DAG.getConstant(1U << 22, dl, MVT::i32));
4861  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4862  DAG.getConstant(22, dl, MVT::i32));
4863  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4864  DAG.getConstant(3, dl, MVT::i32));
4865 }
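// A standalone, hedged check of the rounding-mode formula above: the same
// (((FPSCR + (1 << 22)) >> 22) & 3) arithmetic on a plain uint32_t maps the
// ARM encoding 0,1,2,3 onto the FLT_ROUNDS encoding 1,2,3,0.
#include <cassert>
#include <cstdint>

static unsigned FltRoundsFromFPSCR(uint32_t FPSCR) {
  // Rounding mode lives in FPSCR bits 23:22; adding 1 << 22 increments it
  // modulo 4 before the bitfield extract.
  return ((FPSCR + (1u << 22)) >> 22) & 3;
}

int main() {
  assert(FltRoundsFromFPSCR(0u << 22) == 1); // round-to-nearest   -> 1
  assert(FltRoundsFromFPSCR(1u << 22) == 2); // round-towards-+inf -> 2
  assert(FltRoundsFromFPSCR(2u << 22) == 3); // round-towards--inf -> 3
  assert(FltRoundsFromFPSCR(3u << 22) == 0); // round-towards-zero -> 0
  return 0;
}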
4866 
4867 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4868  const ARMSubtarget *ST) {
4869  SDLoc dl(N);
4870  EVT VT = N->getValueType(0);
4871  if (VT.isVector()) {
4872  assert(ST->hasNEON());
4873 
4874  // Compute the least significant set bit: LSB = X & -X
4875  SDValue X = N->getOperand(0);
4876  SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
4877  SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
4878 
4879  EVT ElemTy = VT.getVectorElementType();
4880 
4881  if (ElemTy == MVT::i8) {
4882  // Compute with: cttz(x) = ctpop(lsb - 1)
4883  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4884  DAG.getTargetConstant(1, dl, ElemTy));
4885  SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4886  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
4887  }
4888 
4889  if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
4890  (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
4891  // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
4892  unsigned NumBits = ElemTy.getSizeInBits();
4893  SDValue WidthMinus1 =
4894  DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4895  DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
4896  SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
4897  return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
4898  }
4899 
4900  // Compute with: cttz(x) = ctpop(lsb - 1)
4901 
4902  // Since we can only compute the number of bits in a byte with vcnt.8, we
4903  // have to gather the result with pairwise addition (vpaddl) for i16, i32,
4904  // and i64.
4905 
4906  // Compute LSB - 1.
4907  SDValue Bits;
4908  if (ElemTy == MVT::i64) {
4909  // Load constant 0xffff'ffff'ffff'ffff to register.
4910  SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4911  DAG.getTargetConstant(0x1eff, dl, MVT::i32));
4912  Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
4913  } else {
4914  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4915  DAG.getTargetConstant(1, dl, ElemTy));
4916  Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4917  }
4918 
4919  // Count #bits with vcnt.8.
4920  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4921  SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
4922  SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
4923 
4924  // Gather the #bits with vpaddl (pairwise add.)
4925  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
4926  SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
4927  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4928  Cnt8);
4929  if (ElemTy == MVT::i16)
4930  return Cnt16;
4931 
4932  EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
4933  SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
4934  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4935  Cnt16);
4936  if (ElemTy == MVT::i32)
4937  return Cnt32;
4938 
4939  assert(ElemTy == MVT::i64);
4940  SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4941  DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
4942  Cnt32);
4943  return Cnt64;
4944  }
4945 
4946  if (!ST->hasV6T2Ops())
4947  return SDValue();
4948 
4949  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
4950  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
4951 }
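// A standalone, hedged check of the two CTTZ identities used above, on a
// scalar instead of a NEON vector: with LSB = x & -x, cttz(x) equals
// popcount(LSB - 1), and for x != 0 it also equals (width - 1) - clz(LSB).
#include <cassert>
#include <cstdint>

static unsigned CttzViaPopcount(uint32_t X) {
  uint32_t LSB = X & (0u - X);          // isolate the least significant set bit
  return __builtin_popcount(LSB - 1);   // bits below the LSB == trailing zeros
}

static unsigned CttzViaClz(uint32_t X) {
  assert(X != 0 && "matches the CTTZ_ZERO_UNDEF-only path");
  uint32_t LSB = X & (0u - X);
  return 31 - __builtin_clz(LSB);       // (width - 1) - ctlz(lsb)
}

int main() {
  for (uint32_t X : {1u, 8u, 0xf0u, 12345u, 0x80000000u}) {
    assert(CttzViaPopcount(X) == (unsigned)__builtin_ctz(X));
    assert(CttzViaClz(X) == (unsigned)__builtin_ctz(X));
  }
  return 0;
}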
4952 
4953 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
4954 /// for each 16-bit element from the operand, repeated. The basic idea is to
4955 /// leverage vcnt to get the 8-bit counts, gather and add the results.
4956 ///
4957 /// Trace for v4i16:
4958 /// input = [v0 v1 v2 v3 ] (vi 16-bit element)
4959 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
4960 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
4961 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
4962 /// [b0 b1 b2 b3 b4 b5 b6 b7]
4963 /// +[b1 b0 b3 b2 b5 b4 b7 b6]
4964 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
4965 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
4966 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
4967  EVT VT = N->getValueType(0);
4968  SDLoc DL(N);
4969 
4970  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4971  SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
4972  SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
4973  SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
4974  SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
4975  return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
4976 }
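// A standalone, hedged trace of the scheme above for one 16-bit lane
// v0 = [w0 w1]: vcnt.8 yields b0 and b1, and the VREV16 + VADD step leaves
// k0 = b0 + b1 in both byte positions of the lane before VUZP gathers it.
#include <cassert>
#include <cstdint>

static unsigned Popcount16ViaBytePair(uint16_t W) {
  unsigned B0 = __builtin_popcount(W & 0xff);        // count of the low byte
  unsigned B1 = __builtin_popcount((W >> 8) & 0xff); // count of the high byte
  return B0 + B1;                                    // per-lane 16-bit count
}

int main() {
  assert(Popcount16ViaBytePair(0xffff) == 16);
  assert(Popcount16ViaBytePair(0x0f01) == 5);
  assert(Popcount16ViaBytePair(0x0000) == 0);
  return 0;
}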
4977 
4978 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
4979 /// bit-count for each 16-bit element from the operand. We need slightly
4980 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
4981 /// 64/128-bit registers.
4982 ///
4983 /// Trace for v4i16:
4984 /// input = [v0 v1 v2 v3 ] (vi 16-bit element)
4985 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
4986 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
4987 /// v4i16:Extracted = [k0 k1 k2 k3 ]
4988 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
4989  EVT VT = N->getValueType(0);
4990  SDLoc DL(N);
4991 
4992  SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
4993  if (VT.is64BitVector()) {
4994  SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
4995  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
4996  DAG.getIntPtrConstant(0, DL));
4997  } else {
4998  SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
4999  BitCounts, DAG.getIntPtrConstant(0, DL));
5000  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5001  }
5002 }
5003 
5004 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5005 /// bit-count for each 32-bit element from the operand. The idea here is
5006 /// to split the vector into 16-bit elements, leverage the 16-bit count
5007 /// routine, and then combine the results.
5008 ///
5009 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5010 /// input = [v0 v1 ] (vi: 32-bit elements)
5011 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5012 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5013 /// vrev: N0 = [k1 k0 k3 k2 ]
5014 /// [k0 k1 k2 k3 ]
5015 /// N1 =+[k1 k0 k3 k2 ]
5016 /// [k0 k2 k1 k3 ]
5017 /// N2 =+[k1 k3 k0 k2 ]
5018 /// [k0 k2 k1 k3 ]
5019 /// Extended =+[k1 k3 k0 k2 ]
5020 /// [k0 k2 ]
5021 /// Extracted=+[k1 k3 ]
5022 ///
5023 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5024  EVT VT = N->getValueType(0);
5025  SDLoc DL(N);
5026 
5027  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5028 
5029  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5030  SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5031  SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5032  SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5033  SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5034 
5035  if (VT.is64BitVector()) {
5036  SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5037  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5038  DAG.getIntPtrConstant(0, DL));
5039  } else {
5040  SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5041  DAG.getIntPtrConstant(0, DL));
5042  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5043  }
5044 }
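// A standalone, hedged model of the widening above: a 32-bit lane count is
// the sum of its two 16-bit sub-lane counts, which is what the VREV32 + VADD
// + VUZP sequence computes per lane once the 16-bit counts are available.
#include <cassert>
#include <cstdint>

static unsigned Popcount32Via16BitHalves(uint32_t X) {
  unsigned K0 = __builtin_popcount(X & 0xffff);         // low 16-bit half
  unsigned K1 = __builtin_popcount((X >> 16) & 0xffff); // high 16-bit half
  return K0 + K1;
}

int main() {
  for (uint32_t X : {0u, 1u, 0xffffffffu, 0x12345678u, 0x80000001u})
    assert(Popcount32Via16BitHalves(X) == (unsigned)__builtin_popcount(X));
  return 0;
}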
5045 
5046 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5047  const ARMSubtarget *ST) {
5048  EVT VT = N->getValueType(0);
5049 
5050  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5051  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5052  VT == MVT::v4i16 || VT == MVT::v8i16) &&
5053  "Unexpected type for custom ctpop lowering");
5054 
5055  if (VT.getVectorElementType() == MVT::i32)
5056  return lowerCTPOP32BitElements(N, DAG);
5057  else
5058  return lowerCTPOP16BitElements(N, DAG);
5059 }
5060 
5061 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5062  const ARMSubtarget *ST) {
5063  EVT VT = N->getValueType(0);
5064  SDLoc dl(N);
5065 
5066  if (!VT.isVector())
5067  return SDValue();
5068 
5069  // Lower vector shifts on NEON to use VSHL.
5070  assert(ST->hasNEON() && "unexpected vector shift");
5071 
5072  // Left shifts translate directly to the vshiftu intrinsic.
5073  if (N->getOpcode() == ISD::SHL)
5074  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5075  DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5076  MVT::i32),
5077  N->getOperand(0), N->getOperand(1));
5078 
5079  assert((N->getOpcode() == ISD::SRA ||
5080  N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5081 
5082  // NEON uses the same intrinsics for both left and right shifts. For
5083  // right shifts, the shift amounts are negative, so negate the vector of
5084  // shift amounts.
5085  EVT ShiftVT = N->getOperand(1).getValueType();
5086  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5087  getZeroVector(ShiftVT, DAG, dl),
5088  N->getOperand(1));
5089  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5090  Intrinsic::arm_neon_vshifts :
5091  Intrinsic::arm_neon_vshiftu);
5092  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5093  DAG.getConstant(vshiftInt, dl, MVT::i32),
5094  N->getOperand(0), NegatedCount);
5095 }
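// A standalone, hedged model of the negated-count trick above: NEON's VSHL
// treats a negative per-lane shift count as a right shift, so SRL/SRA by N
// can be emitted as a VSHL by -N. Modeled here on a single unsigned lane.
#include <cassert>
#include <cstdint>

static uint32_t VShlU32Model(uint32_t X, int Count) {
  // Positive count: left shift. Negative count: logical right shift.
  return Count >= 0 ? (X << Count) : (X >> -Count);
}

int main() {
  assert(VShlU32Model(0x80000000u, -4) == 0x08000000u); // SRL by 4 == vshl by -4
  assert(VShlU32Model(0x00000001u, 4) == 0x00000010u);  // plain left shift
  return 0;
}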
5096 
5097 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5098  const ARMSubtarget *ST) {
5099  EVT VT = N->getValueType(0);
5100  SDLoc dl(N);
5101 
5102  // We can get here for a node like i32 = ISD::SHL i32, i64
5103  if (VT != MVT::i64)
5104  return SDValue();
5105 
5106  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5107  "Unknown shift to lower!");
5108 
5109  // We only lower SRA, SRL of 1 here, all others use generic lowering.
5110  if (!isOneConstant(N->getOperand(1)))
5111  return SDValue();
5112 
5113  // If we are in thumb mode, we don't have RRX.
5114  if (ST->isThumb1Only()) return SDValue();
5115 
5116  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5117  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5118  DAG.getConstant(0, dl, MVT::i32));
5119  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5120  DAG.getConstant(1, dl, MVT::i32));
5121 
5122  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5123  // captures the result into a carry flag.
5124  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5125  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5126 
5127  // The low part is an ARMISD::RRX operand, which shifts the carry in.
5128  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5129 
5130  // Merge the pieces into a single i64 value.
5131  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5132 }
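// A standalone, hedged model of the RRX expansion above: a 64-bit logical
// shift right by one shifts the high word (the dropped bit becomes the carry)
// and then rotates the low word right through that carry.
#include <cassert>
#include <cstdint>

static uint64_t LShr64By1ViaRRX(uint32_t Lo, uint32_t Hi) {
  unsigned Carry = Hi & 1;                     // SRL_FLAG: bit falling off Hi
  uint32_t NewHi = Hi >> 1;
  uint32_t NewLo = (Lo >> 1) | (Carry << 31);  // RRX: carry enters at bit 31
  return ((uint64_t)NewHi << 32) | NewLo;
}

int main() {
  uint64_t V = 0x8000000100000003ULL;
  assert(LShr64By1ViaRRX((uint32_t)V, (uint32_t)(V >> 32)) == (V >> 1));
  return 0;
}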
5133 
5134 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5135  SDValue TmpOp0, TmpOp1;
5136  bool Invert = false;
5137  bool Swap = false;
5138  unsigned Opc = 0;
5139 
5140  SDValue Op0 = Op.getOperand(0);
5141  SDValue Op1 = Op.getOperand(1);
5142  SDValue CC = Op.getOperand(2);
5143  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5144  EVT VT = Op.getValueType();
5145  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5146  SDLoc dl(Op);
5147 
5148  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5149  (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5150  // Special-case integer 64-bit equality comparisons. They aren't legal,
5151  // but they can be lowered with a few vector instructions.
5152  unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5153  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5154  SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5155  SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5156  SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5157  DAG.getCondCode(ISD::SETEQ));
5158  SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5159  SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5160  Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5161  if (SetCCOpcode == ISD::SETNE)
5162  Merged = DAG.getNOT(dl, Merged, CmpVT);
5163  Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5164  return Merged;
5165  }
5166 
5167  if (CmpVT.getVectorElementType() == MVT::i64)
5168  // 64-bit comparisons are not legal in general.
5169  return SDValue();
5170 
5171  if (Op1.getValueType().isFloatingPoint()) {
5172  switch (SetCCOpcode) {
5173  default: llvm_unreachable("Illegal FP comparison");
5174  case ISD::SETUNE:
5175  case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5176  case ISD::SETOEQ:
5177  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5178  case ISD::SETOLT:
5179  case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5180  case ISD::SETOGT:
5181  case ISD::SETGT: Opc = ARMISD::VCGT; break;
5182  case ISD::SETOLE:
5183  case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5184  case ISD::SETOGE:
5185  case ISD::SETGE: Opc = ARMISD::VCGE; break;
5186  case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5187  case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5188  case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5189  case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5190  case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5191  case ISD::SETONE:
5192  // Expand this to (OLT | OGT).
5193  TmpOp0 = Op0;
5194  TmpOp1 = Op1;
5195  Opc = ISD::OR;
5196  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5197  Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5198  break;
5199  case ISD::SETUO:
5200  Invert = true;
5201  LLVM_FALLTHROUGH;
5202  case ISD::SETO:
5203  // Expand this to (OLT | OGE).
5204  TmpOp0 = Op0;
5205  TmpOp1 = Op1;
5206  Opc = ISD::OR;
5207  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5208  Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5209  break;
5210  }
5211  } else {
5212  // Integer comparisons.
5213  switch (SetCCOpcode) {
5214  default: llvm_unreachable("Illegal integer comparison");
5215  case ISD::SETNE: Invert = true;
5216  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5217  case ISD::SETLT: Swap = true;
5218  case ISD::SETGT: Opc = ARMISD::VCGT; break;
5219  case ISD::SETLE: Swap = true;
5220  case ISD::SETGE: Opc = ARMISD::VCGE; break;
5221  case ISD::SETULT: Swap = true;
5222  case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5223  case ISD::SETULE: Swap = true;
5224  case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5225  }
5226 
5227  // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5228  if (Opc == ARMISD::VCEQ) {
5229 
5230  SDValue AndOp;
5231  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5232  AndOp = Op0;
5233  else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5234  AndOp = Op1;
5235 
5236  // Ignore bitconvert.
5237  if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5238  AndOp = AndOp.getOperand(0);
5239 
5240  if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5241  Opc = ARMISD::VTST;
5242  Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5243  Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5244  Invert = !Invert;
5245  }
5246  }
5247  }
5248 
5249  if (Swap)
5250  std::swap(Op0, Op1);
5251 
5252  // If one of the operands is a constant vector zero, attempt to fold the
5253  // comparison to a specialized compare-against-zero form.
5254  SDValue SingleOp;
5255  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5256  SingleOp = Op0;
5257  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5258  if (Opc == ARMISD::VCGE)
5259  Opc = ARMISD::VCLEZ;
5260  else if (Opc == ARMISD::VCGT)
5261  Opc = ARMISD::VCLTZ;
5262  SingleOp = Op1;
5263  }
5264 
5265  SDValue Result;
5266  if (SingleOp.getNode()) {
5267  switch (Opc) {
5268  case ARMISD::VCEQ:
5269  Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5270  case ARMISD::VCGE:
5271  Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5272  case ARMISD::VCLEZ:
5273  Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5274  case ARMISD::VCGT:
5275  Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5276  case ARMISD::VCLTZ:
5277  Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5278  default:
5279  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5280  }
5281  } else {
5282  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5283  }
5284 
5285  Result = DAG.getSExtOrTrunc(Result, dl, VT);
5286 
5287  if (Invert)
5288  Result = DAG.getNOT(dl, Result, VT);
5289 
5290  return Result;
5291 }
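// A standalone, hedged check of the two FP expansions above: SETONE (ordered
// and not equal) is (a > b) || (b > a), and SETO (ordered) is
// (b > a) || (a >= b); both are false exactly when an operand is NaN.
#include <cassert>
#include <cmath>

static bool SetONE(float A, float B) { return (A > B) || (B > A); }
static bool SetO(float A, float B) { return (B > A) || (A >= B); }

int main() {
  assert(SetONE(1.0f, 2.0f) && !SetONE(1.0f, 1.0f) && !SetONE(NAN, 1.0f));
  assert(SetO(1.0f, 2.0f) && SetO(1.0f, 1.0f) && !SetO(1.0f, NAN));
  return 0;
}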
5292 
5293 static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5294  SDValue LHS = Op.getOperand(0);
5295  SDValue RHS = Op.getOperand(1);
5296  SDValue Carry = Op.getOperand(2);
5297  SDValue Cond = Op.getOperand(3);
5298  SDLoc DL(Op);
5299 
5300  assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5301 
5302  assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5303  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5304  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5305 
5306  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5307  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5308  SDValue ARMcc = DAG.getConstant(
5309  IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5310  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5311  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5312  Cmp.getValue(1), SDValue());
5313  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5314  CCR, Chain.getValue(1));
5315 }
5316 
5317 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
5318 /// valid vector constant for a NEON instruction with a "modified immediate"
5319 /// operand (e.g., VMOV). If so, return the encoded value.
5320 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5321  unsigned SplatBitSize, SelectionDAG &DAG,
5322  const SDLoc &dl, EVT &VT, bool is128Bits,
5323  NEONModImmType type) {
5324  unsigned OpCmode, Imm;
5325 
5326  // SplatBitSize is set to the smallest size that splats the vector, so a
5327  // zero vector will always have SplatBitSize == 8. However, NEON modified
5328 // immediate instructions other than VMOV do not support the 8-bit encoding
5329  // of a zero vector, and the default encoding of zero is supposed to be the
5330  // 32-bit version.
5331  if (SplatBits == 0)
5332  SplatBitSize = 32;
5333 
5334  switch (SplatBitSize) {
5335  case 8:
5336  if (type != VMOVModImm)
5337  return SDValue();
5338  // Any 1-byte value is OK. Op=0, Cmode=1110.
5339  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5340  OpCmode = 0xe;
5341  Imm = SplatBits;
5342  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5343  break;
5344 
5345  case 16:
5346  // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5347  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5348  if ((SplatBits & ~0xff) == 0) {
5349  // Value = 0x00nn: Op=x, Cmode=100x.
5350  OpCmode = 0x8;
5351  Imm = SplatBits;
5352  break;
5353  }
5354  if ((SplatBits & ~0xff00) == 0) {
5355  // Value = 0xnn00: Op=x, Cmode=101x.
5356  OpCmode = 0xa;
5357  Imm = SplatBits >> 8;
5358  break;
5359  }
5360  return SDValue();
5361 
5362  case 32:
5363  // NEON's 32-bit VMOV supports splat values where:
5364  // * only one byte is nonzero, or
5365  // * the least significant byte is 0xff and the second byte is nonzero, or
5366  // * the least significant 2 bytes are 0xff and the third is nonzero.
5367  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5368  if ((SplatBits & ~0xff) == 0) {
5369  // Value = 0x000000nn: Op=x, Cmode=000x.
5370  OpCmode = 0;
5371  Imm = SplatBits;
5372  break;
5373  }
5374  if ((SplatBits & ~0xff00) == 0) {
5375  // Value = 0x0000nn00: Op=x, Cmode=001x.
5376  OpCmode = 0x2;
5377  Imm = SplatBits >> 8;
5378  break;
5379  }
5380  if ((SplatBits & ~0xff0000) == 0) {
5381  // Value = 0x00nn0000: Op=x, Cmode=010x.
5382  OpCmode = 0x4;
5383  Imm = SplatBits >> 16;
5384  break;
5385  }
5386  if ((SplatBits & ~0xff000000) == 0) {
5387  // Value = 0xnn000000: Op=x, Cmode=011x.
5388  OpCmode = 0x6;
5389  Imm = SplatBits >> 24;
5390  break;
5391  }
5392 
5393  // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5394  if (type == OtherModImm) return SDValue();
5395 
5396  if ((SplatBits & ~0xffff) == 0 &&
5397  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5398  // Value = 0x0000nnff: Op=x, Cmode=1100.
5399  OpCmode = 0xc;
5400  Imm = SplatBits >> 8;
5401  break;
5402  }
5403 
5404  if ((SplatBits & ~0xffffff) == 0 &&
5405  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5406  // Value = 0x00nnffff: Op=x, Cmode=1101.
5407  OpCmode = 0xd;
5408  Imm = SplatBits >> 16;
5409  break;
5410  }
5411 
5412  // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5413  // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5414  // VMOV.I32. A (very) minor optimization would be to replicate the value
5415  // and fall through here to test for a valid 64-bit splat. But, then the
5416  // caller would also need to check and handle the change in size.
5417  return SDValue();
5418 
5419  case 64: {
5420  if (type != VMOVModImm)
5421  return SDValue();
5422  // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5423  uint64_t BitMask = 0xff;
5424  uint64_t Val = 0;
5425  unsigned ImmMask = 1;
5426  Imm = 0;
5427  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5428  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5429  Val |= BitMask;
5430  Imm |= ImmMask;
5431  } else if ((SplatBits & BitMask) != 0) {
5432  return SDValue();
5433  }
5434  BitMask <<= 8;
5435  ImmMask <<= 1;
5436  }
5437 
5438  if (DAG.getDataLayout().isBigEndian())
5439  // swap higher and lower 32 bit word
5440  Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5441 
5442  // Op=1, Cmode=1110.
5443  OpCmode = 0x1e;
5444  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5445  break;
5446  }
5447 
5448  default:
5449  llvm_unreachable("unexpected size for isNEONModifiedImm");
5450  }
5451 
5452  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5453  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5454 }
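// A standalone, hedged worked example of the classification above for the
// plain 32-bit VMOV cases (Cmode 000x/001x/010x/011x). The
// (OpCmode << 8) | Imm packing mirrors what ARM_AM::createNEONModImm is
// assumed to produce; it is written out here rather than included.
#include <cassert>
#include <cstdint>

// Returns the encoded value, or -1 if the splat needs another Cmode.
static int EncodeVMOVI32(uint32_t SplatBits) {
  if ((SplatBits & ~0xffu) == 0)       return (0x0 << 8) | SplatBits;         // 0x000000nn
  if ((SplatBits & ~0xff00u) == 0)     return (0x2 << 8) | (SplatBits >> 8);  // 0x0000nn00
  if ((SplatBits & ~0xff0000u) == 0)   return (0x4 << 8) | (SplatBits >> 16); // 0x00nn0000
  if ((SplatBits & ~0xff000000u) == 0) return (0x6 << 8) | (SplatBits >> 24); // 0xnn000000
  return -1;
}

int main() {
  assert(EncodeVMOVI32(0x0000005a) == ((0x0 << 8) | 0x5a));
  assert(EncodeVMOVI32(0x005a0000) == ((0x4 << 8) | 0x5a));
  assert(EncodeVMOVI32(0x12340000) == -1); // two nonzero bytes: not encodable here
  return 0;
}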
5455 
5456 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5457  const ARMSubtarget *ST) const {
5458  bool IsDouble = Op.getValueType() == MVT::f64;
5459  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5460  const APFloat &FPVal = CFP->getValueAPF();
5461 
5462  // Prevent floating-point constants from using literal loads
5463  // when execute-only is enabled.
5464  if (ST->genExecuteOnly()) {
5465  APInt INTVal = FPVal.bitcastToAPInt();
5466  SDLoc DL(CFP);
5467  if (IsDouble) {
5468  SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5469  SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5470  if (!ST->isLittle())
5471  std::swap(Lo, Hi);
5472  return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5473  } else {
5474  return DAG.getConstant(INTVal, DL, MVT::i32);
5475  }
5476  }
5477 
5478  if (!ST->hasVFP3())
5479  return SDValue();
5480 
5481  // Use the default (constant pool) lowering for double constants when we have
5482  // an SP-only FPU
5483  if (IsDouble && Subtarget->isFPOnlySP())
5484  return SDValue();
5485 
5486  // Try splatting with a VMOV.f32...
5487  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5488 
5489  if (ImmVal != -1) {
5490  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5491  // We have code in place to select a valid ConstantFP already, no need to
5492  // do any mangling.
5493  return Op;
5494  }
5495 
5496  // It's a float and we are trying to use NEON operations where
5497  // possible. Lower it to a splat followed by an extract.
5498  SDLoc DL(Op);
5499  SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5500  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5501  NewVal);
5502  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5503  DAG.getConstant(0, DL, MVT::i32));
5504  }
5505 
5506  // The rest of our options are NEON only, make sure that's allowed before
5507  // proceeding..
5508  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5509  return SDValue();
5510 
5511  EVT VMovVT;
5512  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5513 
5514  // It wouldn't really be worth bothering for doubles except for one very
5515  // important value, which does happen to match: 0.0. So make sure we don't do
5516  // anything stupid.
5517  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5518  return SDValue();
5519 
5520  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5521  SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5522  VMovVT, false, VMOVModImm);
5523  if (NewVal != SDValue()) {
5524  SDLoc DL(Op);
5525  SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5526  NewVal);
5527  if (IsDouble)
5528  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5529 
5530  // It's a float: cast and extract a vector element.
5531  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5532  VecConstant);
5533  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5534  DAG.getConstant(0, DL, MVT::i32));
5535  }
5536 
5537  // Finally, try a VMVN.i32
5538  NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5539  false, VMVNModImm);
5540  if (NewVal != SDValue()) {
5541  SDLoc DL(Op);
5542  SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5543 
5544  if (IsDouble)
5545  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5546 
5547  // It's a float: cast and extract a vector element.
5548  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5549  VecConstant);
5550  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5551  DAG.getConstant(0, DL, MVT::i32));
5552  }
5553 
5554  return SDValue();
5555 }
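// A standalone, hedged note on the guard above: the VMOV.i32 / VMVN.i32 path
// materialises the double by replicating one 32-bit pattern, so it only
// applies when both 32-bit halves of the bit pattern are identical (0.0 being
// the important case).
#include <cassert>
#include <cstdint>
#include <cstring>

static bool HalvesMatch(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return (Bits & 0xffffffffu) == (Bits >> 32);
}

int main() {
  assert(HalvesMatch(0.0));  // 0x0000000000000000
  assert(!HalvesMatch(1.0)); // 0x3ff0000000000000: halves differ
  return 0;
}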
5556 
5557 // Check if a VEXT instruction can handle the shuffle mask when the
5558 // vector sources of the shuffle are the same.
5559 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5560  unsigned NumElts = VT.getVectorNumElements();
5561 
5562  // Assume that the first shuffle index is not UNDEF. Fail if it is.
5563  if (M[0] < 0)
5564  return false;
5565 
5566  Imm = M[0];
5567 
5568  // If this is a VEXT shuffle, the immediate value is the index of the first
5569  // element. The other shuffle indices must be the successive elements after
5570  // the first one.
5571  unsigned ExpectedElt = Imm;
5572  for (unsigned i = 1; i < NumElts; ++i) {
5573  // Increment the expected index. If it wraps around, just follow it
5574  // back to index zero and keep going.
5575  ++ExpectedElt;
5576  if (ExpectedElt == NumElts)
5577  ExpectedElt = 0;
5578 
5579  if (M[i] < 0) continue; // ignore UNDEF indices
5580  if (ExpectedElt != static_cast<unsigned>(M[i]))
5581  return false;
5582  }
5583 
5584  return true;
5585 }
5586 
5587 
5588 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5589  bool &ReverseVEXT, unsigned &Imm) {
5590  unsigned NumElts = VT.getVectorNumElements();
5591  ReverseVEXT = false;
5592 
5593  // Assume that the first shuffle index is not UNDEF. Fail if it is.
5594  if (M[0] < 0)
5595  return false;
5596 
5597  Imm = M[0];
5598 
5599  // If this is a VEXT shuffle, the immediate value is the index of the first
5600  // element. The other shuffle indices must be the successive elements after
5601  // the first one.
5602  unsigned ExpectedElt = Imm;
5603  for (unsigned i = 1; i < NumElts; ++i) {
5604  // Increment the expected index. If it wraps around, it may still be
5605  // a VEXT but the source vectors must be swapped.
5606  ExpectedElt += 1;
5607  if (ExpectedElt == NumElts * 2) {
5608  ExpectedElt = 0;
5609  ReverseVEXT = true;
5610  }
5611 
5612  if (M[i] < 0) continue; // ignore UNDEF indices
5613  if (ExpectedElt != static_cast<unsigned>(M[i]))
5614  return false;
5615  }
5616 
5617  // Adjust the index value if the source operands will be swapped.
5618  if (ReverseVEXT)
5619  Imm -= NumElts;
5620 
5621  return true;
5622 }
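// A standalone, hedged restatement of the VEXT rule above for v8i8 masks:
// the mask must read consecutive elements of the concatenation V1:V2 starting
// at Imm; if the run wraps past the end, the operands swap and Imm shrinks by
// the vector length, which is what ReverseVEXT reports.
#include <cassert>
#include <vector>

static bool IsVEXTMaskModel(const std::vector<int> &M, bool &Reverse,
                            unsigned &Imm) {
  unsigned NumElts = M.size();
  Reverse = false;
  if (M[0] < 0) return false;           // same pessimism as the code above
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts * 2) { Expected = 0; Reverse = true; }
    if (M[i] >= 0 && (unsigned)M[i] != Expected) return false;
  }
  if (Reverse) Imm -= NumElts;
  return true;
}

int main() {
  bool Rev; unsigned Imm;
  // <3,4,...,10>: plain VEXT of V1:V2 with immediate 3.
  assert(IsVEXTMaskModel({3, 4, 5, 6, 7, 8, 9, 10}, Rev, Imm) && !Rev && Imm == 3);
  // <13,14,15,0,...,4>: wraps around, so the sources swap and Imm becomes 5.
  assert(IsVEXTMaskModel({13, 14, 15, 0, 1, 2, 3, 4}, Rev, Imm) && Rev && Imm == 5);
  return 0;
}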
5623 
5624 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
5625 /// instruction with the specified blocksize. (The order of the elements
5626 /// within each block of the vector is reversed.)
5627 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5628  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5629  "Only possible block sizes for VREV are: 16, 32, 64");
5630 
5631  unsigned EltSz = VT.getScalarSizeInBits();
5632  if (EltSz == 64)
5633  return false;
5634 
5635  unsigned NumElts = VT.getVectorNumElements();
5636  unsigned BlockElts = M[0] + 1;
5637  // If the first shuffle index is UNDEF, be optimistic.
5638  if (M[0] < 0)
5639  BlockElts = BlockSize / EltSz;
5640 
5641  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5642  return false;
5643 
5644  for (unsigned i = 0; i < NumElts; ++i) {
5645  if (M[i] < 0) continue; // ignore UNDEF indices
5646  if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5647  return false;
5648  }
5649 
5650  return true;
5651 }
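// A standalone, hedged restatement of the VREV rule above: element i must map
// to the mirrored position inside its own block of BlockSize bits. For v8i8,
// VREV32 expects <3,2,1,0,7,6,5,4> and VREV16 expects <1,0,3,2,5,4,7,6>.
#include <cassert>
#include <vector>

static bool IsVREVMaskModel(const std::vector<int> &M, unsigned EltSz,
                            unsigned BlockSize) {
  unsigned BlockElts = BlockSize / EltSz;
  for (unsigned i = 0; i < M.size(); ++i) {
    if (M[i] < 0) continue; // UNDEF lanes are don't-care
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  assert(IsVREVMaskModel({3, 2, 1, 0, 7, 6, 5, 4}, 8, 32));  // VREV32.8
  assert(IsVREVMaskModel({1, 0, 3, 2, 5, 4, 7, 6}, 8, 16));  // VREV16.8
  assert(!IsVREVMaskModel({0, 1, 2, 3, 4, 5, 6, 7}, 8, 32)); // identity mask
  return 0;
}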
5652 
5653 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5654  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5655  // range, then 0 is placed into the resulting vector. So pretty much any mask
5656  // of 8 elements can work here.
5657  return VT == MVT::v8i8 && M.size() == 8;
5658 }
5659 
5660 // Checks whether the shuffle mask represents a vector transpose (VTRN) by
5661 // checking that pairs of elements in the shuffle mask represent the same index
5662 // in each vector, incrementing the expected index by 2 at each step.
5663 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5664 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5665 // v2={e,f,g,h}
5666 // WhichResult gives the offset for each element in the mask based on which
5667 // of the two results it belongs to.
5668 //
5669 // The transpose can be represented either as:
5670 // result1 = shufflevector v1, v2, result1_shuffle_mask
5671 // result2 = shufflevector v1, v2, result2_shuffle_mask
5672 // where v1/v2 and the shuffle masks have the same number of elements
5673 // (here WhichResult (see below) indicates which result is being checked)
5674 //
5675 // or as:
5676 // results = shufflevector v1, v2, shuffle_mask
5677 // where both results are returned in one vector and the shuffle mask has twice
5678 // as many elements as v1/v2 (here WhichResult will always be 0 if true). Here
5679 // we want to check the low half and high half of the shuffle mask as if it
5680 // were the other case.
5681 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5682  unsigned EltSz = VT.getScalarSizeInBits();
5683  if (EltSz == 64)
5684  return false;
5685 
5686  unsigned NumElts = VT.getVectorNumElements();
5687  if (M.size() != NumElts && M.size() != NumElts*2)
5688  return false;
5689 
5690  // If the mask is twice as long as the input vector then we need to check the
5691  // upper and lower parts of the mask with a matching value for WhichResult
5692  // FIXME: A mask with only even values will be rejected in case the first
5693  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5694  // M[0] is used to determine WhichResult
5695  for (unsigned i = 0; i < M.size(); i += NumElts) {
5696  if (M.size() == NumElts * 2)
5697  WhichResult = i / NumElts;
5698  else
5699  WhichResult = M[i] == 0 ? 0 : 1;
5700  for (unsigned j = 0; j < NumElts; j += 2) {
5701  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5702  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5703  return false;
5704  }
5705  }
5706 
5707  if (M.size() == NumElts*2)
5708  WhichResult = 0;
5709 
5710  return true;
5711 }
5712 
5713 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5714 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5715 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5716 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5717  unsigned EltSz = VT.getScalarSizeInBits();
5718  if (EltSz == 64)
5719  return false;
5720 
5721  unsigned NumElts = VT.getVectorNumElements();
5722  if (M.size() != NumElts && M.size() != NumElts*2)
5723  return false;
5724 
5725  for (unsigned i = 0; i < M.size(); i += NumElts) {
5726  if (M.size() == NumElts * 2)
5727  WhichResult = i / NumElts;
5728  else
5729  WhichResult = M[i] == 0 ? 0 : 1;
5730  for (unsigned j = 0; j < NumElts; j += 2) {
5731  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5732  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5733  return false;
5734  }
5735  }
5736 
5737  if (M.size() == NumElts*2)
5738  WhichResult = 0;
5739 
5740  return true;
5741 }
5742 
5743 // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5744 // that the mask elements are either all even and in steps of size 2 or all odd
5745 // and in steps of size 2.
5746 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5747 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5748 // v2={e,f,g,h}
5749 // Requires similar checks to that of isVTRNMask with
5750 // respect to how the results are returned.
5751 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5752  unsigned EltSz = VT.getScalarSizeInBits();
5753  if (EltSz == 64)
5754  return false;
5755 
5756  unsigned NumElts = VT.getVectorNumElements();
5757  if (M.size() != NumElts && M.size() != NumElts*2)
5758  return false;
5759 
5760  for (unsigned i = 0; i < M.size(); i += NumElts) {
5761  WhichResult = M[i] == 0 ? 0 : 1;
5762  for (unsigned j = 0; j < NumElts; ++j) {
5763  if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5764  return false;
5765  }
5766  }
5767 
5768  if (M.size() == NumElts*2)
5769  WhichResult = 0;
5770 
5771  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5772  if (VT.is64BitVector() && EltSz == 32)
5773  return false;
5774 
5775  return true;
5776 }
5777 
5778 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5779 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5780 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5781 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5782  unsigned EltSz = VT.getScalarSizeInBits();
5783  if (EltSz == 64)
5784  return false;
5785 
5786  unsigned NumElts = VT.getVectorNumElements();
5787  if (M.size() != NumElts && M.size() != NumElts*2)
5788  return false;
5789 
5790  unsigned Half = NumElts / 2;
5791  for (unsigned i = 0; i < M.size(); i += NumElts) {
5792  WhichResult = M[i] == 0 ? 0 : 1;
5793  for (unsigned j = 0; j < NumElts; j += Half) {
5794  unsigned Idx = WhichResult;
5795  for (unsigned k = 0; k < Half; ++k) {
5796  int MIdx = M[i + j + k];
5797  if (MIdx >= 0 && (unsigned) MIdx != Idx)
5798  return false;
5799  Idx += 2;
5800  }
5801  }
5802  }
5803 
5804  if (M.size() == NumElts*2)
5805  WhichResult = 0;
5806 
5807  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5808  if (VT.is64BitVector() && EltSz == 32)
5809  return false;
5810 
5811  return true;
5812 }
5813 
5814 // Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5815 // that pairs of elements of the shufflemask represent the same index in each
5816 // vector incrementing sequentially through the vectors.
5817 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5818 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5819 // v2={e,f,g,h}
5820 // Requires similar checks to that of isVTRNMask with respect to how the
5821 // results are returned.
5822 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5823  unsigned EltSz = VT.getScalarSizeInBits();
5824  if (EltSz == 64)
5825  return false;
5826 
5827  unsigned NumElts = VT.getVectorNumElements();
5828  if (M.size() != NumElts && M.size() != NumElts*2)
5829  return false;
5830 
5831  for (unsigned i = 0; i < M.size(); i += NumElts) {
5832  WhichResult = M[i] == 0 ? 0 : 1;
5833  unsigned Idx = WhichResult * NumElts / 2;
5834  for (unsigned j = 0; j < NumElts; j += 2) {
5835  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5836  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5837  return false;
5838  Idx += 1;
5839  }
5840  }
5841 
5842  if (M.size() == NumElts*2)
5843  WhichResult = 0;
5844 
5845  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5846  if (VT.is64BitVector() && EltSz == 32)
5847  return false;
5848 
5849  return true;
5850 }
5851 
5852 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5853 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5854 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5855 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5856  unsigned EltSz = VT.getScalarSizeInBits();
5857  if (EltSz == 64)
5858  return false;
5859 
5860  unsigned NumElts = VT.getVectorNumElements();
5861  if (M.size() != NumElts && M.size() != NumElts*2)
5862  return false;
5863 
5864  for (unsigned i = 0; i < M.size(); i += NumElts) {
5865  WhichResult = M[i] == 0 ? 0 : 1;
5866  unsigned Idx = WhichResult * NumElts / 2;
5867  for (unsigned j = 0; j < NumElts; j += 2) {
5868  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5869  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
5870  return false;
5871  Idx += 1;
5872  }
5873  }
5874 
5875  if (M.size() == NumElts*2)
5876  WhichResult = 0;
5877 
5878  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5879  if (VT.is64BitVector() && EltSz == 32)
5880  return false;
5881 
5882  return true;
5883 }
5884 
5885 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
5886 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
5887 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
5888  unsigned &WhichResult,
5889  bool &isV_UNDEF) {
5890  isV_UNDEF = false;
5891  if (isVTRNMask(ShuffleMask, VT, WhichResult))
5892  return ARMISD::VTRN;
5893  if (isVUZPMask(ShuffleMask, VT, WhichResult))
5894  return ARMISD::VUZP;
5895  if (isVZIPMask(ShuffleMask, VT, WhichResult))
5896  return ARMISD::VZIP;
5897 
5898  isV_UNDEF = true;
5899  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
5900  return ARMISD::VTRN;
5901  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5902  return ARMISD::VUZP;
5903  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5904  return ARMISD::VZIP;
5905 
5906  return 0;
5907 }
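// A standalone, hedged reference for the three two-result shuffles recognised
// above, written out for v4i32 lanes: with V1 = {a,b,c,d} and V2 = {e,f,g,h},
// the first results are VTRN {a,e,c,g}, VUZP {a,c,e,g} and VZIP {a,e,b,f}.
#include <array>
#include <cassert>

// Apply a 4-lane mask to the concatenation V1:V2 (indices 0-3 pick V1, 4-7 V2).
static std::array<int, 4> ApplyMask(const std::array<int, 4> &V1,
                                    const std::array<int, 4> &V2,
                                    const std::array<int, 4> &M) {
  std::array<int, 4> R{};
  for (unsigned i = 0; i < 4; ++i)
    R[i] = M[i] < 4 ? V1[M[i]] : V2[M[i] - 4];
  return R;
}

int main() {
  std::array<int, 4> V1 = {10, 11, 12, 13}, V2 = {20, 21, 22, 23};
  std::array<int, 4> Trn = {10, 20, 12, 22}, Uzp = {10, 12, 20, 22},
                     Zip = {10, 20, 11, 21};
  assert(ApplyMask(V1, V2, {0, 4, 2, 6}) == Trn); // VTRN.32, result 0
  assert(ApplyMask(V1, V2, {0, 2, 4, 6}) == Uzp); // VUZP.32, result 0
  assert(ApplyMask(V1, V2, {0, 4, 1, 5}) == Zip); // VZIP.32, result 0
  return 0;
}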
5908 
5909 /// \return true if this is a reverse operation on a vector.
5910 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
5911  unsigned NumElts = VT.getVectorNumElements();
5912  // Make sure the mask has the right size.
5913  if (NumElts != M.size())
5914  return false;
5915 
5916  // Look for <15, ..., 3, -1, 1, 0>.
5917  for (unsigned i = 0; i != NumElts; ++i)
5918  if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
5919  return false;
5920 
5921  return true;
5922 }
5923 
5924 // If N is an integer constant that can be moved into a register in one
5925 // instruction, return an SDValue of such a constant (will become a MOV
5926 // instruction). Otherwise return null.
5927 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
5928  const ARMSubtarget *ST, const SDLoc &dl) {
5929  uint64_t Val;
5930  if (!isa<ConstantSDNode>(N))
5931  return SDValue();
5932  Val = cast<ConstantSDNode>(N)->getZExtValue();
5933 
5934  if (ST->isThumb1Only()) {
5935  if (Val <= 255 || ~Val <= 255)
5936  return DAG.getConstant(Val, dl, MVT::i32);
5937  } else {
5938  if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
5939  return DAG.getConstant(Val, dl, MVT::i32);
5940  }
5941  return SDValue();
5942 }
5943 
5944 // If this is a case we can't handle, return null and let the default
5945 // expansion code take care of it.
5946 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
5947  const ARMSubtarget *ST) const {
5948  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
5949  SDLoc dl(Op);
5950  EVT VT = Op.getValueType();
5951 
5952  APInt SplatBits, SplatUndef;
5953  unsigned SplatBitSize;
5954  bool HasAnyUndefs;
5955  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
5956  if (SplatUndef.isAllOnesValue())
5957  return DAG.getUNDEF(VT);
5958 
5959  if (SplatBitSize <= 64) {
5960  // Check if an immediate VMOV works.
5961  EVT VmovVT;
5962  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
5963  SplatUndef.getZExtValue(), SplatBitSize,
5964  DAG, dl, VmovVT, VT.is128BitVector(),
5965  VMOVModImm);
5966  if (Val.getNode()) {
5967  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
5968  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5969  }
5970 
5971  // Try an immediate VMVN.
5972  uint64_t NegatedImm = (~SplatBits).getZExtValue();
5973  Val = isNEONModifiedImm(NegatedImm,
5974  SplatUndef.getZExtValue(), SplatBitSize,
5975  DAG, dl, VmovVT, VT.is128BitVector(),
5976  VMVNModImm);
5977  if (Val.getNode()) {
5978  SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
5979  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5980  }
5981 
5982  // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
5983  if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
5984  int ImmVal = ARM_AM::getFP32Imm(SplatBits);
5985  if (ImmVal != -1) {
5986  SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
5987  return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
5988  }
5989  }
5990  }
5991  }
5992 
5993  // Scan through the operands to see if only one value is used.
5994  //
5995  // As an optimisation, even if more than one value is used it may be more
5996  // profitable to splat with one value and then change some lanes.
5997  //
5998  // Heuristically we decide to do this if the vector has a "dominant" value,
5999  // defined as splatted to more than half of the lanes.
6000  unsigned NumElts = VT.getVectorNumElements();
6001  bool isOnlyLowElement = true;
6002  bool usesOnlyOneValue = true;
6003  bool hasDominantValue = false;
6004  bool isConstant = true;
6005 
6006  // Map of the number of times a particular SDValue appears in the
6007  // element list.
6008  DenseMap<SDValue, unsigned> ValueCounts;
6009  SDValue Value;
6010  for (unsigned i = 0; i < NumElts; ++i) {
6011  SDValue V = Op.getOperand(i);
6012  if (V.isUndef())
6013  continue;
6014  if (i > 0)
6015  isOnlyLowElement = false;
6016  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6017  isConstant = false;
6018 
6019  ValueCounts.insert(std::make_pair(V, 0));
6020  unsigned &Count = ValueCounts[V];
6021 
6022  // Is this value dominant? (takes up more than half of the lanes)
6023  if (++Count > (NumElts / 2)) {
6024  hasDominantValue = true;
6025  Value = V;
6026  }
6027  }
6028  if (ValueCounts.size() != 1)
6029  usesOnlyOneValue = false;
6030  if (!Value.getNode() && ValueCounts.size() > 0)
6031  Value = ValueCounts.begin()->first;
6032 
6033  if (ValueCounts.size() == 0)
6034  return DAG.getUNDEF(VT);
6035 
6036  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6037  // Keep going if we are hitting this case.
6038  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6039  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6040 
6041  unsigned EltSize = VT.getScalarSizeInBits();
6042 
6043  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6044  // i32 and try again.
6045  if (hasDominantValue && EltSize <= 32) {
6046  if (!isConstant) {
6047  SDValue N;
6048 
6049  // If we are VDUPing a value that comes directly from a vector, that will
6050  // cause an unnecessary move to and from a GPR, where instead we could
6051  // just use VDUPLANE. We can only do this if the lane being extracted
6052  // is at a constant index, as the VDUP from lane instructions only have
6053  // constant-index forms.
6054  ConstantSDNode *constIndex;
6055  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6056  (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6057  // We need to create a new undef vector to use for the VDUPLANE if the
6058  // size of the vector from which we get the value is different than the
6059  // size of the vector that we need to create. We will insert the element
6060  // such that the register coalescer will remove unnecessary copies.
6061  if (VT != Value->getOperand(0).getValueType()) {
6062  unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6063  VT.getVectorNumElements();
6064  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6065  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6066  Value, DAG.getConstant(index, dl, MVT::i32)),
6067  DAG.getConstant(index, dl, MVT::i32));
6068  } else
6069  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6070  Value->getOperand(0), Value->getOperand(1));
6071  } else
6072  N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6073 
6074  if (!usesOnlyOneValue) {
6075  // The dominant value was splatted as 'N', but we now have to insert
6076  // all differing elements.
6077  for (unsigned I = 0; I < NumElts; ++I) {
6078  if (Op.getOperand(I) == Value)
6079  continue;
6080  SmallVector<SDValue, 3> Ops;
6081  Ops.push_back(N);
6082  Ops.push_back(Op.getOperand(I));
6083  Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6084  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6085  }
6086  }
6087  return N;
6088  }
6089  if (VT.getVectorElementType().isFloatingPoint()) {
6090  SmallVector<SDValue, 8> Ops;
6091  for (unsigned i = 0; i < NumElts; ++i)
6092  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6093  Op.getOperand(i)));
6094  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6095  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6096  Val = LowerBUILD_VECTOR(Val, DAG, ST);
6097  if (Val.getNode())
6098  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6099  }
6100  if (usesOnlyOneValue) {
6101  SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6102  if (isConstant && Val.getNode())
6103  return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6104  }
6105  }
6106 
6107  // If all elements are constants and the case above didn't get hit, fall back
6108  // to the default expansion, which will generate a load from the constant
6109  // pool.
6110  if (isConstant)
6111  return SDValue();
6112 
6113  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6114  if (NumElts >= 4) {
6115  SDValue shuffle = ReconstructShuffle(Op, DAG);
6116  if (shuffle != SDValue())
6117  return shuffle;
6118  }
6119 
6120  if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6121  // If we haven't found an efficient lowering, try splitting a 128-bit vector
6122  // into two 64-bit vectors; we might discover a better way to lower it.
6123  SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6124  EVT ExtVT = VT.getVectorElementType();
6125  EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6126  SDValue Lower =
6127  DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6128  if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6129  Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6130  SDValue Upper = DAG.getBuildVector(
6131  HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6132  if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6133  Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6134  if (Lower && Upper)
6135  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6136  }
6137 
6138  // Vectors with 32- or 64-bit elements can be built by directly assigning
6139  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6140  // will be legalized.
6141  if (EltSize >= 32) {
6142  // Do the expansion with floating-point types, since that is what the VFP
6143  // registers are defined to use, and since i64 is not legal.
6144  EVT EltVT = EVT::getFloatingPointVT(EltSize);
6145  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6146  SmallVector<SDValue, 8> Ops;
6147  for (unsigned i = 0; i < NumElts; ++i)
6148  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6149  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6150  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6151  }
6152 
6153  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6154  // know the default expansion would otherwise fall back on something even
6155  // worse. For a vector with one or two non-undef values, that's
6156  // scalar_to_vector for the elements followed by a shuffle (provided the
6157  // shuffle is valid for the target) and materialization element by element
6158  // on the stack followed by a load for everything else.
6159  if (!isConstant && !usesOnlyOneValue) {
6160  SDValue Vec = DAG.getUNDEF(VT);
6161  for (unsigned i = 0 ; i < NumElts; ++i) {
6162  SDValue V = Op.getOperand(i);
6163  if (V.isUndef())
6164  continue;
6165  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6166  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6167  }
6168  return Vec;
6169  }
6170 
6171  return SDValue();
6172 }
6173 
6174 // Gather data to see if the operation can be modelled as a
6175 // shuffle in combination with VEXTs.
6176 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6177  SelectionDAG &DAG) const {
6178  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6179  SDLoc dl(Op);
6180  EVT VT = Op.getValueType();
6181  unsigned NumElts = VT.getVectorNumElements();
6182 
6183  struct ShuffleSourceInfo {
6184  SDValue Vec;
6185  unsigned MinElt;
6186  unsigned MaxElt;
6187 
6188  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6189  // be compatible with the shuffle we intend to construct. As a result
6190  // ShuffleVec will be some sliding window into the original Vec.
6191  SDValue ShuffleVec;
6192 
6193  // Code should guarantee that element i in Vec starts at element "WindowBase
6194  // + i * WindowScale in ShuffleVec".
6195  int WindowBase;
6196  int WindowScale;
6197 
6198  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6199  ShuffleSourceInfo(SDValue Vec)
6200  : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
6201  WindowScale(1) {}
6202  };
6203 
6204  // First gather all vectors used as an immediate source for this BUILD_VECTOR
6205  // node.
6206  SmallVector<ShuffleSourceInfo, 2> Sources;
6207  for (unsigned i = 0; i < NumElts; ++i) {
6208  SDValue V = Op.getOperand(i);
6209  if (V.isUndef())
6210  continue;
6211  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6212  // A shuffle can only come from building a vector from various
6213  // elements of other vectors.
6214  return SDValue();
6215  } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6216  // Furthermore, shuffles require a constant mask, whereas extractelts
6217  // accept variable indices.
6218  return SDValue();
6219  }
6220 
6221  // Add this element source to the list if it's not already there.
6222  SDValue SourceVec = V.getOperand(0);
6223  auto Source = find(Sources, SourceVec);
6224  if (Source == Sources.end())
6225  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6226 
6227  // Update the minimum and maximum lane number seen.
6228  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6229  Source->MinElt = std::min(Source->MinElt, EltNo);
6230  Source->MaxElt = std::max(Source->MaxElt, EltNo);
6231  }
6232 
6233  // Currently only do something sane when at most two source vectors
6234  // are involved.
6235  if (Sources.size() > 2)
6236  return SDValue();
6237 
6238  // Find out the smallest element size among result and two sources, and use
6239  // it as element size to build the shuffle_vector.
6240  EVT SmallestEltTy = VT.getVectorElementType();
6241  for (auto &Source : Sources) {
6242  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6243  if (SrcEltTy.bitsLT(SmallestEltTy))
6244  SmallestEltTy = SrcEltTy;
6245  }
6246  unsigned ResMultiplier =
6247  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6248  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6249  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6250 
6251  // If the source vector is too wide or too narrow, we may nevertheless be able
6252  // to construct a compatible shuffle either by concatenating it with UNDEF or
6253  // extracting a suitable range of elements.
6254  for (auto &Src : Sources) {
6255  EVT SrcVT = Src.ShuffleVec.getValueType();
6256 
6257  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6258  continue;
6259 
6260  // This stage of the search produces a source with the same element type as
6261  // the original, but with a total width matching the BUILD_VECTOR output.
6262  EVT EltVT = SrcVT.getVectorElementType();
6263  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6264  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6265 
6266  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6267  if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6268  return SDValue();
6269  // We can pad out the smaller vector for free, so if it's part of a
6270  // shuffle...
6271  Src.ShuffleVec =
6272  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6273  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6274  continue;
6275  }
6276 
6277  if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6278  return SDValue();
6279 
6280  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6281  // Span too large for a VEXT to cope
6282  return SDValue();
6283  }
6284 
6285  if (Src.MinElt >= NumSrcElts) {
6286  // The extraction can just take the second half
6287  Src.ShuffleVec =
6288  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6289  DAG.getConstant(NumSrcElts, dl, MVT::i32));
6290  Src.WindowBase = -NumSrcElts;
6291  } else if (Src.MaxElt < NumSrcElts) {
6292  // The extraction can just take the first half
6293  Src.ShuffleVec =
6294  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6295  DAG.getConstant(0, dl, MVT::i32));
6296  } else {
6297  // An actual VEXT is needed
6298  SDValue VEXTSrc1 =
6299  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6300  DAG.getConstant(0, dl, MVT::i32));
6301  SDValue VEXTSrc2 =
6302  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6303  DAG.getConstant(NumSrcElts, dl, MVT::i32));
6304 
6305  Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6306  VEXTSrc2,
6307  DAG.getConstant(Src.MinElt, dl, MVT::i32));
6308  Src.WindowBase = -Src.MinElt;
6309  }
6310  }
6311 
6312  // Another possible incompatibility occurs from the vector element types. We
6313  // can fix this by bitcasting the source vectors to the same type we intend
6314  // for the shuffle.
6315  for (auto &Src : Sources) {
6316  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6317  if (SrcEltTy == SmallestEltTy)
6318  continue;
6319  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6320  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6321  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6322  Src.WindowBase *= Src.WindowScale;
6323  }
6324 
6325  // Final sanity check before we try to actually produce a shuffle.
6326  DEBUG(
6327  for (auto Src : Sources)
6328  assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6329  );
6330 
6331  // The stars all align, our next step is to produce the mask for the shuffle.
6332  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6333  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6334  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6335  SDValue Entry = Op.getOperand(i);
6336  if (Entry.isUndef())
6337  continue;
6338 
6339  auto Src = find(Sources, Entry.getOperand(0));
6340  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6341 
6342  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6343  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6344  // segment.
6345  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6346  int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6347  VT.getScalarSizeInBits());
6348  int LanesDefined = BitsDefined / BitsPerShuffleLane;
6349 
6350  // This source is expected to fill ResMultiplier lanes of the final shuffle,
6351  // starting at the appropriate offset.
6352  int *LaneMask = &Mask[i * ResMultiplier];
6353 
6354  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6355  ExtractBase += NumElts * (Src - Sources.begin());
6356  for (int j = 0; j < LanesDefined; ++j)
6357  LaneMask[j] = ExtractBase + j;
6358  }
6359 
6360  // Final check before we try to produce nonsense...
6361  if (!isShuffleMaskLegal(Mask, ShuffleVT))
6362  return SDValue();
6363 
6364  // We can't handle more than two sources. This should have already
6365  // been checked before this point.
6366  assert(Sources.size() <= 2 && "Too many sources!");
6367 
6368  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6369  for (unsigned i = 0; i < Sources.size(); ++i)
6370  ShuffleOps[i] = Sources[i].ShuffleVec;
6371 
6372  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6373  ShuffleOps[1], Mask);
6374  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6375 }
6376 
6377 /// isShuffleMaskLegal - Targets can use this to indicate that they only
6378 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6379 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6380 /// are assumed to be legal.
6381 bool
6382 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6383  EVT VT) const {
6384  if (VT.getVectorNumElements() == 4 &&
6385  (VT.is128BitVector() || VT.is64BitVector())) {
6386  unsigned PFIndexes[4];
6387  for (unsigned i = 0; i != 4; ++i) {
6388  if (M[i] < 0)
6389  PFIndexes[i] = 8;
6390  else
6391  PFIndexes[i] = M[i];
6392  }
6393 
6394  // Compute the index in the perfect shuffle table.
6395  unsigned PFTableIndex =
6396  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
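  // For example, the interleaving mask <0,4,1,5> gives PFIndexes = {0,4,1,5}
  // and a table index of 0*729 + 4*81 + 1*9 + 5 = 338; an undef lane would
  // contribute the out-of-range digit 8 instead.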
6397  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6398  unsigned Cost = (PFEntry >> 30);
6399 
6400  if (Cost <= 4)
6401  return true;
6402  }
6403 
6404  bool ReverseVEXT, isV_UNDEF;
6405  unsigned Imm, WhichResult;
6406 
6407  unsigned EltSize = VT.getScalarSizeInBits();
6408  return (EltSize >= 32 ||
6409  ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6410  isVREVMask(M, VT, 64) ||
6411  isVREVMask(M, VT, 32) ||
6412  isVREVMask(M, VT, 16) ||
6413  isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6414  isVTBLMask(M, VT) ||
6415  isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6416  ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6417 }
6418 
6419 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6420 /// the specified operations to build the shuffle.
6421 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6422  SDValue RHS, SelectionDAG &DAG,
6423  const SDLoc &dl) {
6424  unsigned OpNum = (PFEntry >> 26) & 0x0F;
6425  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6426  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
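  // Each 32-bit PFEntry packs, from the top bit down, a 2-bit cost (consumed
  // by the callers), a 4-bit opcode (one of the OP_* values below) and two
  // 13-bit operand ids, which are either the OP_COPY sentinels handled below
  // or indices back into PerfectShuffleTable that are expanded recursively.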
6427 
6428  enum {
6429  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6430  OP_VREV,
6431  OP_VDUP0,
6432  OP_VDUP1,
6433  OP_VDUP2,
6434  OP_VDUP3,
6435  OP_VEXT1,
6436  OP_VEXT2,
6437  OP_VEXT3,
6438  OP_VUZPL, // VUZP, left result
6439  OP_VUZPR, // VUZP, right result
6440  OP_VZIPL, // VZIP, left result
6441  OP_VZIPR, // VZIP, right result
6442  OP_VTRNL, // VTRN, left result
6443  OP_VTRNR // VTRN, right result
6444  };
6445 
6446  if (OpNum == OP_COPY) {
6447  if (LHSID == (1*9+2)*9+3) return LHS;
6448  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6449  return RHS;
6450  }
6451 
6452  SDValue OpLHS, OpRHS;
6453  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6454  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6455  EVT VT = OpLHS.getValueType();
6456 
6457  switch (OpNum) {
6458  default: llvm_unreachable("Unknown shuffle opcode!");
6459  case OP_VREV:
6460  // VREV divides the vector in half and swaps within the half.
6461  if (VT.getVectorElementType() == MVT::i32 ||
6462  VT.getVectorElementType() == MVT::f32)
6463  return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6464  // vrev <4 x i16> -> VREV32
6465  if (VT.getVectorElementType() == MVT::i16)
6466  return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6467  // vrev <4 x i8> -> VREV16
6468  assert(VT.getVectorElementType() == MVT::i8);
6469  return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6470  case OP_VDUP0:
6471  case OP_VDUP1:
6472  case OP_VDUP2:
6473  case OP_VDUP3:
6474  return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6475  OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6476  case OP_VEXT1:
6477  case OP_VEXT2:
6478  case OP_VEXT3:
6479  return DAG.getNode(ARMISD::VEXT, dl, VT,
6480  OpLHS, OpRHS,
6481  DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6482  case OP_VUZPL:
6483  case OP_VUZPR:
6484  return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6485  OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6486  case OP_VZIPL:
6487  case OP_VZIPR:
6488  return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6489  OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6490  case OP_VTRNL:
6491  case OP_VTRNR:
6492  return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6493  OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6494  }
6495 }
6496 
6497 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6498  ArrayRef<int> ShuffleMask,
6499  SelectionDAG &DAG) {
6500  // Check to see if we can use the VTBL instruction.
6501  SDValue V1 = Op.getOperand(0);
6502  SDValue V2 = Op.getOperand(1);
6503  SDLoc DL(Op);
6504 
6505  SmallVector<SDValue, 8> VTBLMask;
6506  for (ArrayRef<int>::iterator
6507  I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6508  VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6509 
6510  if (V2.getNode()->isUndef())
6511  return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6512  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6513 
6514  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6515  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6516 }
6517 
6518 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6519  SelectionDAG &DAG) {
6520  SDLoc DL(Op);
6521  SDValue OpLHS = Op.getOperand(0);
6522  EVT VT = OpLHS.getValueType();
6523 
6524  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6525  "Expect an v8i16/v16i8 type");
6526  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6527  // For a v16i8 type: after the VREV64 the vector reads <7, ..., 0, 15, ..., 8>.
6528  // The VEXT below then moves the last 8 bytes into the bottom double word and
6529  // the first 8 bytes into the top one, giving <15, ..., 0>. The v8i16 case is similar.
6530  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6531  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6532  DAG.getConstant(ExtractNum, DL, MVT::i32));
6533 }
6534 
6535 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6536  SDValue V1 = Op.getOperand(0);
6537  SDValue V2 = Op.getOperand(1);
6538  SDLoc dl(Op);
6539  EVT VT = Op.getValueType();
6540  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6541 
6542  // Convert shuffles that are directly supported on NEON to target-specific
6543  // DAG nodes, instead of keeping them as shuffles and matching them again
6544  // during code selection. This is more efficient and avoids the possibility
6545  // of inconsistencies between legalization and selection.
6546  // FIXME: floating-point vectors should be canonicalized to integer vectors
6547  // of the same type so that they get CSEd properly.
6548  ArrayRef<int> ShuffleMask = SVN->getMask();
6549 
6550  unsigned EltSize = VT.getScalarSizeInBits();
6551  if (EltSize <= 32) {
6552  if (SVN->isSplat()) {
6553  int Lane = SVN->getSplatIndex();
6554  // If this is an undef splat, generate it via "just" vdup, if possible.
6555  if (Lane == -1) Lane = 0;
6556 
6557  // Test if V1 is a SCALAR_TO_VECTOR.
6558  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6559  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6560  }
6561  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6562  // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6563  // reaches it).
6564  if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6565  !isa<ConstantSDNode>(V1.getOperand(0))) {
6566  bool IsScalarToVector = true;
6567  for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6568  if (!V1.getOperand(i).isUndef()) {
6569  IsScalarToVector = false;
6570  break;
6571  }
6572  if (IsScalarToVector)
6573  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6574  }
6575  return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6576  DAG.getConstant(Lane, dl, MVT::i32));
6577  }
6578 
6579  bool ReverseVEXT;
6580  unsigned Imm;
6581  if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6582  if (ReverseVEXT)
6583  std::swap(V1, V2);
6584  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6585  DAG.getConstant(Imm, dl, MVT::i32));
6586  }
6587 
6588  if (isVREVMask(ShuffleMask, VT, 64))
6589  return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6590  if (isVREVMask(ShuffleMask, VT, 32))
6591  return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6592  if (isVREVMask(ShuffleMask, VT, 16))
6593  return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6594 
6595  if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6596  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6597  DAG.getConstant(Imm, dl, MVT::i32));
6598  }
6599 
6600  // Check for Neon shuffles that modify both input vectors in place.
6601  // If both results are used, i.e., if there are two shuffles with the same
6602  // source operands and with masks corresponding to both results of one of
6603  // these operations, DAG memoization will ensure that a single node is
6604  // used for both shuffles.
6605  unsigned WhichResult;
6606  bool isV_UNDEF;
6607  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6608  ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6609  if (isV_UNDEF)
6610  V2 = V1;
6611  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6612  .getValue(WhichResult);
6613  }
6614 
6615  // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6616  // shuffles that produce a result larger than their operands with:
6617  // shuffle(concat(v1, undef), concat(v2, undef))
6618  // ->
6619  // shuffle(concat(v1, v2), undef)
6620  // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6621  //
6622  // This is useful in the general case, but there are special cases where
6623  // native shuffles produce larger results: the two-result ops.
6624  //
6625  // Look through the concat when lowering them:
6626  // shuffle(concat(v1, v2), undef)
6627  // ->
6628  // concat(VZIP(v1, v2):0, :1)
6629  //
6630  if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6631  SDValue SubV1 = V1->getOperand(0);
6632  SDValue SubV2 = V1->getOperand(1);
6633  EVT SubVT = SubV1.getValueType();
6634 
6635  // We expect these to have been canonicalized to -1.
6636  assert(all_of(ShuffleMask, [&](int i) {
6637  return i < (int)VT.getVectorNumElements();
6638  }) && "Unexpected shuffle index into UNDEF operand!");
6639 
6640  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6641  ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6642  if (isV_UNDEF)
6643  SubV2 = SubV1;
6644  assert((WhichResult == 0) &&
6645  "In-place shuffle of concat can only have one result!");
6646  SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6647  SubV1, SubV2);
6648  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6649  Res.getValue(1));
6650  }
6651  }
6652  }
6653 
6654  // If the shuffle is not directly supported and it has 4 elements, use
6655  // the PerfectShuffle-generated table to synthesize it from other shuffles.
6656  unsigned NumElts = VT.getVectorNumElements();
6657  if (NumElts == 4) {
6658  unsigned PFIndexes[4];
6659  for (unsigned i = 0; i != 4; ++i) {
6660  if (ShuffleMask[i] < 0)
6661  PFIndexes[i] = 8;
6662  else
6663  PFIndexes[i] = ShuffleMask[i];
6664  }
6665 
6666  // Compute the index in the perfect shuffle table.
6667  unsigned PFTableIndex =
6668  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6669  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6670  unsigned Cost = (PFEntry >> 30);
6671 
6672  if (Cost <= 4)
6673  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6674  }
6675 
6676  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6677  if (EltSize >= 32) {
6678  // Do the expansion with floating-point types, since that is what the VFP
6679  // registers are defined to use, and since i64 is not legal.
6680  EVT EltVT = EVT::getFloatingPointVT(EltSize);
6681  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6682  V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6683  V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6684  SmallVector<SDValue, 8> Ops;
6685  for (unsigned i = 0; i < NumElts; ++i) {
6686  if (ShuffleMask[i] < 0)
6687  Ops.push_back(DAG.getUNDEF(EltVT));
6688  else
6689  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6690  ShuffleMask[i] < (int)NumElts ? V1 : V2,
6691  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6692  dl, MVT::i32)));
6693  }
6694  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6695  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6696  }
6697 
6698  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6699  return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6700 
6701  if (VT == MVT::v8i8)
6702  if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6703  return NewOp;
6704 
6705  return SDValue();
6706 }
6707 
6708 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6709  // INSERT_VECTOR_ELT is legal only for immediate indexes.
6710  SDValue Lane = Op.getOperand(2);
6711  if (!isa<ConstantSDNode>(Lane))
6712  return SDValue();
6713 
6714  return Op;
6715 }
6716 
6717 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6718  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6719  SDValue Lane = Op.getOperand(1);
6720  if (!isa<ConstantSDNode>(Lane))
6721  return SDValue();
6722 
6723  SDValue Vec = Op.getOperand(0);
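  // Extracts of sub-32-bit elements are rewritten to VGETLANEu so the lane is
  // zero-extended into a full i32; 32-bit and larger elements are left as-is.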
6724  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6725  SDLoc dl(Op);
6726  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6727  }
6728 
6729  return Op;
6730 }
6731 
6732 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6733  // The only time a CONCAT_VECTORS operation can have legal types is when
6734  // two 64-bit vectors are concatenated to a 128-bit vector.
6735  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6736  "unexpected CONCAT_VECTORS");
6737  SDLoc dl(Op);
6738  SDValue Val = DAG.getUNDEF(MVT::v2f64);
6739  SDValue Op0 = Op.getOperand(0);
6740  SDValue Op1 = Op.getOperand(1);
6741  if (!Op0.isUndef())
6742  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6743  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6744  DAG.getIntPtrConstant(0, dl));
6745  if (!Op1.isUndef())
6746  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6747  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6748  DAG.getIntPtrConstant(1, dl));
6749  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6750 }
6751 
6752 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6753 /// element has been zero/sign-extended, depending on the isSigned parameter,
6754 /// from an integer type half its size.
6755 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6756  bool isSigned) {
6757  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6758  EVT VT = N->getValueType(0);
6759  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6760  SDNode *BVN = N->getOperand(0).getNode();
6761  if (BVN->getValueType(0) != MVT::v4i32 ||
6762  BVN->getOpcode() != ISD::BUILD_VECTOR)
6763  return false;
6764  unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6765  unsigned HiElt = 1 - LoElt;
6766  ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6767  ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6768  ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6769  ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6770  if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6771  return false;
6772  if (isSigned) {
6773  if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6774  Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6775  return true;
6776  } else {
6777  if (Hi0->isNullValue() && Hi1->isNullValue())
6778  return true;
6779  }
6780  return false;
6781  }
6782 
6783  if (N->getOpcode() != ISD::BUILD_VECTOR)
6784  return false;
6785 
6786  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6787  SDNode *Elt = N->getOperand(i).getNode();
6788  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6789  unsigned EltSize = VT.getScalarSizeInBits();
6790  unsigned HalfSize = EltSize / 2;
6791  if (isSigned) {
6792  if (!isIntN(HalfSize, C->getSExtValue()))
6793  return false;
6794  } else {
6795  if (!isUIntN(HalfSize, C->getZExtValue()))
6796  return false;
6797  }
6798  continue;
6799  }
6800  return false;
6801  }
6802 
6803  return true;
6804 }
6805 
6806 /// isSignExtended - Check if a node is a vector value that is sign-extended
6807 /// or a constant BUILD_VECTOR with sign-extended elements.
6808 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6809  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6810  return true;
6811  if (isExtendedBUILD_VECTOR(N, DAG, true))
6812  return true;
6813  return false;
6814 }
6815 
6816 /// isZeroExtended - Check if a node is a vector value that is zero-extended
6817 /// or a constant BUILD_VECTOR with zero-extended elements.
6818 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6819  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6820  return true;
6821  if (isExtendedBUILD_VECTOR(N, DAG, false))
6822  return true;
6823  return false;
6824 }
6825 
6826 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6827  if (OrigVT.getSizeInBits() >= 64)
6828  return OrigVT;
6829 
6830  assert(OrigVT.isSimple() && "Expecting a simple value type");
6831 
6832  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6833  switch (OrigSimpleTy) {
6834  default: llvm_unreachable("Unexpected Vector Type");
6835  case MVT::v2i8:
6836  case MVT::v2i16:
6837  return MVT::v2i32;
6838  case MVT::v4i8:
6839  return MVT::v4i16;
6840  }
6841 }
6842 
6843 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6844 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6845 /// We insert the required extension here to get the vector to fill a D register.
6846 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6847  const EVT &OrigTy,
6848  const EVT &ExtTy,
6849  unsigned ExtOpcode) {
6850  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6851  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6852  // 64-bits we need to insert a new extension so that it will be 64-bits.
6853  assert(ExtTy.is128BitVector() && "Unexpected extension size");
6854  if (OrigTy.getSizeInBits() >= 64)
6855  return N;
6856 
6857  // Must extend size to at least 64 bits to be used as an operand for VMULL.
6858  EVT NewVT = getExtensionTo64Bits(OrigTy);
6859 
6860  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6861 }
6862 
6863 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
6864 /// does not do any sign/zero extension. If the original vector is less
6865 /// than 64 bits, an appropriate extension will be added after the load to
6866 /// reach a total size of 64 bits. We have to add the extension separately
6867 /// because ARM does not have a sign/zero extending load for vectors.
6868 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
6869  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6870 
6871  // The load already has the right type.
6872  if (ExtendedTy == LD->getMemoryVT())
6873  return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6874  LD->getBasePtr(), LD->getPointerInfo(),
6875  LD->getAlignment(), LD->getMemOperand()->getFlags());
6876 
6877  // We need to create a zextload/sextload. We cannot just create a load
6878  // followed by a zext/zext node because LowerMUL is also run during normal
6879  // operation legalization where we can't create illegal types.
6880  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
6881  LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
6882  LD->getMemoryVT(), LD->getAlignment(),
6883  LD->getMemOperand()->getFlags());
6884 }
6885 
6886 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
6887 /// extending load, or BUILD_VECTOR with extended elements, return the
6888 /// unextended value. The unextended vector should be 64 bits so that it can
6889 /// be used as an operand to a VMULL instruction. If the original vector size
6890 /// before extension is less than 64 bits we add an extension to resize
6891 /// the vector to 64 bits.
6892 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
6893  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
6894  return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
6895  N->getOperand(0)->getValueType(0),
6896  N->getValueType(0),
6897  N->getOpcode());
6898 
6899  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
6900  return SkipLoadExtensionForVMULL(LD, DAG);
6901 
6902  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
6903  // have been legalized as a BITCAST from v4i32.
6904  if (N->getOpcode() == ISD::BITCAST) {
6905  SDNode *BVN = N->getOperand(0).getNode();
6906  assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
6907  BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
6908  unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6909  return DAG.getBuildVector(
6910  MVT::v2i32, SDLoc(N),
6911  {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
6912  }
6913  // Construct a new BUILD_VECTOR with elements truncated to half the size.
6914  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
6915  EVT VT = N->getValueType(0);
6916  unsigned EltSize = VT.getScalarSizeInBits() / 2;
6917  unsigned NumElts = VT.getVectorNumElements();
6918  MVT TruncVT = MVT::getIntegerVT(EltSize);
6919  SmallVector<SDValue, 8> Ops;
6920  SDLoc dl(N);
6921  for (unsigned i = 0; i != NumElts; ++i) {
6922  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
6923  const APInt &CInt = C->getAPIntValue();
6924  // Element types smaller than 32 bits are not legal, so use i32 elements.
6925  // The values are implicitly truncated so sext vs. zext doesn't matter.
6926  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
6927  }
6928  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
6929 }
6930 
6931 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
6932  unsigned Opcode = N->getOpcode();
6933  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
6934  SDNode *N0 = N->getOperand(0).getNode();
6935  SDNode *N1 = N->getOperand(1).getNode();
6936  return N0->hasOneUse() && N1->hasOneUse() &&
6937  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
6938  }
6939  return false;
6940 }
6941 
6942 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
6943  unsigned Opcode = N->getOpcode();
6944  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
6945  SDNode *N0 = N->getOperand(0).getNode();
6946  SDNode *N1 = N->getOperand(1).getNode();
6947  return N0->hasOneUse() && N1->hasOneUse() &&
6948  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
6949  }
6950  return false;
6951 }
6952 
6953 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
6954  // Multiplications are only custom-lowered for 128-bit vectors so that
6955  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
6956  EVT VT = Op.getValueType();
6957  assert(VT.is128BitVector() && VT.isInteger() &&
6958  "unexpected type for custom-lowering ISD::MUL");
6959  SDNode *N0 = Op.getOperand(0).getNode();
6960  SDNode *N1 = Op.getOperand(1).getNode();
6961  unsigned NewOpc = 0;
6962  bool isMLA = false;
6963  bool isN0SExt = isSignExtended(N0, DAG);
6964  bool isN1SExt = isSignExtended(N1, DAG);
6965  if (isN0SExt && isN1SExt)
6966  NewOpc = ARMISD::VMULLs;
6967  else {
6968  bool isN0ZExt = isZeroExtended(N0, DAG);
6969  bool isN1ZExt = isZeroExtended(N1, DAG);
6970  if (isN0ZExt && isN1ZExt)
6971  NewOpc = ARMISD::VMULLu;
6972  else if (isN1SExt || isN1ZExt) {
6973  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
6974  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
6975  if (isN1SExt && isAddSubSExt(N0, DAG)) {
6976  NewOpc = ARMISD::VMULLs;
6977  isMLA = true;
6978  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
6979  NewOpc = ARMISD::VMULLu;
6980  isMLA = true;
6981  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
6982  std::swap(N0, N1);
6983  NewOpc = ARMISD::VMULLu;
6984  isMLA = true;
6985  }
6986  }
6987 
6988  if (!NewOpc) {
6989  if (VT == MVT::v2i64)
6990  // Fall through to expand this. It is not legal.
6991  return SDValue();
6992  else
6993  // Other vector multiplications are legal.
6994  return Op;
6995  }
6996  }
6997 
6998  // Legalize to a VMULL instruction.
6999  SDLoc DL(Op);
7000  SDValue Op0;
7001  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7002  if (!isMLA) {
7003  Op0 = SkipExtensionForVMULL(N0, DAG);
7004  assert(Op0.getValueType().is64BitVector() &&
7005  Op1.getValueType().is64BitVector() &&
7006  "unexpected types for extended operands to VMULL");
7007  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7008  }
7009 
7010  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7011  // isel lowering to take advantage of no-stall back to back vmul + vmla.
7012  // vmull q0, d4, d6
7013  // vmlal q0, d5, d6
7014  // is faster than
7015  // vaddl q0, d4, d5
7016  // vmovl q1, d6
7017  // vmul q0, q0, q1
7018  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7019  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7020  EVT Op1VT = Op1.getValueType();
7021  return DAG.getNode(N0->getOpcode(), DL, VT,
7022  DAG.getNode(NewOpc, DL, VT,
7023  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7024  DAG.getNode(NewOpc, DL, VT,
7025  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7026 }
7027 
7028 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7029  SelectionDAG &DAG) {
7030  // TODO: Should this propagate fast-math-flags?
7031 
7032  // Convert to float
7033  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7034  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7035  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7036  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7037  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7038  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7039  // Get reciprocal estimate.
7040  // float4 recip = vrecpeq_f32(yf);
7041  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7042  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7043  Y);
7044  // Because char has a smaller range than uchar, we can actually get away
7045  // without any newton steps. This requires that we use a weird bias
7046  // of 0xb000, however (again, this has been exhaustively tested).
7047  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7048  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7049  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7050  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7051  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7052  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7053  // Convert back to short.
7054  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7055  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7056  return X;
7057 }
7058 
7059 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7060  SelectionDAG &DAG) {
7061  // TODO: Should this propagate fast-math-flags?
7062 
7063  SDValue N2;
7064  // Convert to float.
7065  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7066  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7067  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7068  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7069  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7070  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7071 
7072  // Use reciprocal estimate and one refinement step.
7073  // float4 recip = vrecpeq_f32(yf);
7074  // recip *= vrecpsq_f32(yf, recip);
7075  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7076  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7077  N1);
7078  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7079  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7080  N1, N2);
7081  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7082  // Because short has a smaller range than ushort, we can actually get away
7083  // with only a single newton step. This requires that we use a weird bias
7084  // of 0x89, however (again, this has been exhaustively tested).
7085  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7086  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7087  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7088  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7089  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7090  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7091  // Convert back to integer and return.
7092  // return vmovn_s32(vcvt_s32_f32(result));
7093  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7094  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7095  return N0;
7096 }
7097 
7098 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7099  EVT VT = Op.getValueType();
7100  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7101  "unexpected type for custom-lowering ISD::SDIV");
7102 
7103  SDLoc dl(Op);
7104  SDValue N0 = Op.getOperand(0);
7105  SDValue N1 = Op.getOperand(1);
7106  SDValue N2, N3;
7107 
7108  if (VT == MVT::v8i8) {
7109  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7110  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7111 
7112  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7113  DAG.getIntPtrConstant(4, dl));
7114  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7115  DAG.getIntPtrConstant(4, dl));
7116  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7117  DAG.getIntPtrConstant(0, dl));
7118  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7119  DAG.getIntPtrConstant(0, dl));
7120 
7121  N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7122  N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7123 
7124  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7125  N0 = LowerCONCAT_VECTORS(N0, DAG);
7126 
7127  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7128  return N0;
7129  }
7130  return LowerSDIV_v4i16(N0, N1, dl, DAG);
7131 }
7132 
7133 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7134  // TODO: Should this propagate fast-math-flags?
7135  EVT VT = Op.getValueType();
7136  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7137  "unexpected type for custom-lowering ISD::UDIV");
7138 
7139  SDLoc dl(Op);
7140  SDValue N0 = Op.getOperand(0);
7141  SDValue N1 = Op.getOperand(1);
7142  SDValue N2, N3;
7143 
7144  if (VT == MVT::v8i8) {
7145  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7146  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7147 
7148  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7149  DAG.getIntPtrConstant(4, dl));
7150  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7151  DAG.getIntPtrConstant(4, dl));
7152  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7153  DAG.getIntPtrConstant(0, dl));
7154  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7155  DAG.getIntPtrConstant(0, dl));
7156 
7157  N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7158  N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7159 
7160  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7161  N0 = LowerCONCAT_VECTORS(N0, DAG);
7162 
7163  N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7164  DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7165  MVT::i32),
7166  N0);
7167  return N0;
7168  }
7169 
7170  // v4i16 udiv ... Convert to float.
7171  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7172  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7173  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7174  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7175  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7176  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7177 
7178  // Use reciprocal estimate and two refinement steps.
7179  // float4 recip = vrecpeq_f32(yf);
7180  // recip *= vrecpsq_f32(yf, recip);
7181  // recip *= vrecpsq_f32(yf, recip);
7182  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7183  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7184  BN1);
7185  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7186  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7187  BN1, N2);
7188  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7189  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7190  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7191  BN1, N2);
7192  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7193  // Simply multiplying by the reciprocal estimate can leave us a few ulps
7194  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7195  // and that it will never cause us to return an answer too large).
7196  // float4 result = as_float4(as_int4(xf*recip) + 2);
7197  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7198  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7199  N1 = DAG.getConstant(2, dl, MVT::v4i32);
7200  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7201  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7202  // Convert back to integer and return.
7203  // return vmovn_u32(vcvt_s32_f32(result));
7204  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7205  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7206  return N0;
7207 }
7208 
7209 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7210  EVT VT = Op.getNode()->getValueType(0);
7211  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7212 
7213  unsigned Opc;
7214  bool ExtraOp = false;
7215  switch (Op.getOpcode()) {
7216  default: llvm_unreachable("Invalid code");
7217  case ISD::ADDC: Opc = ARMISD::ADDC; break;
7218  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7219  case ISD::SUBC: Opc = ARMISD::SUBC; break;
7220  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7221  }
7222 
7223  if (!ExtraOp)
7224  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7225  Op.getOperand(1));
7226  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7227  Op.getOperand(1), Op.getOperand(2));
7228 }
7229 
7230 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7231  assert(Subtarget->isTargetDarwin());
7232 
7233  // For iOS, we want to call an alternative entry point: __sincos_stret,
7234  // whose return values are passed via sret.
7235  SDLoc dl(Op);
7236  SDValue Arg = Op.getOperand(0);
7237  EVT ArgVT = Arg.getValueType();
7238  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7239  auto PtrVT = getPointerTy(DAG.getDataLayout());
7240 
7241  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7242  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7243 
7244  // Pair of floats / doubles used to pass the result.
7245  Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
7246  auto &DL = DAG.getDataLayout();
7247 
7248  ArgListTy Args;
7249  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7250  SDValue SRet;
7251  if (ShouldUseSRet) {
7252  // Create stack object for sret.
7253  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7254  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7255  int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7256  SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7257 
7258  ArgListEntry Entry;
7259  Entry.Node = SRet;
7260  Entry.Ty = RetTy->getPointerTo();
7261  Entry.isSExt = false;
7262  Entry.isZExt = false;
7263  Entry.isSRet = true;
7264  Args.push_back(Entry);
7265  RetTy = Type::getVoidTy(*DAG.getContext());
7266  }
7267 
7268  ArgListEntry Entry;
7269  Entry.Node = Arg;
7270  Entry.Ty = ArgTy;
7271  Entry.isSExt = false;
7272  Entry.isZExt = false;
7273  Args.push_back(Entry);
7274 
7275  const char *LibcallName =
7276  (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7277  RTLIB::Libcall LC =
7278  (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
7279  CallingConv::ID CC = getLibcallCallingConv(LC);
7280  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7281 
7282  CallLoweringInfo CLI(DAG);
7283  CLI.setDebugLoc(dl)
7284  .setChain(DAG.getEntryNode())
7285  .setCallee(CC, RetTy, Callee, std::move(Args))
7286  .setDiscardResult(ShouldUseSRet);
7287  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7288 
7289  if (!ShouldUseSRet)
7290  return CallResult.first;
7291 
7292  SDValue LoadSin =
7293  DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7294 
7295  // Address of cos field.
7296  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7297  DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7298  SDValue LoadCos =
7299  DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7300 
7301  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7302  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7303  LoadSin.getValue(0), LoadCos.getValue(0));
7304 }
7305 
7306 SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7307  bool Signed,
7308  SDValue &Chain) const {
7309  EVT VT = Op.getValueType();
7310  assert((VT == MVT::i32 || VT == MVT::i64) &&
7311  "unexpected type for custom lowering DIV");
7312  SDLoc dl(Op);
7313 
7314  const auto &DL = DAG.getDataLayout();
7315  const auto &TLI = DAG.getTargetLoweringInfo();
7316 
7317  const char *Name = nullptr;
7318  if (Signed)
7319  Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7320  else
7321  Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7322 
7323  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7324 
7325  ArgListTy Args;
7326 
7327  for (auto AI : {1, 0}) {
7328  ArgListEntry Arg;
7329  Arg.Node = Op.getOperand(AI);
7330  Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7331  Args.push_back(Arg);
7332  }
7333 
7334  CallLoweringInfo CLI(DAG);
7335  CLI.setDebugLoc(dl)
7336  .setChain(Chain)
7338  ES, std::move(Args));
7339 
7340  return LowerCallTo(CLI).first;
7341 }
7342 
7343 SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7344  bool Signed) const {
7345  assert(Op.getValueType() == MVT::i32 &&
7346  "unexpected type for custom lowering DIV");
7347  SDLoc dl(Op);
7348 
7349  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7350  DAG.getEntryNode(), Op.getOperand(1));
7351 
7352  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7353 }
7354 
7355 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7356  SDLoc DL(N);
7357  SDValue Op = N->getOperand(1);
7358  if (N->getValueType(0) == MVT::i32)
7359  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
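  // For an i64 operand, OR the two 32-bit halves together: the result is zero
  // exactly when the full 64-bit value is zero, which is all WIN__DBZCHK needs
  // to test.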
7360  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7361  DAG.getConstant(0, DL, MVT::i32));
7362  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7363  DAG.getConstant(1, DL, MVT::i32));
7364  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7365  DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7366 }
7367 
7368 void ARMTargetLowering::ExpandDIV_Windows(
7369  SDValue Op, SelectionDAG &DAG, bool Signed,
7370  SmallVectorImpl<SDValue> &Results) const {
7371  const auto &DL = DAG.getDataLayout();
7372  const auto &TLI = DAG.getTargetLoweringInfo();
7373 
7374  assert(Op.getValueType() == MVT::i64 &&
7375  "unexpected type for custom lowering DIV");
7376  SDLoc dl(Op);
7377 
7378  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7379 
7380  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7381 
7382  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7383  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7384  DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7385  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7386 
7387  Results.push_back(Lower);
7388  Results.push_back(Upper);
7389 }
7390 
7391 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7392  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7393  // Acquire/Release load/store is not legal for targets without a dmb or
7394  // equivalent available.
7395  return SDValue();
7396 
7397  // Monotonic load/store is legal for all targets.
7398  return Op;
7399 }
7400 
7401 static void ReplaceREADCYCLECOUNTER(SDNode *N,
7402  SmallVectorImpl<SDValue> &Results,
7403  SelectionDAG &DAG,
7404  const ARMSubtarget *Subtarget) {
7405  SDLoc DL(N);
7406  // Under Power Management extensions, the cycle-count is:
7407  // mrc p15, #0, <Rt>, c9, c13, #0
7408  SDValue Ops[] = { N->getOperand(0), // Chain
7409  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7410  DAG.getConstant(15, DL, MVT::i32),
7411  DAG.getConstant(0, DL, MVT::i32),
7412  DAG.getConstant(9, DL, MVT::i32),
7413  DAG.getConstant(13, DL, MVT::i32),
7414  DAG.getConstant(0, DL, MVT::i32)
7415  };
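  // Only a 32-bit cycle count is read by the MRC above, so the i64 value
  // expected by READCYCLECOUNTER is formed below by pairing that count with a
  // zero high word.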
7416 
7417  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7418  DAG.getVTList(MVT::i32, MVT::Other), Ops);
7419  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7420  DAG.getConstant(0, DL, MVT::i32)));
7421  Results.push_back(Cycles32.getValue(1));
7422 }
7423 
7424 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7425  SDLoc dl(V.getNode());
7426  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7427  SDValue VHi = DAG.getAnyExtOrTrunc(
7428  DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7429  dl, MVT::i32);
7430  SDValue RegClass =
7431  DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7432  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7433  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7434  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7435  return SDValue(
7436  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7437 }
7438 
7439 static void ReplaceCMP_SWAP_64Results(SDNode *N,
7440  SmallVectorImpl<SDValue> & Results,
7441  SelectionDAG &DAG) {
7442  assert(N->getValueType(0) == MVT::i64 &&
7443  "AtomicCmpSwap on types less than 64 should be legal");
7444  SDValue Ops[] = {N->getOperand(1),
7445  createGPRPairNode(DAG, N->getOperand(2)),
7446  createGPRPairNode(DAG, N->getOperand(3)),
7447  N->getOperand(0)};
7448  SDNode *CmpSwap = DAG.getMachineNode(
7449  ARM::CMP_SWAP_64, SDLoc(N),
7450  DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7451 
7452  MachineFunction &MF = DAG.getMachineFunction();
7453  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7454  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7455  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7456 
7457  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7458  SDValue(CmpSwap, 0)));
7459  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7460  SDValue(CmpSwap, 0)));
7461  Results.push_back(SDValue(CmpSwap, 2));
7462 }
7463 
7464 static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7465  SelectionDAG &DAG) {
7466  const auto &TLI = DAG.getTargetLoweringInfo();
7467 
7468  assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7469  "Custom lowering is MSVCRT specific!");
7470 
7471  SDLoc dl(Op);
7472  SDValue Val = Op.getOperand(0);
7473  MVT Ty = Val->getSimpleValueType(0);
7474  SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7475  SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7476  TLI.getPointerTy(DAG.getDataLayout()));
7477 
7478  TargetLowering::ArgListTy Args;
7479  TargetLowering::ArgListEntry Entry;
7480 
7481  Entry.Node = Val;
7482  Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7483  Entry.isZExt = true;
7484  Args.push_back(Entry);
7485 
7486  Entry.Node = Exponent;
7487  Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7488  Entry.isZExt = true;
7489  Args.push_back(Entry);
7490 
7491  Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7492 
7493  // The in-chain to the call is the entry node. If we are emitting a
7494  // tail call, the chain will be mutated if the node has a non-entry input
7495  // chain.
7496  SDValue InChain = DAG.getEntryNode();
7497  SDValue TCChain = InChain;
7498 
7499  const auto *F = DAG.getMachineFunction().getFunction();
7500  bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7501  F->getReturnType() == LCRTy;
7502  if (IsTC)
7503  InChain = TCChain;
7504 
7505  TargetLowering::CallLoweringInfo CLI(DAG);
7506  CLI.setDebugLoc(dl)
7507  .setChain(InChain)
7508  .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7509  .setTailCall(IsTC);
7510  std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7511 
7512  // Return the chain (the DAG root) if it is a tail call
7513  return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7514 }
7515 
7516 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7517  switch (Op.getOpcode()) {
7518  default: llvm_unreachable("Don't know how to custom lower this!");
7519  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7520  case ISD::ConstantPool:
7521  if (Subtarget->genExecuteOnly())
7522  llvm_unreachable("execute-only should not generate constant pools");
7523  return LowerConstantPool(Op, DAG);
7524  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7525  case ISD::GlobalAddress:
7526  switch (Subtarget->getTargetTriple().getObjectFormat()) {
7527  default: llvm_unreachable("unknown object format");
7528  case Triple::COFF:
7529  return LowerGlobalAddressWindows(Op, DAG);
7530  case Triple::ELF:
7531  return LowerGlobalAddressELF(Op, DAG);
7532  case Triple::MachO:
7533  return LowerGlobalAddressDarwin(Op, DAG);
7534  }
7535  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7536  case ISD::SELECT: return LowerSELECT(Op, DAG);
7537  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7538  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7539  case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7540  case ISD::VASTART: return LowerVASTART(Op, DAG);
7541  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7542  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7543  case ISD::SINT_TO_FP:
7544  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7545  case ISD::FP_TO_SINT:
7546  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7547  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7548  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7549  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7550  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7551  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7552  case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7553  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
7554  Subtarget);
7555  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
7556  case ISD::SHL:
7557  case ISD::SRL:
7558  case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
7559  case ISD::SREM: return LowerREM(Op.getNode(), DAG);
7560  case ISD::UREM: return LowerREM(Op.getNode(), DAG);
7561  case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
7562  case ISD::SRL_PARTS:
7563  case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
7564  case ISD::CTTZ:
7565  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
7566  case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
7567  case ISD::SETCC: return LowerVSETCC(Op, DAG);
7568  case ISD::SETCCE: return LowerSETCCE(Op, DAG);
7569  case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
7570  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
7571  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7572  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
7573  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7574  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
7575  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7576  case ISD::MUL: return LowerMUL(Op, DAG);
7577  case ISD::SDIV:
7578  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7579  return LowerDIV_Windows(Op, DAG, /* Signed */ true);
7580  return LowerSDIV(Op, DAG);
7581  case ISD::UDIV:
7582  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7583  return LowerDIV_Windows(Op, DAG, /* Signed */ false);
7584  return LowerUDIV(Op, DAG);
7585  case ISD::ADDC:
7586  case ISD::ADDE:
7587  case ISD::SUBC:
7588  case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7589  case ISD::SADDO:
7590  case ISD::UADDO:
7591  case ISD::SSUBO:
7592  case ISD::USUBO:
7593  return LowerXALUO(Op, DAG);
7594  case ISD::ATOMIC_LOAD:
7595  case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
7596  case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
7597  case ISD::SDIVREM:
7598  case ISD::UDIVREM: return LowerDivRem(Op, DAG);
7599  case ISD::DYNAMIC_STACKALLOC:
7600  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
7601  return LowerDYNAMIC_STACKALLOC(Op, DAG);
7602  llvm_unreachable("Don't know how to custom lower this!");
7603  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
7604  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
7605  case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
7606  case ARMISD::WIN__DBZCHK: return SDValue();
7607  }
7608 }
7609 
7610 /// ReplaceNodeResults - Replace the results of node with an illegal result
7611 /// type with new values built out of custom code.
7612 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
7613  SmallVectorImpl<SDValue> &Results,
7614  SelectionDAG &DAG) const {
7615  SDValue Res;
7616  switch (N->getOpcode()) {
7617  default:
7618  llvm_unreachable("Don't know how to custom expand this!");
7619  case ISD::READ_REGISTER:
7620  ExpandREAD_REGISTER(N, Results, DAG);
7621  break;
7622  case ISD::BITCAST:
7623  Res = ExpandBITCAST(N, DAG);
7624  break;
7625  case ISD::SRL:
7626  case ISD::SRA:
7627  Res = Expand64BitShift(N, DAG, Subtarget);
7628  break;
7629  case ISD::SREM:
7630  case ISD::UREM:
7631  Res = LowerREM(N, DAG);
7632  break;
7633  case ISD::SDIVREM:
7634  case ISD::UDIVREM:
7635  Res = LowerDivRem(SDValue(N, 0), DAG);
7636  assert(Res.getNumOperands() == 2 && "DivRem needs two values");
7637  Results.push_back(Res.getValue(0));
7638  Results.push_back(Res.getValue(1));
7639  return;
7640  case ISD::READCYCLECOUNTER:
7641  ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
7642  return;
7643  case ISD::UDIV:
7644  case ISD::SDIV:
7645  assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
7646  return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
7647  Results);
7648  case ISD::ATOMIC_CMP_SWAP:
7649  ReplaceCMP_SWAP_64Results(N, Results, DAG);
7650  return;
7651  }
7652  if (Res.getNode())
7653  Results.push_back(Res);
7654 }
7655 
7656 //===----------------------------------------------------------------------===//
7657 // ARM Scheduler Hooks
7658 //===----------------------------------------------------------------------===//
7659 
7660 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
7661 /// registers the function context.
7662 void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
7663  MachineBasicBlock *MBB,
7664  MachineBasicBlock *DispatchBB,
7665  int FI) const {
7666  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
7667  "ROPI/RWPI not currently supported with SjLj");
7668  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
7669  DebugLoc dl = MI.getDebugLoc();
7670  MachineFunction *MF = MBB->getParent();
7671  MachineRegisterInfo *MRI = &MF->getRegInfo();
7672  MachineConstantPool *MCP = MF->getConstantPool();
7673  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
7674  const Function *F = MF->getFunction();
7675 
7676  bool isThumb = Subtarget->isThumb();
7677  bool isThumb2 = Subtarget->isThumb2();
7678 
7679  unsigned PCLabelId = AFI->createPICLabelUId();
7680  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
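  // The PC-relative adjustment compensates for how the hardware reads PC: in
  // ARM state PC evaluates to the instruction address plus 8, in Thumb state
  // plus 4, hence the 8/4 values above.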
7681  ARMConstantPoolValue *CPV =
7682  ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
7683  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
7684 
7685  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
7686  : &ARM::GPRRegClass;
7687 
7688  // Grab constant pool and fixed stack memory operands.
7689  MachineMemOperand *CPMMO =
7690  MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
7691  MachineMemOperand::MOLoad, 4, 4);
7692 
7693  MachineMemOperand *FIMMOSt =
7694  MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
7695  MachineMemOperand::MOStore, 4, 4);
7696 
7697  // Load the address of the dispatch MBB into the jump buffer.
7698  if (isThumb2) {
7699  // Incoming value: jbuf
7700  // ldr.n r5, LCPI1_1
7701  // orr r5, r5, #1
7702  // add r5, pc
7703  // str r5, [$jbuf, #+4] ; &jbuf[1]
7704  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7705  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
7706  .addConstantPoolIndex(CPI)
7707  .addMemOperand(CPMMO));
7708  // Set the low bit because of thumb mode.
7709  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
7710  AddDefaultCC(
7711  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
7712  .addReg(NewVReg1, RegState::Kill)
7713  .addImm(0x01)));
7714  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
7715  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
7716  .addReg(NewVReg2, RegState::Kill)
7717  .addImm(PCLabelId);
7718  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
7719  .addReg(NewVReg3, RegState::Kill)
7720  .addFrameIndex(FI)
7721  .addImm(36) // &jbuf[1] :: pc
7722  .addMemOperand(FIMMOSt));
7723  } else if (isThumb) {
7724  // Incoming value: jbuf
7725  // ldr.n r1, LCPI1_4
7726  // add r1, pc
7727  // mov r2, #1
7728  // orrs r1, r2
7729  // add r2, $jbuf, #+4 ; &jbuf[1]
7730  // str r1, [r2]
7731  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7732  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
7733  .addConstantPoolIndex(CPI)
7734  .addMemOperand(CPMMO));
7735  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
7736  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
7737  .addReg(NewVReg1, RegState::Kill)
7738  .addImm(PCLabelId);
7739  // Set the low bit because of thumb mode.
7740  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
7741  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
7742  .addReg(ARM::CPSR, RegState::Define)
7743  .addImm(1));
7744  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
7745  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
7746  .addReg(ARM::CPSR, RegState::Define)
7747  .addReg(NewVReg2, RegState::Kill)
7748  .addReg(NewVReg3, RegState::Kill));
7749  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
7750  BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
7751  .addFrameIndex(FI)
7752  .addImm(36); // &jbuf[1] :: pc
7753  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
7754  .addReg(NewVReg4, RegState::Kill)
7755  .addReg(NewVReg5, RegState::Kill)
7756  .addImm(0)
7757  .addMemOperand(FIMMOSt));
7758  } else {
7759  // Incoming value: jbuf
7760  // ldr r1, LCPI1_1
7761  // add r1, pc, r1
7762  // str r1, [$jbuf, #+4] ; &jbuf[1]
7763  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7764  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
7765  .addConstantPoolIndex(CPI)
7766  .addImm(0)
7767  .addMemOperand(CPMMO));
7768  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
7769  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
7770  .addReg(NewVReg1, RegState::Kill)
7771  .addImm(PCLabelId));
7772  AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
7773  .addReg(NewVReg2, RegState::Kill)
7774  .addFrameIndex(FI)
7775  .addImm(36) // &jbuf[1] :: pc
7776  .addMemOperand(FIMMOSt));
7777  }
7778 }
7779 
7780 void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
7781  MachineBasicBlock *MBB) const {
7782  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
7783  DebugLoc dl = MI.getDebugLoc();
7784  MachineFunction *MF = MBB->getParent();
7785  MachineRegisterInfo *MRI = &MF->getRegInfo();
7786  MachineFrameInfo &MFI = MF->getFrameInfo();
7787  int FI = MFI.getFunctionContextIndex();
7788 
7789  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
7790  : &ARM::GPRnopcRegClass;
7791 
7792  // Get a mapping of the call site numbers to all of the landing pads they're
7793  // associated with.
7795  unsigned MaxCSNum = 0;
7796  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
7797  ++BB) {
7798  if (!BB->isEHPad()) continue;
7799 
7800  // FIXME: We should assert that the EH_LABEL is the first MI in the landing
7801  // pad.
7802  for (MachineBasicBlock::iterator
7803  II = BB->begin(), IE = BB->end(); II != IE; ++II) {
7804  if (!II->isEHLabel()) continue;
7805 
7806  MCSymbol *Sym = II->getOperand(0).getMCSymbol();
7807  if (!MF->hasCallSiteLandingPad(Sym)) continue;
7808 
7809  SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
7810  for (SmallVectorImpl<unsigned>::iterator
7811  CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
7812  CSI != CSE; ++CSI) {
7813  CallSiteNumToLPad[*CSI].push_back(&*BB);
7814  MaxCSNum = std::max(MaxCSNum, *CSI);
7815  }
7816  break;
7817  }
7818  }
7819 
7820  // Get an ordered list of the machine basic blocks for the jump table.
7821  std::vector<MachineBasicBlock*> LPadList;
7823  LPadList.reserve(CallSiteNumToLPad.size());
7824  for (unsigned I = 1; I <= MaxCSNum; ++I) {
7825  SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
7826  for (SmallVectorImpl<MachineBasicBlock*>::iterator
7827  II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
7828  LPadList.push_back(*II);
7829  InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
7830  }
7831  }
7832 
7833  assert(!LPadList.empty() &&
7834  "No landing pad destinations for the dispatch jump table!");
7835 
7836  // Create the jump table and associated information.
7837  MachineJumpTableInfo *JTI =
7838  MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
7839  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
7840 
7841  // Create the MBBs for the dispatch code.
7842 
7843  // Shove the dispatch's address into the return slot in the function context.
7844  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
7845  DispatchBB->setIsEHPad();
7846 
7847  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
7848  unsigned trap_opcode;
7849  if (Subtarget->isThumb())
7850  trap_opcode = ARM::tTRAP;
7851  else
7852  trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
7853 
7854  BuildMI(TrapBB, dl, TII->get(trap_opcode));
7855  DispatchBB->addSuccessor(TrapBB);
7856 
7857  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
7858  DispatchBB->addSuccessor(DispContBB);
7859 
7860  // Insert the MBBs.
7861  MF->insert(MF->end(), DispatchBB);
7862  MF->insert(MF->end(), DispContBB);
7863  MF->insert(MF->end(), TrapBB);
7864 
7865  // Insert code into the entry block that creates and registers the function
7866  // context.
7867  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
7868 
7869  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
7870  MachinePointerInfo::getFixedStack(*MF, FI),
7871  MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
7872 
7873  MachineInstrBuilder MIB;
7874  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
7875 
7876  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
7877  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
7878 
7879  // Add a register mask with no preserved registers. This results in all
7880  // registers being marked as clobbered. This can't work if the dispatch block
7881  // is in a Thumb1 function and is linked with ARM code which uses the FP
7882  // registers, as there is no way to preserve the FP registers in Thumb1 mode.
7883  MIB.addRegMask(RI.getNoPreservedMask());
7884 
7885  bool IsPositionIndependent = isPositionIndependent();
7886  unsigned NumLPads = LPadList.size();
7887  if (Subtarget->isThumb2()) {
7888  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7889  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
7890  .addFrameIndex(FI)
7891  .addImm(4)
7892  .addMemOperand(FIMMOLd));
7893 
7894  if (NumLPads < 256) {
7895  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
7896  .addReg(NewVReg1)
7897  .addImm(LPadList.size()));
7898  } else {
7899  unsigned VReg1 = MRI->createVirtualRegister(TRC);
7900  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
7901  .addImm(NumLPads & 0xFFFF));
7902 
7903  unsigned VReg2 = VReg1;
7904  if ((NumLPads & 0xFFFF0000) != 0) {
7905  VReg2 = MRI->createVirtualRegister(TRC);
7906  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
7907  .addReg(VReg1)
7908  .addImm(NumLPads >> 16));
7909  }
7910 
7911  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
7912  .addReg(NewVReg1)
7913  .addReg(VReg2));
7914  }
7915 
7916  BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
7917  .addMBB(TrapBB)
7918  .addImm(ARMCC::HI)
7919  .addReg(ARM::CPSR);
7920 
7921  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
7922  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
7923  .addJumpTableIndex(MJTI));
7924 
7925  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
7926  AddDefaultCC(
7927  AddDefaultPred(
7928  BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
7929  .addReg(NewVReg3, RegState::Kill)
7930  .addReg(NewVReg1)
7931  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
7932 
7933  BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
7934  .addReg(NewVReg4, RegState::Kill)
7935  .addReg(NewVReg1)
7936  .addJumpTableIndex(MJTI);
7937  } else if (Subtarget->isThumb()) {
7938  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
7939  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
7940  .addFrameIndex(FI)
7941  .addImm(1)
7942  .addMemOperand(FIMMOLd));
7943 
7944  if (NumLPads < 256) {
7945  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
7946  .addReg(NewVReg1)
7947  .addImm(NumLPads));
7948  } else {
7949  MachineConstantPool *ConstantPool = MF->getConstantPool();
7950  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
7951  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
7952 
7953  // MachineConstantPool wants an explicit alignment.
7954  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
7955  if (Align == 0)
7956  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
7957  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
7958 
7959  unsigned VReg1 = MRI->createVirtualRegister(TRC);
7960  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
7961  .addReg(VReg1, RegState::Define)
7962  .addConstantPoolIndex(Idx));
7963  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
7964  .addReg(NewVReg1)
7965  .addReg(VReg1));
7966  }
7967 
7968  BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
7969  .addMBB(TrapBB)
7970  .addImm(ARMCC::HI)
7971  .addReg(ARM::CPSR);
7972 
7973  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
7974  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
7975  .addReg(ARM::CPSR, RegState::Define)
7976  .addReg(NewVReg1)
7977  .addImm(2));
7978 
7979  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
7980  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
7981  .addJumpTableIndex(MJTI));
7982 
7983  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
7984  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
7985  .addReg(ARM::CPSR, RegState::Define)
7986  .addReg(NewVReg2, RegState::Kill)
7987  .addReg(NewVReg3));
7988 
7989  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
7990  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
7991 
7992  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
7993  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
7994  .addReg(NewVReg4, RegState::Kill)
7995  .addImm(0)
7996  .addMemOperand(JTMMOLd));
7997 
7998  unsigned NewVReg6 = NewVReg5;
7999  if (IsPositionIndependent) {
8000  NewVReg6 = MRI->createVirtualRegister(TRC);
8001  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
8002  .addReg(ARM::CPSR, RegState::Define)
8003  .addReg(NewVReg5, RegState::Kill)
8004  .addReg(NewVReg3));
8005  }
8006 
8007  BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
8008  .addReg(NewVReg6, RegState::Kill)
8009  .addJumpTableIndex(MJTI);
8010  } else {
8011  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8012  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
8013  .addFrameIndex(FI)
8014  .addImm(4)
8015  .addMemOperand(FIMMOLd));
8016 
8017  if (NumLPads < 256) {
8018  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
8019  .addReg(NewVReg1)
8020  .addImm(NumLPads));
8021  } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
8022  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8023  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
8024  .addImm(NumLPads & 0xFFFF));
8025 
8026  unsigned VReg2 = VReg1;
8027  if ((NumLPads & 0xFFFF0000) != 0) {
8028  VReg2 = MRI->createVirtualRegister(TRC);
8029  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
8030  .addReg(VReg1)
8031  .addImm(NumLPads >> 16));
8032  }
8033 
8034  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8035  .addReg(NewVReg1)
8036  .addReg(VReg2));
8037  } else {
8038  MachineConstantPool *ConstantPool = MF->getConstantPool();
8039  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
8040  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8041 
8042  // MachineConstantPool wants an explicit alignment.
8043  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8044  if (Align == 0)
8045  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8046  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8047 
8048  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8049  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
8050  .addReg(VReg1, RegState::Define)
8051  .addConstantPoolIndex(Idx)
8052  .addImm(0));
8053  AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8054  .addReg(NewVReg1)
8055  .addReg(VReg1, RegState::Kill));
8056  }
8057 
8058  BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
8059  .addMBB(TrapBB)
8060  .addImm(ARMCC::HI)
8061  .addReg(ARM::CPSR);
8062 
8063  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8064  AddDefaultCC(
8065  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
8066  .addReg(NewVReg1)
8067  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
8068  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8069  AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
8070  .addJumpTableIndex(MJTI));
8071 
8072  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8073  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8074  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8075  AddDefaultPred(
8076  BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
8077  .addReg(NewVReg3, RegState::Kill)
8078  .addReg(NewVReg4)
8079  .addImm(0)
8080  .addMemOperand(JTMMOLd));
8081 
8082  if (IsPositionIndependent) {
8083  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
8084  .addReg(NewVReg5, RegState::Kill)
8085  .addReg(NewVReg4)
8086  .addJumpTableIndex(MJTI);
8087  } else {
8088  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
8089  .addReg(NewVReg5, RegState::Kill)
8090  .addJumpTableIndex(MJTI);
8091  }
8092  }
8093 
8094  // Add the jump table entries as successors to the MBB.
8095  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
8096  for (std::vector<MachineBasicBlock*>::iterator
8097  I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
8098  MachineBasicBlock *CurMBB = *I;
8099  if (SeenMBBs.insert(CurMBB).second)
8100  DispContBB->addSuccessor(CurMBB);
8101  }
8102 
8103  // N.B. the order the invoke BBs are processed in doesn't matter here.
8104  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
8105  SmallVector<MachineBasicBlock*, 64> MBBLPads;
8106  for (MachineBasicBlock *BB : InvokeBBs) {
8107 
8108  // Remove the landing pad successor from the invoke block and replace it
8109  // with the new dispatch block.
8110  SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
8111  BB->succ_end());
8112  while (!Successors.empty()) {
8113  MachineBasicBlock *SMBB = Successors.pop_back_val();
8114  if (SMBB->isEHPad()) {
8115  BB->removeSuccessor(SMBB);
8116  MBBLPads.push_back(SMBB);
8117  }
8118  }
8119 
8120  BB->addSuccessor(DispatchBB, BranchProbability::getZero());
8121  BB->normalizeSuccProbs();
8122 
8123  // Find the invoke call and mark all of the callee-saved registers as
8124  // 'implicit defined' so that they're spilled. This prevents code from
8125  // moving instructions to before the EH block, where they will never be
8126  // executed.
8127  for (MachineBasicBlock::reverse_iterator
8128  II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
8129  if (!II->isCall()) continue;
8130 
8131  DenseMap<unsigned, bool> DefRegs;
8132  for (MachineInstr::mop_iterator
8133  OI = II->operands_begin(), OE = II->operands_end();
8134  OI != OE; ++OI) {
8135  if (!OI->isReg()) continue;
8136  DefRegs[OI->getReg()] = true;
8137  }
8138 
8139  MachineInstrBuilder MIB(*MF, &*II);
8140 
8141  for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
8142  unsigned Reg = SavedRegs[i];
8143  if (Subtarget->isThumb2() &&
8144  !ARM::tGPRRegClass.contains(Reg) &&
8145  !ARM::hGPRRegClass.contains(Reg))
8146  continue;
8147  if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
8148  continue;
8149  if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
8150  continue;
8151  if (!DefRegs[Reg])
8152  MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
8153  }
8154 
8155  break;
8156  }
8157  }
8158 
8159  // Mark all former landing pads as non-landing pads. The dispatch is the only
8160  // landing pad now.
8161  for (SmallVectorImpl<MachineBasicBlock*>::iterator
8162  I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
8163  (*I)->setIsEHPad(false);
8164 
8165  // The instruction is gone now.
8166  MI.eraseFromParent();
8167 }
8168 
8169 static
8170  MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
8171  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
8172  E = MBB->succ_end(); I != E; ++I)
8173  if (*I != Succ)
8174  return *I;
8175  llvm_unreachable("Expecting a BB with two successors!");
8176 }
8177 
8178 /// Return the load opcode for a given load size. If load size >= 8,
8179 /// a NEON opcode will be returned.
8180 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
8181  if (LdSize >= 8)
8182  return LdSize == 16 ? ARM::VLD1q32wb_fixed
8183  : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
8184  if (IsThumb1)
8185  return LdSize == 4 ? ARM::tLDRi
8186  : LdSize == 2 ? ARM::tLDRHi
8187  : LdSize == 1 ? ARM::tLDRBi : 0;
8188  if (IsThumb2)
8189  return LdSize == 4 ? ARM::t2LDR_POST
8190  : LdSize == 2 ? ARM::t2LDRH_POST
8191  : LdSize == 1 ? ARM::t2LDRB_POST : 0;
8192  return LdSize == 4 ? ARM::LDR_POST_IMM
8193  : LdSize == 2 ? ARM::LDRH_POST
8194  : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
8195 }
8196 
8197 /// Return the store opcode for a given store size. If store size >= 8,
8198 /// a NEON opcode will be returned.
8199 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
8200  if (StSize >= 8)
8201  return StSize == 16 ? ARM::VST1q32wb_fixed
8202  : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
8203  if (IsThumb1)
8204  return StSize == 4 ? ARM::tSTRi
8205  : StSize == 2 ? ARM::tSTRHi
8206  : StSize == 1 ? ARM::tSTRBi : 0;
8207  if (IsThumb2)
8208  return StSize == 4 ? ARM::t2STR_POST
8209  : StSize == 2 ? ARM::t2STRH_POST
8210  : StSize == 1 ? ARM::t2STRB_POST : 0;
8211  return StSize == 4 ? ARM::STR_POST_IMM
8212  : StSize == 2 ? ARM::STRH_POST
8213  : StSize == 1 ? ARM::STRB_POST_IMM : 0;
8214 }
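// For illustration: a 4-byte unit in Thumb2 code selects t2LDR_POST/t2STR_POST,
// and a 16-byte unit selects the NEON pair VLD1q32wb_fixed/VST1q32wb_fixed; in
// those cases the chosen opcode writes the incremented address back to the base
// register so consecutive units can be chained. Thumb1 uses the plain
// tLDRi/tSTRi forms, and the address update is emitted separately (see
// emitPostLd/emitPostSt below).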
8215 
8216 /// Emit a post-increment load operation with given size. The instructions
8217 /// will be added to BB at Pos.
8218  static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8219  const TargetInstrInfo *TII, const DebugLoc &dl,
8220  unsigned LdSize, unsigned Data, unsigned AddrIn,
8221  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8222  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
8223  assert(LdOpc != 0 && "Should have a load opcode");
8224  if (LdSize >= 8) {
8225  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8226  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
8227  .addImm(0));
8228  } else if (IsThumb1) {
8229  // load + update AddrIn
8230  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8231  .addReg(AddrIn).addImm(0));
8232  MachineInstrBuilder MIB =
8233  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
8234  MIB = AddDefaultT1CC(MIB);
8235  MIB.addReg(AddrIn).addImm(LdSize);
8236  AddDefaultPred(MIB);
8237  } else if (IsThumb2) {
8238  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8239  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
8240  .addImm(LdSize));
8241  } else { // arm
8242  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8243  .addReg(AddrOut, RegState::Define).addReg(AddrIn)
8244  .addReg(0).addImm(LdSize));
8245  }
8246 }
8247 
8248 /// Emit a post-increment store operation with given size. The instructions
8249 /// will be added to BB at Pos.
8250  static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8251  const TargetInstrInfo *TII, const DebugLoc &dl,
8252  unsigned StSize, unsigned Data, unsigned AddrIn,
8253  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8254  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
8255  assert(StOpc != 0 && "Should have a store opcode");
8256  if (StSize >= 8) {
8257  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8258  .addReg(AddrIn).addImm(0).addReg(Data));
8259  } else if (IsThumb1) {
8260  // store + update AddrIn
8261  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
8262  .addReg(AddrIn).addImm(0));
8263  MachineInstrBuilder MIB =
8264  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
8265  MIB = AddDefaultT1CC(MIB);
8266  MIB.addReg(AddrIn).addImm(StSize);
8267  AddDefaultPred(MIB);
8268  } else if (IsThumb2) {
8269  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8270  .addReg(Data).addReg(AddrIn).addImm(StSize));
8271  } else { // arm
8272  AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8273  .addReg(Data).addReg(AddrIn).addReg(0)
8274  .addImm(StSize));
8275  }
8276 }
8277 
8278  MachineBasicBlock *
8279  ARMTargetLowering::EmitStructByval(MachineInstr &MI,
8280  MachineBasicBlock *BB) const {
8281  // This pseudo instruction has 4 operands: dst, src, size, alignment.
8282  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
8283  // Otherwise, we will generate unrolled scalar copies.
8284  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8285  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8286  MachineFunction::iterator It = ++BB->getIterator();
8287 
8288  unsigned dest = MI.getOperand(0).getReg();
8289  unsigned src = MI.getOperand(1).getReg();
8290  unsigned SizeVal = MI.getOperand(2).getImm();
8291  unsigned Align = MI.getOperand(3).getImm();
8292  DebugLoc dl = MI.getDebugLoc();
8293 
8294  MachineFunction *MF = BB->getParent();
8295  MachineRegisterInfo &MRI = MF->getRegInfo();
8296  unsigned UnitSize = 0;
8297  const TargetRegisterClass *TRC = nullptr;
8298  const TargetRegisterClass *VecTRC = nullptr;
8299 
8300  bool IsThumb1 = Subtarget->isThumb1Only();
8301  bool IsThumb2 = Subtarget->isThumb2();
8302  bool IsThumb = Subtarget->isThumb();
8303 
8304  if (Align & 1) {
8305  UnitSize = 1;
8306  } else if (Align & 2) {
8307  UnitSize = 2;
8308  } else {
8309  // Check whether we can use NEON instructions.
8310  if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
8311  Subtarget->hasNEON()) {
8312  if ((Align % 16 == 0) && SizeVal >= 16)
8313  UnitSize = 16;
8314  else if ((Align % 8 == 0) && SizeVal >= 8)
8315  UnitSize = 8;
8316  }
8317  // Can't use NEON instructions.
8318  if (UnitSize == 0)
8319  UnitSize = 4;
8320  }
8321 
8322  // Select the correct opcode and register class for unit size load/store
8323  bool IsNeon = UnitSize >= 8;
8324  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
8325  if (IsNeon)
8326  VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
8327  : UnitSize == 8 ? &ARM::DPRRegClass
8328  : nullptr;
8329 
8330  unsigned BytesLeft = SizeVal % UnitSize;
8331  unsigned LoopSize = SizeVal - BytesLeft;
8332 
8333  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
8334  // Use LDR and STR to copy.
8335  // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
8336  // [destOut] = STR_POST(scratch, destIn, UnitSize)
8337  unsigned srcIn = src;
8338  unsigned destIn = dest;
8339  for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
8340  unsigned srcOut = MRI.createVirtualRegister(TRC);
8341  unsigned destOut = MRI.createVirtualRegister(TRC);
8342  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
8343  emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
8344  IsThumb1, IsThumb2);
8345  emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
8346  IsThumb1, IsThumb2);
8347  srcIn = srcOut;
8348  destIn = destOut;
8349  }
8350 
8351  // Handle the leftover bytes with LDRB and STRB.
8352  // [scratch, srcOut] = LDRB_POST(srcIn, 1)
8353  // [destOut] = STRB_POST(scratch, destIn, 1)
8354  for (unsigned i = 0; i < BytesLeft; i++) {
8355  unsigned srcOut = MRI.createVirtualRegister(TRC);
8356  unsigned destOut = MRI.createVirtualRegister(TRC);
8357  unsigned scratch = MRI.createVirtualRegister(TRC);
8358  emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
8359  IsThumb1, IsThumb2);
8360  emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
8361  IsThumb1, IsThumb2);
8362  srcIn = srcOut;
8363  destIn = destOut;
8364  }
8365  MI.eraseFromParent(); // The instruction is gone now.
8366  return BB;
8367  }
8368 
8369  // Expand the pseudo op to a loop.
8370  // thisMBB:
8371  // ...
8372  // movw varEnd, # --> with thumb2
8373  // movt varEnd, #
8374  // ldrcp varEnd, idx --> without thumb2
8375  // fallthrough --> loopMBB
8376  // loopMBB:
8377  // PHI varPhi, varEnd, varLoop
8378  // PHI srcPhi, src, srcLoop
8379  // PHI destPhi, dst, destLoop
8380  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
8381  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
8382  // subs varLoop, varPhi, #UnitSize
8383  // bne loopMBB
8384  // fallthrough --> exitMBB
8385  // exitMBB:
8386  // epilogue to handle left-over bytes
8387  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
8388  // [destOut] = STRB_POST(scratch, destLoop, 1)
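  // For example, a 259-byte copy with word alignment gives UnitSize = 4,
  // LoopSize = 256 and BytesLeft = 3: the loop below performs 64 word-sized
  // LDR_POST/STR_POST iterations and the epilogue copies the final 3 bytes.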
8389  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8390  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8391  MF->insert(It, loopMBB);
8392  MF->insert(It, exitMBB);
8393 
8394  // Transfer the remainder of BB and its successor edges to exitMBB.
8395  exitMBB->splice(exitMBB->begin(), BB,
8396  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8397  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8398 
8399  // Load an immediate to varEnd.
8400  unsigned varEnd = MRI.createVirtualRegister(TRC);
8401  if (Subtarget->useMovt(*MF)) {
8402  unsigned Vtmp = varEnd;
8403  if ((LoopSize & 0xFFFF0000) != 0)
8404  Vtmp = MRI.createVirtualRegister(TRC);
8405  AddDefaultPred(BuildMI(BB, dl,
8406  TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
8407  Vtmp).addImm(LoopSize & 0xFFFF));
8408 
8409  if ((LoopSize & 0xFFFF0000) != 0)
8410  AddDefaultPred(BuildMI(BB, dl,
8411  TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
8412  varEnd)
8413  .addReg(Vtmp)
8414  .addImm(LoopSize >> 16));
8415  } else {
8416  MachineConstantPool *ConstantPool = MF->getConstantPool();
8417  Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
8418  const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
8419 
8420  // MachineConstantPool wants an explicit alignment.
8421  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8422  if (Align == 0)
8423  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8424  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8425 
8426  if (IsThumb)
8427  AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
8428  varEnd, RegState::Define).addConstantPoolIndex(Idx));
8429  else
8430  AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
8431  varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
8432  }
8433  BB->addSuccessor(loopMBB);
8434 
8435  // Generate the loop body:
8436  // varPhi = PHI(varLoop, varEnd)
8437  // srcPhi = PHI(srcLoop, src)
8438  // destPhi = PHI(destLoop, dst)
8439  MachineBasicBlock *entryBB = BB;
8440  BB = loopMBB;
8441  unsigned varLoop = MRI.createVirtualRegister(TRC);
8442  unsigned varPhi = MRI.createVirtualRegister(TRC);
8443  unsigned srcLoop = MRI.createVirtualRegister(TRC);
8444  unsigned srcPhi = MRI.createVirtualRegister(TRC);
8445  unsigned destLoop = MRI.createVirtualRegister(TRC);
8446  unsigned destPhi = MRI.createVirtualRegister(TRC);
8447 
8448  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
8449  .addReg(varLoop).addMBB(loopMBB)
8450  .addReg(varEnd).addMBB(entryBB);
8451  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
8452  .addReg(srcLoop).addMBB(loopMBB)
8453  .addReg(src).addMBB(entryBB);
8454  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
8455  .addReg(destLoop).addMBB(loopMBB)
8456  .addReg(dest).addMBB(entryBB);
8457 
8458  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
8459  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
8460  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
8461  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
8462  IsThumb1, IsThumb2);
8463  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
8464  IsThumb1, IsThumb2);
8465 
8466  // Decrement loop variable by UnitSize.
8467  if (IsThumb1) {
8468  MachineInstrBuilder MIB =
8469  BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
8470  MIB = AddDefaultT1CC(MIB);
8471  MIB.addReg(varPhi).addImm(UnitSize);
8472  AddDefaultPred(MIB);
8473  } else {
8474  MachineInstrBuilder MIB =
8475  BuildMI(*BB, BB->end(), dl,
8476  TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
8477  AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
8478  MIB->getOperand(5).setReg(ARM::CPSR);
8479  MIB->getOperand(5).setIsDef(true);
8480  }
8481  BuildMI(*BB, BB->end(), dl,
8482  TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
8483  .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
8484 
8485  // loopMBB can loop back to loopMBB or fall through to exitMBB.
8486  BB->addSuccessor(loopMBB);
8487  BB->addSuccessor(exitMBB);
8488 
8489  // Add epilogue to handle BytesLeft.
8490  BB = exitMBB;
8491  auto StartOfExit = exitMBB->begin();
8492 
8493  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
8494  // [destOut] = STRB_POST(scratch, destLoop, 1)
8495  unsigned srcIn = srcLoop;
8496  unsigned destIn = destLoop;
8497  for (unsigned i = 0; i < BytesLeft; i++) {
8498  unsigned srcOut = MRI.createVirtualRegister(TRC);
8499  unsigned destOut = MRI.createVirtualRegister(TRC);
8500  unsigned scratch = MRI.createVirtualRegister(TRC);
8501  emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
8502  IsThumb1, IsThumb2);
8503  emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
8504  IsThumb1, IsThumb2);
8505  srcIn = srcOut;
8506  destIn = destOut;
8507  }
8508 
8509  MI.eraseFromParent(); // The instruction is gone now.
8510  return BB;
8511 }
8512 
8513  MachineBasicBlock *
8514  ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
8515  MachineBasicBlock *MBB) const {
8516  const TargetMachine &TM = getTargetMachine();
8517  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
8518  DebugLoc DL = MI.getDebugLoc();
8519 
8520  assert(Subtarget->isTargetWindows() &&
8521  "__chkstk is only supported on Windows");
8522  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
8523 
8524  // __chkstk takes the number of words to allocate on the stack in R4, and
8525  // returns the stack adjustment in number of bytes in R4. This will not
8526  // clobber any other registers (other than the obvious lr).
8527  //
8528  // Although, technically, IP should be considered a register which may be
8529  // clobbered, the call itself will not touch it. Windows on ARM is a pure
8530  // thumb-2 environment, so there is no interworking required. As a result, we
8531  // do not expect a veneer to be emitted by the linker, clobbering IP.
8532  //
8533  // Each module receives its own copy of __chkstk, so no import thunk is
8534  // required, again, ensuring that IP is not clobbered.
8535  //
8536  // Finally, although some linkers may theoretically provide a trampoline for
8537  // out of range calls (which is quite common due to a 32M range limitation of
8538  // branches for Thumb), we can generate the long-call version via
8539  // -mcmodel=large, alleviating the need for the trampoline which may clobber
8540  // IP.
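  // For example, in the default/small code models the emitted sequence is
  // essentially:
  //     bl    __chkstk        ; words to allocate in r4, byte count back in r4
  //     sub.w sp, sp, r4
  // while the large code model materializes the callee address with
  // t2MOVi32imm and calls through blx instead.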
8541 
8542  switch (TM.getCodeModel()) {
8543  case CodeModel::Small:
8544  case CodeModel::Medium:
8545  case CodeModel::Default:
8546  case CodeModel::Kernel:
8547  BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
8548  .addImm((unsigned)ARMCC::AL).addReg(0)
8549  .addExternalSymbol("__chkstk")
8550  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
8551  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
8552  .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
8553  break;
8554  case CodeModel::Large:
8555  case CodeModel::JITDefault: {
8556  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
8557  unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
8558 
8559  BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
8560  .addExternalSymbol("__chkstk");
8561  BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
8562  .addImm((unsigned)ARMCC::AL).addReg(0)
8563  .addReg(Reg, RegState::Kill)
8564  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
8565  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
8566  .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
8567  break;
8568  }
8569  }
8570 
8571  AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
8572  ARM::SP)
8573  .addReg(ARM::SP, RegState::Kill)
8574  .addReg(ARM::R4, RegState::Kill)
8575  .setMIFlags(MachineInstr::FrameSetup)));
8576 
8577  MI.eraseFromParent();
8578  return MBB;
8579 }
8580 
8581  MachineBasicBlock *
8582  ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
8583  MachineBasicBlock *MBB) const {
8584  DebugLoc DL = MI.getDebugLoc();
8585  MachineFunction *MF = MBB->getParent();
8586  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8587 
8588  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
8589  MF->insert(++MBB->getIterator(), ContBB);
8590  ContBB->splice(ContBB->begin(), MBB,
8591  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
8592  ContBB->transferSuccessorsAndUpdatePHIs(MBB);
8593  MBB->addSuccessor(ContBB);
8594 
8595  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
8596  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
8597  MF->push_back(TrapBB);
8598  MBB->addSuccessor(TrapBB);
8599 
8600  AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
8601  .addReg(MI.getOperand(0).getReg())
8602  .addImm(0));
8603  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
8604  .addMBB(TrapBB)
8605  .addImm(ARMCC::EQ)
8606  .addReg(ARM::CPSR);
8607 
8608  MI.eraseFromParent();
8609  return ContBB;
8610 }
8611 
8612  MachineBasicBlock *
8613  ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
8614  MachineBasicBlock *BB) const {
8615  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8616  DebugLoc dl = MI.getDebugLoc();
8617  bool isThumb2 = Subtarget->isThumb2();
8618  switch (MI.getOpcode()) {
8619  default: {
8620  MI.dump();
8621  llvm_unreachable("Unexpected instr type to insert");
8622  }
8623 
8624  // Thumb1 post-indexed loads are really just single-register LDMs.
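  // For example, "ldr r0, [r1], #4" becomes "ldmia r1!, {r0}", which loads r0
  // and advances the base register by 4.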
8625  case ARM::tLDR_postidx: {
8626  BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
8627  .addOperand(MI.getOperand(1)) // Rn_wb
8628  .addOperand(MI.getOperand(2)) // Rn
8629  .addOperand(MI.getOperand(3)) // PredImm
8630  .addOperand(MI.getOperand(4)) // PredReg
8631  .addOperand(MI.getOperand(0)); // Rt
8632  MI.eraseFromParent();
8633  return BB;
8634  }
8635 
8636  // The Thumb2 pre-indexed stores have the same MI operands; they just
8637  // define them differently in the .td files from the isel patterns, so
8638  // they need pseudos.
8639  case ARM::t2STR_preidx:
8640  MI.setDesc(TII->get(ARM::t2STR_PRE));
8641  return BB;
8642  case ARM::t2STRB_preidx:
8643  MI.setDesc(TII->get(ARM::t2STRB_PRE));
8644  return BB;
8645  case ARM::t2STRH_preidx:
8646  MI.setDesc(TII->get(ARM::t2STRH_PRE));
8647  return BB;
8648 
8649  case ARM::STRi_preidx:
8650  case ARM::STRBi_preidx: {
8651  unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
8652  : ARM::STRB_PRE_IMM;
8653  // Decode the offset.
8654  unsigned Offset = MI.getOperand(4).getImm();
8655  bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
8656  Offset = ARM_AM::getAM2Offset(Offset);
8657  if (isSub)
8658  Offset = -Offset;
8659 
8660  MachineMemOperand *MMO = *MI.memoperands_begin();
8661  BuildMI(*BB, MI, dl, TII->get(NewOpc))
8662  .addOperand(MI.getOperand(0)) // Rn_wb
8663  .addOperand(MI.getOperand(1)) // Rt
8664  .addOperand(MI.getOperand(2)) // Rn
8665  .addImm(Offset) // offset (skip GPR==zero_reg)
8666  .addOperand(MI.getOperand(5)) // pred
8667  .addOperand(MI.getOperand(6))
8668  .addMemOperand(MMO);
8669  MI.eraseFromParent();
8670  return BB;
8671  }
8672  case ARM::STRr_preidx:
8673  case ARM::STRBr_preidx:
8674  case ARM::STRH_preidx: {
8675  unsigned NewOpc;
8676  switch (MI.getOpcode()) {
8677  default: llvm_unreachable("unexpected opcode!");
8678  case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
8679  case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
8680  case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
8681  }
8682  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
8683  for (unsigned i = 0; i < MI.getNumOperands(); ++i)
8684  MIB.addOperand(MI.getOperand(i));
8685  MI.eraseFromParent();
8686  return BB;
8687  }
8688 
8689  case ARM::tMOVCCr_pseudo: {
8690  // To "insert" a SELECT_CC instruction, we actually have to insert the
8691  // diamond control-flow pattern. The incoming instruction knows the
8692  // destination vreg to set, the condition code register to branch on, the
8693  // true/false values to select between, and a branch opcode to use.
8694  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8695  MachineFunction::iterator It = ++BB->getIterator();
8696 
8697  // thisMBB:
8698  // ...
8699  // TrueVal = ...
8700  // cmpTY ccX, r1, r2
8701  // bCC copy1MBB
8702  // fallthrough --> copy0MBB
8703  MachineBasicBlock *thisMBB = BB;
8704  MachineFunction *F = BB->getParent();
8705  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
8706  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
8707  F->insert(It, copy0MBB);
8708  F->insert(It, sinkMBB);
8709 
8710  // Transfer the remainder of BB and its successor edges to sinkMBB.
8711  sinkMBB->splice(sinkMBB->begin(), BB,
8712  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8713  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
8714 
8715  BB->addSuccessor(copy0MBB);
8716  BB->addSuccessor(sinkMBB);
8717 
8718  BuildMI(BB, dl, TII->get(ARM::tBcc))
8719  .addMBB(sinkMBB)
8720  .addImm(MI.getOperand(3).getImm())
8721  .addReg(MI.getOperand(4).getReg());
8722 
8723  // copy0MBB:
8724  // %FalseValue = ...
8725  // # fallthrough to sinkMBB
8726  BB = copy0MBB;
8727 
8728  // Update machine-CFG edges
8729  BB->addSuccessor(sinkMBB);
8730 
8731  // sinkMBB:
8732  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
8733  // ...
8734  BB = sinkMBB;
8735  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
8736  .addReg(MI.getOperand(1).getReg())
8737  .addMBB(copy0MBB)
8738  .addReg(MI.getOperand(2).getReg())
8739  .addMBB(thisMBB);
8740 
8741  MI.eraseFromParent(); // The pseudo instruction is gone now.
8742  return BB;
8743  }
8744 
8745  case ARM::BCCi64:
8746  case ARM::BCCZi64: {
8747  // If there is an unconditional branch to the other successor, remove it.
8748  BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
8749 
8750  // Compare both parts that make up the double comparison separately for
8751  // equality.
8752  bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
8753 
8754  unsigned LHS1 = MI.getOperand(1).getReg();
8755  unsigned LHS2 = MI.getOperand(2).getReg();
8756  if (RHSisZero) {
8757  AddDefaultPred(BuildMI(BB, dl,
8758  TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
8759  .addReg(LHS1).addImm(0));
8760  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
8761  .addReg(LHS2).addImm(0)
8762  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
8763  } else {
8764  unsigned RHS1 = MI.getOperand(3).getReg();
8765  unsigned RHS2 = MI.getOperand(4).getReg();
8766  AddDefaultPred(BuildMI(BB, dl,
8767  TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
8768  .addReg(LHS1).addReg(RHS1));
8769  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
8770  .addReg(LHS2).addReg(RHS2)
8771  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
8772  }
8773 
8774  MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
8775  MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
8776  if (MI.getOperand(0).getImm() == ARMCC::NE)
8777  std::swap(destMBB, exitMBB);
8778 
8779  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
8780  .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
8781  if (isThumb2)
8782  AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
8783  else
8784  BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
8785 
8786  MI.eraseFromParent(); // The pseudo instruction is gone now.
8787  return BB;
8788  }
8789 
8790  case ARM::Int_eh_sjlj_setjmp:
8791  case ARM::Int_eh_sjlj_setjmp_nofp:
8792  case ARM::tInt_eh_sjlj_setjmp:
8793  case ARM::t2Int_eh_sjlj_setjmp:
8794  case ARM::t2Int_eh_sjlj_setjmp_nofp:
8795  return BB;
8796 
8797  case ARM::Int_eh_sjlj_setup_dispatch:
8798  EmitSjLjDispatchBlock(MI, BB);
8799  return BB;
8800 
8801  case ARM::ABS:
8802  case ARM::t2ABS: {
8803  // To insert an ABS instruction, we have to insert the
8804  // diamond control-flow pattern. The incoming instruction knows the
8805  // source vreg to test against 0, the destination vreg to set,
8806  // the condition code register to branch on, the
8807  // true/false values to select between, and a branch opcode to use.
8808  // It transforms
8809  // V1 = ABS V0
8810  // into
8811  // V2 = MOVS V0
8812  // BCC (branch to SinkBB if V0 >= 0)
8813  // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
8814  // SinkBB: V1 = PHI(V2, V3)
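  // Using r0/r1 purely for illustration, the ARM-mode expansion is roughly:
  //     cmp  r0, #0
  //     bpl  .LSinkBB          ; skip the negate when the source is >= 0
  //     rsb  r1, r0, #0
  //   .LSinkBB:
  //     ; result = PHI(r0, r1)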
8815  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8816  MachineFunction::iterator BBI = ++BB->getIterator();
8817  MachineFunction *Fn = BB->getParent();
8818  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
8819  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
8820  Fn->insert(BBI, RSBBB);
8821  Fn->insert(BBI, SinkBB);
8822 
8823  unsigned int ABSSrcReg = MI.getOperand(1).getReg();
8824  unsigned int ABSDstReg = MI.getOperand(0).getReg();
8825  bool ABSSrcKIll = MI.getOperand(1).isKill();
8826  bool isThumb2 = Subtarget->isThumb2();
8827  MachineRegisterInfo &MRI = Fn->getRegInfo();
8828  // In Thumb mode S must not be specified if source register is the SP or
8829  // PC and if destination register is the SP, so restrict register class
8830  unsigned NewRsbDstReg =
8831  MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
8832 
8833  // Transfer the remainder of BB and its successor edges to sinkMBB.
8834  SinkBB->splice(SinkBB->begin(), BB,
8835  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8836  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
8837 
8838  BB->addSuccessor(RSBBB);
8839  BB->addSuccessor(SinkBB);
8840 
8841  // fall through to SinkMBB
8842  RSBBB->addSuccessor(SinkBB);
8843 
8844  // insert a cmp at the end of BB
8845  AddDefaultPred(BuildMI(BB, dl,
8846  TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
8847  .addReg(ABSSrcReg).addImm(0));
8848 
8849  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
8850  BuildMI(BB, dl,
8851  TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
8852  .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
8853 
8854  // insert rsbri in RSBBB
8855  // Note: BCC and rsbri will be converted into predicated rsbmi
8856  // by if-conversion pass
8857  BuildMI(*RSBBB, RSBBB->begin(), dl,
8858  TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
8859  .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
8860  .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
8861 
8862  // insert PHI in SinkBB,
8863  // reuse ABSDstReg to not change uses of ABS instruction
8864  BuildMI(*SinkBB, SinkBB->begin(), dl,
8865  TII->get(ARM::PHI), ABSDstReg)
8866  .addReg(NewRsbDstReg).addMBB(RSBBB)
8867  .addReg(ABSSrcReg).addMBB(BB);
8868 
8869  // remove ABS instruction
8870  MI.eraseFromParent();
8871 
8872  // return last added BB
8873  return SinkBB;
8874  }
8875  case ARM::COPY_STRUCT_BYVAL_I32:
8876  ++NumLoopByVals;
8877  return EmitStructByval(MI, BB);
8878  case ARM::WIN__CHKSTK:
8879  return EmitLowered__chkstk(MI, BB);
8880  case ARM::WIN__DBZCHK:
8881  return EmitLowered__dbzchk(MI, BB);
8882  }
8883 }
8884 
8885 /// \brief Attaches vregs to MEMCPY that it will use as scratch registers
8886 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
8887 /// instead of as a custom inserter because we need the use list from the SDNode.
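// For example, a MEMCPY whose register-count operand is 4 gets four fresh
// virtual registers appended as dead defs here; the later MEMCPY expansion
// uses them as the register list of the resulting LDM/STM pair.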
8888 static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
8889  MachineInstr &MI, const SDNode *Node) {
8890  bool isThumb1 = Subtarget->isThumb1Only();
8891 
8892  DebugLoc DL = MI.getDebugLoc();
8893  MachineFunction *MF = MI.getParent()->getParent();
8894  MachineRegisterInfo &MRI = MF->getRegInfo();
8895  MachineInstrBuilder MIB(*MF, MI);
8896 
8897  // If the new dst/src is unused mark it as dead.
8898  if (!Node->hasAnyUseOfValue(0)) {
8899  MI.getOperand(0).setIsDead(true);
8900  }
8901  if (!Node->hasAnyUseOfValue(1)) {
8902  MI.getOperand(1).setIsDead(true);
8903  }
8904 
8905  // The MEMCPY both defines and kills the scratch registers.
8906  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
8907  unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
8908  : &ARM::GPRRegClass);
8909  MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
8910  }
8911 }
8912 
8913  void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
8914  SDNode *Node) const {
8915  if (MI.getOpcode() == ARM::MEMCPY) {
8916  attachMEMCPYScratchRegs(Subtarget, MI, Node);
8917  return;
8918  }
8919 
8920  const MCInstrDesc *MCID = &MI.getDesc();
8921  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
8922  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
8923  // operand is still set to noreg. If needed, set the optional operand's
8924  // register to CPSR, and remove the redundant implicit def.
8925  //
8926  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
8927 
8928  // Rename pseudo opcodes.
8929  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
8930  if (NewOpc) {
8931  const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
8932  MCID = &TII->get(NewOpc);
8933 
8934  assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
8935  "converted opcode should be the same except for cc_out");
8936 
8937  MI.setDesc(*MCID);
8938 
8939  // Add the optional cc_out operand
8940  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
8941  }
8942  unsigned ccOutIdx = MCID->getNumOperands() - 1;
8943 
8944  // Any ARM instruction that sets the 's' bit should specify an optional
8945  // "cc_out" operand in the last operand position.
8946  if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
8947  assert(!NewOpc && "Optional cc_out operand required");
8948  return;
8949  }
8950  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
8951  // since we already have an optional CPSR def.
8952  bool definesCPSR = false;
8953  bool deadCPSR = false;
8954  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
8955  ++i) {
8956  const MachineOperand &MO = MI.getOperand(i);
8957  if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
8958  definesCPSR = true;
8959  if (MO.isDead())
8960  deadCPSR = true;
8961  MI.RemoveOperand(i);
8962  break;
8963  }
8964  }
8965  if (!definesCPSR) {
8966  assert(!NewOpc && "Optional cc_out operand required");
8967  return;
8968  }
8969  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
8970  if (deadCPSR) {
8971  assert(!MI.getOperand(ccOutIdx).getReg() &&
8972  "expect uninitialized optional cc_out operand");
8973  return;
8974  }
8975 
8976  // If this instruction was defined with an optional CPSR def and its dag node
8977  // had a live implicit CPSR def, then activate the optional CPSR def.
8978  MachineOperand &MO = MI.getOperand(ccOutIdx);
8979  MO.setReg(ARM::CPSR);
8980  MO.setIsDef(true);
8981 }
8982 
8983 //===----------------------------------------------------------------------===//
8984 // ARM Optimization Hooks
8985 //===----------------------------------------------------------------------===//
8986 
8987 // Helper function that checks if N is a null or all ones constant.
8988 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
8989  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
8990 }
8991 
8992 // Return true if N is conditionally 0 or all ones.
8993 // Detects these expressions where cc is an i1 value:
8994 //
8995 // (select cc 0, y) [AllOnes=0]
8996 // (select cc y, 0) [AllOnes=0]
8997 // (zext cc) [AllOnes=0]
8998 // (sext cc) [AllOnes=0/1]
8999 // (select cc -1, y) [AllOnes=1]
9000 // (select cc y, -1) [AllOnes=1]
9001 //
9002 // Invert is set when N is the null/all ones constant when CC is false.
9003 // OtherOp is set to the alternative value of N.
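// For example, with AllOnes = false, (select cc, 0, y) yields CC = cc,
// Invert = false and OtherOp = y, whereas (select cc, y, 0) yields the same
// CC and OtherOp but Invert = true.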
9004 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
9005  SDValue &CC, bool &Invert,
9006  SDValue &OtherOp,
9007  SelectionDAG &DAG) {
9008  switch (N->getOpcode()) {
9009  default: return false;
9010  case ISD::SELECT: {
9011  CC = N->getOperand(0);
9012  SDValue N1 = N->getOperand(1);
9013  SDValue N2 = N->getOperand(2);
9014  if (isZeroOrAllOnes(N1, AllOnes)) {
9015  Invert = false;
9016  OtherOp = N2;
9017  return true;
9018  }
9019  if (isZeroOrAllOnes(N2, AllOnes)) {
9020  Invert = true;
9021  OtherOp = N1;
9022  return true;
9023  }
9024  return false;
9025  }
9026  case ISD::ZERO_EXTEND:
9027  // (zext cc) can never be the all ones value.
9028  if (AllOnes)
9029  return false;
9030  LLVM_FALLTHROUGH;
9031  case ISD::SIGN_EXTEND: {
9032  SDLoc dl(N);
9033  EVT VT = N->getValueType(0);
9034  CC = N->getOperand(0);
9035  if (CC.getValueType() != MVT::i1)
9036  return false;
9037  Invert = !AllOnes;
9038  if (AllOnes)
9039  // When looking for an AllOnes constant, N is an sext, and the 'other'
9040  // value is 0.
9041  OtherOp = DAG.getConstant(0, dl, VT);
9042  else if (N->getOpcode() == ISD::ZERO_EXTEND)
9043  // When looking for a 0 constant, N can be zext or sext.
9044  OtherOp = DAG.getConstant(1, dl, VT);
9045  else
9046  OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
9047  VT);
9048  return true;
9049  }
9050  }
9051 }
9052 
9053 // Combine a constant select operand into its use:
9054 //
9055 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
9056 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
9057 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
9058 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
9059 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
9060 //
9061 // The transform is rejected if the select doesn't have a constant operand that
9062 // is null, or all ones when AllOnes is set.
9063 //
9064 // Also recognize sext/zext from i1:
9065 //
9066 // (add (zext cc), x) -> (select cc (add x, 1), x)
9067 // (add (sext cc), x) -> (select cc (add x, -1), x)
9068 //
9069 // These transformations eventually create predicated instructions.
9070 //
9071 // @param N The node to transform.
9072 // @param Slct The N operand that is a select.
9073 // @param OtherOp The other N operand (x above).
9074 // @param DCI Context.
9075 // @param AllOnes Require the select constant to be all ones instead of null.
9076 // @returns The new node, or SDValue() on failure.
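// For example, with AllOnes = false:
//   (add (select cc, 0, 7), x)  -->  (select cc, x, (add x, 7))
// so the addition is only performed on the path where the select did not
// supply the identity value; later lowering turns this into a predicated add.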
9077 static
9078  SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
9079  TargetLowering::DAGCombinerInfo &DCI,
9080  bool AllOnes = false) {
9081  SelectionDAG &DAG = DCI.DAG;
9082  EVT VT = N->getValueType(0);
9083  SDValue NonConstantVal;
9084  SDValue CCOp;
9085  bool SwapSelectOps;
9086  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
9087  NonConstantVal, DAG))
9088  return SDValue();
9089 
9090  // Slct is now known to be the desired identity constant when CC is true.
9091  SDValue TrueVal = OtherOp;
9092  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
9093  OtherOp, NonConstantVal);
9094  // Unless SwapSelectOps says CC should be false.
9095  if (SwapSelectOps)
9096  std::swap(TrueVal, FalseVal);
9097 
9098  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
9099  CCOp, TrueVal, FalseVal);
9100 }
9101 
9102 // Attempt combineSelectAndUse on each operand of a commutative operator N.
9103 static
9104  SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
9105  TargetLowering::DAGCombinerInfo &DCI) {
9106  SDValue N0 = N->getOperand(0);
9107  SDValue N1 = N->getOperand(1);
9108  if (N0.getNode()->hasOneUse())
9109  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
9110  return Result;
9111  if (N1.getNode()->hasOneUse())
9112  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
9113  return Result;
9114  return SDValue();
9115 }
9116 
9117 static bool IsVUZPShuffleNode(SDNode *N) {
9118  // VUZP shuffle node.
9119  if (N->getOpcode() == ARMISD::VUZP)
9120  return true;
9121 
9122  // "VUZP" on i32 is an alias for VTRN.
9123  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
9124  return true;
9125 
9126  return false;
9127 }
9128 
9129  static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
9130  TargetLowering::DAGCombinerInfo &DCI,
9131  const ARMSubtarget *Subtarget) {
9132  // Look for ADD(VUZP.0, VUZP.1).
9133  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
9134  N0 == N1)
9135  return SDValue();
9136 
9137  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
9138  if (!N->getValueType(0).is64BitVector())
9139  return SDValue();
9140 
9141  // Generate vpadd.
9142  SelectionDAG &DAG = DCI.DAG;
9143  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9144  SDLoc dl(N);
9145  SDNode *Unzip = N0.getNode();
9146  EVT VT = N->getValueType(0);
9147 
9148  SmallVector<SDValue, 8> Ops;
9149  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
9150  TLI.getPointerTy(DAG.getDataLayout())));
9151  Ops.push_back(Unzip->getOperand(0));
9152  Ops.push_back(Unzip->getOperand(1));
9153 
9154  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9155 }
9156 
9159  const ARMSubtarget *Subtarget) {
9160  // Check for two extended operands.
9161  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
9162  N1.getOpcode() == ISD::SIGN_EXTEND) &&
9163  !(N0.getOpcode() == ISD::ZERO_EXTEND &&
9164  N1.getOpcode() == ISD::ZERO_EXTEND))
9165  return SDValue();
9166 
9167  SDValue N00 = N0.getOperand(0);
9168  SDValue N10 = N1.getOperand(0);
9169 
9170  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
9171  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
9172  N00 == N10)
9173  return SDValue();
9174 
9175  // We only recognize Q register paddl here; this can't be reached until
9176  // after type legalization.
9177  if (!N00.getValueType().is64BitVector() ||
9178  !N0.getValueType().is128BitVector())
9179  return SDValue();
9180 
9181  // Generate vpaddl.
9182  SelectionDAG &DAG = DCI.DAG;
9183  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9184  SDLoc dl(N);
9185  EVT VT = N->getValueType(0);
9186 
9187  SmallVector<SDValue, 8> Ops;
9188  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
9189  unsigned Opcode;
9190  if (N0.getOpcode() == ISD::SIGN_EXTEND)
9191  Opcode = Intrinsic::arm_neon_vpaddls;
9192  else
9193  Opcode = Intrinsic::arm_neon_vpaddlu;
9194  Ops.push_back(DAG.getConstant(Opcode, dl,
9195  TLI.getPointerTy(DAG.getDataLayout())));
9196  EVT ElemTy = N00.getValueType().getVectorElementType();
9197  unsigned NumElts = VT.getVectorNumElements();
9198  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
9199  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
9200  N00.getOperand(0), N00.getOperand(1));
9201  Ops.push_back(Concat);
9202 
9203  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9204 }
9205 
9206 // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
9207 // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
9208 // much easier to match.
9209 static SDValue
9210  AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
9211  TargetLowering::DAGCombinerInfo &DCI,
9212  const ARMSubtarget *Subtarget) {
9213  // Only perform this optimization after legalization, and only if NEON is
9214  // available. We also expect both operands to be BUILD_VECTORs.
9215  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
9216  || N0.getOpcode() != ISD::BUILD_VECTOR
9217  || N1.getOpcode() != ISD::BUILD_VECTOR)
9218  return SDValue();
9219 
9220  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
9221  EVT VT = N->getValueType(0);
9222  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
9223  return SDValue();
9224 
9225  // Check that the vector operands are of the right form.
9226  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
9227  // operands, where N is the size of the formed vector.
9228  // Each EXTRACT_VECTOR should have the same input vector and odd or even
9229  // index such that we have a pair wise add pattern.
9230 
9231  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
9232  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
9233  return SDValue();
9234  SDValue Vec = N0->getOperand(0)->getOperand(0);
9235  SDNode *V = Vec.getNode();
9236  unsigned nextIndex = 0;
9237 
9238  // For each operands to the ADD which are BUILD_VECTORs,
9239  // check to see if each of their operands are an EXTRACT_VECTOR with
9240  // the same vector and appropriate index.
9241  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
9242  if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
9243  && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9244 
9245  SDValue ExtVec0 = N0->getOperand(i);
9246  SDValue ExtVec1 = N1->getOperand(i);
9247 
9248  // The first operand is the vector; verify it is the same.
9249  if (V != ExtVec0->getOperand(0).getNode() ||
9250  V != ExtVec1->getOperand(0).getNode())
9251  return SDValue();
9252 
9253  // The second operand is the constant index; verify it is correct.
9254  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
9255  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
9256 
9257  // For the constants, we want to see all the even or all the odd indices.
9258  if (!C0 || !C1 || C0->getZExtValue() != nextIndex
9259  || C1->getZExtValue() != nextIndex+1)
9260  return SDValue();
9261 
9262  // Increment index.
9263  nextIndex+=2;
9264  } else
9265  return SDValue();
9266  }
9267 
9268  // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
9269  if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
9270  return SDValue();
9271 
9272  // Create VPADDL node.
9273  SelectionDAG &DAG = DCI.DAG;
9274  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9275 
9276  SDLoc dl(N);
9277 
9278  // Build operand list.
9279  SmallVector<SDValue, 8> Ops;
9280  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
9281  TLI.getPointerTy(DAG.getDataLayout())));
9282 
9283  // Input is the vector.
9284  Ops.push_back(Vec);
9285 
9286  // Get widened type and narrowed type.
9287  MVT widenType;
9288  unsigned numElem = VT.getVectorNumElements();
9289 
9290  EVT inputLaneType = Vec.getValueType().getVectorElementType();
9291  switch (inputLaneType.getSimpleVT().SimpleTy) {
9292  case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
9293  case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
9294  case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
9295  default:
9296  llvm_unreachable("Invalid vector element type for padd optimization.");
9297  }
9298 
9299  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
9300  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
9301  return DAG.getNode(ExtOp, dl, VT, tmp);
9302 }
9303 
9304  static SDValue findMUL_LOHI(SDValue V) {
9305  if (V->getOpcode() == ISD::UMUL_LOHI ||
9306  V->getOpcode() == ISD::SMUL_LOHI)
9307  return V;
9308  return SDValue();
9309 }
9310 
9311  static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
9312  TargetLowering::DAGCombinerInfo &DCI,
9313  const ARMSubtarget *Subtarget) {
9314 
9315  // Look for multiply add opportunities.
9316  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
9317  // each add node consumes a value from ISD::UMUL_LOHI and there is
9318  // a glue link from the first add to the second add.
9319  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
9320  // a S/UMLAL instruction.
9321  // UMUL_LOHI
9322  // / :lo \ :hi
9323  // / \ [no multiline comment]
9324  // loAdd -> ADDE |
9325  // \ :glue /
9326  // \ /
9327  // ADDC <- hiAdd
9328  //
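  // For example, 64-bit accumulation of a 32x32 product, roughly
  //   acc += (uint64_t)a * (uint64_t)b;   // a, b are i32
  // legalizes to UMUL_LOHI plus an ADDC/ADDE pair, which this combine folds
  // into a single UMLAL.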
9329  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
9330  SDValue AddcOp0 = AddcNode->getOperand(0);
9331  SDValue AddcOp1 = AddcNode->getOperand(1);
9332 
9333  // Check if the two operands are from the same mul_lohi node.
9334  if (AddcOp0.getNode() == AddcOp1.getNode())
9335  return SDValue();
9336 
9337  assert(AddcNode->getNumValues() == 2 &&
9338  AddcNode->getValueType(0) == MVT::i32 &&
9339  "Expect ADDC with two result values. First: i32");
9340 
9341  // Check that we have a glued ADDC node.
9342  if (AddcNode->getValueType(1) != MVT::Glue)
9343  return SDValue();
9344 
9345  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
9346  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
9347  AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
9348  AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
9349  AddcOp1->getOpcode() != ISD::SMUL_LOHI)
9350  return SDValue();
9351 
9352  // Look for the glued ADDE.
9353  SDNode* AddeNode = AddcNode->getGluedUser();
9354  if (!AddeNode)
9355  return SDValue();
9356 
9357  // Make sure it is really an ADDE.
9358  if (AddeNode->getOpcode() != ISD::ADDE)
9359  return SDValue();
9360 
9361  assert(AddeNode->getNumOperands() == 3 &&
9362  AddeNode->getOperand(2).getValueType() == MVT::Glue &&
9363  "ADDE node has the wrong inputs");
9364 
9365  // Check for the triangle shape.
9366  SDValue AddeOp0 = AddeNode->getOperand(0);
9367  SDValue AddeOp1 = AddeNode->getOperand(1);
9368 
9369  // Make sure that the ADDE operands are not coming from the same node.
9370  if (AddeOp0.getNode() == AddeOp1.getNode())
9371  return SDValue();
9372 
9373  // Find the MUL_LOHI node walking up ADDE's operands.
9374  bool IsLeftOperandMUL = false;
9375  SDValue MULOp = findMUL_LOHI(AddeOp0);
9376  if (MULOp == SDValue())
9377  MULOp = findMUL_LOHI(AddeOp1);
9378  else
9379  IsLeftOperandMUL = true;
9380  if (MULOp == SDValue())
9381  return SDValue();
9382 
9383  // Figure out the right opcode.
9384  unsigned Opc = MULOp->getOpcode();
9385  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
9386 
9387  // Figure out the high and low input values to the MLAL node.
9388  SDValue* HiAdd = nullptr;
9389  SDValue* LoMul = nullptr;
9390  SDValue* LowAdd = nullptr;
9391 
9392  // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
9393  if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
9394  return SDValue();
9395 
9396  if (IsLeftOperandMUL)
9397  HiAdd = &AddeOp1;
9398  else
9399  HiAdd = &AddeOp0;
9400 
9401 
9402  // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node
9403  // whose low result is fed to the ADDC we are checking.
9404 
9405  if (AddcOp0 == MULOp.getValue(0)) {
9406  LoMul = &AddcOp0;
9407  LowAdd = &AddcOp1;
9408  }
9409  if (AddcOp1 == MULOp.getValue(0)) {
9410  LoMul = &AddcOp1;
9411  LowAdd = &AddcOp0;
9412  }
9413 
9414  if (!LoMul)
9415  return SDValue();
9416 
9417  // Create the merged node.
9418  SelectionDAG &DAG = DCI.DAG;
9419 
9420  // Build operand list.
9421  SmallVector<SDValue, 8> Ops;
9422  Ops.push_back(LoMul->getOperand(0));
9423  Ops.push_back(LoMul->getOperand(1));
9424  Ops.push_back(*LowAdd);
9425  Ops.push_back(*HiAdd);
9426 
9427  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
9428  DAG.getVTList(MVT::i32, MVT::i32), Ops);
9429 
9430  // Replace the ADD nodes' uses with the MLAL node's values.
9431  SDValue HiMLALResult(MLALNode.getNode(), 1);
9432  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
9433 
9434  SDValue LoMLALResult(MLALNode.getNode(), 0);
9435  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
9436 
9437  // Return original node to notify the driver to stop replacing.
9438  SDValue resNode(AddcNode, 0);
9439  return resNode;
9440 }
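// --- Illustrative sketch (standalone C++, not part of this file): the 64-bit
// multiply-accumulate identity the combine above relies on. The ADDC/ADDE pair
// adds a 64-bit accumulator to the UMUL_LOHI result half by half; S/UMLAL does
// the same in one node. The names below are made up for the example.

#include <cassert>
#include <cstdint>

// What the matched DAG computes: ADDC adds the low halves and produces a
// carry; ADDE adds the high halves plus that carry.
static uint64_t addcAddePattern(uint32_t a, uint32_t b, uint64_t acc) {
  uint64_t mul = (uint64_t)a * b;                        // UMUL_LOHI: hi:lo
  uint32_t lo = (uint32_t)mul + (uint32_t)acc;           // ADDC
  uint32_t carry = lo < (uint32_t)mul;                   // carry out of ADDC
  uint32_t hi = (uint32_t)(mul >> 32) + (uint32_t)(acc >> 32) + carry; // ADDE
  return ((uint64_t)hi << 32) | lo;
}

// What UMLAL computes in one instruction: RdHi:RdLo = Rn * Rm + RdHi:RdLo.
static uint64_t umlal(uint32_t a, uint32_t b, uint64_t acc) {
  return (uint64_t)a * b + acc;
}

int main() {
  assert(addcAddePattern(0xDEADBEEFu, 0xCAFEBABEu, 0x0123456789ABCDEFull) ==
         umlal(0xDEADBEEFu, 0xCAFEBABEu, 0x0123456789ABCDEFull));
}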
9441 
9442 static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
9443  TargetLowering::DAGCombinerInfo &DCI,
9444  const ARMSubtarget *Subtarget) {
9445  // UMAAL is similar to UMLAL except that it adds two unsigned values.
9446  // While trying to combine for the other MLAL nodes, first search for the
9447  // chance to use UMAAL. Check if Addc uses another addc node which can first
9448  // be combined into a UMLAL. The other pattern is AddcNode being combined
9449  // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
9450 
9451  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
9452  (Subtarget->isThumb() && !Subtarget->hasThumb2()))
9453  return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
9454 
9455  SDNode *PrevAddc = nullptr;
9456  if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
9457  PrevAddc = AddcNode->getOperand(0).getNode();
9458  else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
9459  PrevAddc = AddcNode->getOperand(1).getNode();
9460 
9461  // If there are no addc chains, just return a search for any MLAL.
9462  if (PrevAddc == nullptr)
9463  return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
9464 
9465  // Try to convert the addc operand to an MLAL and if that fails try to
9466  // combine AddcNode.
9467  SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
9468  if (MLAL != SDValue(PrevAddc, 0))
9469  return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
9470 
9471  // Find the converted UMAAL or quit if it doesn't exist.
9472  SDNode *UmlalNode = nullptr;
9473  SDValue AddHi;
9474  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
9475  UmlalNode = AddcNode->getOperand(0).getNode();
9476  AddHi = AddcNode->getOperand(1);
9477  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
9478  UmlalNode = AddcNode->getOperand(1).getNode();
9479  AddHi = AddcNode->getOperand(0);
9480  } else {
9481  return SDValue();
9482  }
9483 
9484  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
9485  // the ADDC as well as Zero.
9486  auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
9487 
9488  if (!Zero || Zero->getZExtValue() != 0)
9489  return SDValue();
9490 
9491  // Check that we have a glued ADDC node.
9492  if (AddcNode->getValueType(1) != MVT::Glue)
9493  return SDValue();
9494 
9495  // Look for the glued ADDE.
9496  SDNode* AddeNode = AddcNode->getGluedUser();
9497  if (!AddeNode)
9498  return SDValue();
9499 
9500  if ((AddeNode->getOperand(0).getNode() == Zero &&
9501  AddeNode->getOperand(1).getNode() == UmlalNode) ||
9502  (AddeNode->getOperand(0).getNode() == UmlalNode &&
9503  AddeNode->getOperand(1).getNode() == Zero)) {
9504 
9505  SelectionDAG &DAG = DCI.DAG;
9506  SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
9507  UmlalNode->getOperand(2), AddHi };
9508  SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
9509  DAG.getVTList(MVT::i32, MVT::i32), Ops);
9510 
9511  // Replace the ADD nodes' uses with the UMAAL node's values.
9512  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
9513  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
9514 
9515  // Return original node to notify the driver to stop replacing.
9516  return SDValue(AddcNode, 0);
9517  }
9518  return SDValue();
9519 }
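// --- Illustrative sketch (standalone C++, not part of this file): why UMAAL
// fits the pattern above. UMAAL computes Rn * Rm + RdLo + RdHi, and a 32x32
// product plus two 32-bit addends can never overflow 64 bits, so folding the
// extra add into the UMLAL is safe.

#include <cassert>
#include <cstdint>

static uint64_t umaal(uint32_t rn, uint32_t rm, uint32_t rdlo, uint32_t rdhi) {
  return (uint64_t)rn * rm + rdlo + rdhi;   // UMAAL semantics, no overflow
}

int main() {
  // Worst case still fits exactly: (2^32-1)^2 + (2^32-1) + (2^32-1) == 2^64-1.
  assert(umaal(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX) == UINT64_MAX);
}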
9520 
9521 /// PerformADDCCombine - Target-specific dag combine transform from
9522 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
9523 /// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
9524 static SDValue PerformADDCCombine(SDNode *N,
9525  TargetLowering::DAGCombinerInfo &DCI,
9526  const ARMSubtarget *Subtarget) {
9527 
9528  if (Subtarget->isThumb1Only()) return SDValue();
9529 
9530  // Only perform the checks after legalize when the pattern is available.
9531  if (DCI.isBeforeLegalize()) return SDValue();
9532 
9533  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
9534 }
9535 
9536 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
9537 /// operands N0 and N1. This is a helper for PerformADDCombine that is
9538 /// called with the default operands, and if that fails, with commuted
9539 /// operands.
9540 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
9541  TargetLowering::DAGCombinerInfo &DCI,
9542  const ARMSubtarget *Subtarget){
9543  // Attempt to create vpadd for this add.
9544  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
9545  return Result;
9546 
9547  // Attempt to create vpaddl for this add.
9548  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
9549  return Result;
9550  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
9551  Subtarget))
9552  return Result;
9553 
9554  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
9555  if (N0.getNode()->hasOneUse())
9556  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
9557  return Result;
9558  return SDValue();
9559 }
9560 
9561 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
9562 ///
9563 static SDValue PerformADDCombine(SDNode *N,
9564  TargetLowering::DAGCombinerInfo &DCI,
9565  const ARMSubtarget *Subtarget) {
9566  SDValue N0 = N->getOperand(0);
9567  SDValue N1 = N->getOperand(1);
9568 
9569  // First try with the default operand order.
9570  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
9571  return Result;
9572 
9573  // If that didn't work, try again with the operands commuted.
9574  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
9575 }
9576 
9577 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
9578 ///
9579 static SDValue PerformSUBCombine(SDNode *N,
9580  TargetLowering::DAGCombinerInfo &DCI) {
9581  SDValue N0 = N->getOperand(0);
9582  SDValue N1 = N->getOperand(1);
9583 
9584  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
9585  if (N1.getNode()->hasOneUse())
9586  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
9587  return Result;
9588 
9589  return SDValue();
9590 }
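// --- Illustrative sketch (standalone C++, not part of this file): the scalar
// identity behind the combineSelectAndUse folds used by the ADD and SUB
// combines above. Pushing the operation into the select's non-zero arm is
// value-preserving because the other arm contributes zero.

#include <cassert>

int main() {
  int x = 41, c = 7;
  for (int cci = 0; cci < 2; ++cci) {
    bool cc = cci != 0;
    int sel = cc ? 0 : c;
    // (add (select cc, 0, c), x) == (select cc, x, (add x, c))
    assert(sel + x == (cc ? x : x + c));
    // (sub x, (select cc, 0, c)) == (select cc, x, (sub x, c))
    assert(x - sel == (cc ? x : x - c));
  }
}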
9591 
9592 /// PerformVMULCombine
9593 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
9594 /// special multiplier accumulator forwarding.
9595 /// vmul d3, d0, d2
9596 /// vmla d3, d1, d2
9597 /// is faster than
9598 /// vadd d3, d0, d1
9599 /// vmul d3, d3, d2
9600 // However, for (A + B) * (A + B),
9601 // vadd d2, d0, d1
9602 // vmul d3, d0, d2
9603 // vmla d3, d1, d2
9604 // is slower than
9605 // vadd d2, d0, d1
9606 // vmul d3, d2, d2
9607 static SDValue PerformVMULCombine(SDNode *N,
9608  TargetLowering::DAGCombinerInfo &DCI,
9609  const ARMSubtarget *Subtarget) {
9610  if (!Subtarget->hasVMLxForwarding())
9611  return SDValue();
9612 
9613  SelectionDAG &DAG = DCI.DAG;
9614  SDValue N0 = N->getOperand(0);
9615  SDValue N1 = N->getOperand(1);
9616  unsigned Opcode = N0.getOpcode();
9617  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
9618  Opcode != ISD::FADD && Opcode != ISD::FSUB) {
9619  Opcode = N1.getOpcode();
9620  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
9621  Opcode != ISD::FADD && Opcode != ISD::FSUB)
9622  return SDValue();
9623  std::swap(N0, N1);
9624  }
9625 
9626  if (N0 == N1)
9627  return SDValue();
9628 
9629  EVT VT = N->getValueType(0);
9630  SDLoc DL(N);
9631  SDValue N00 = N0->getOperand(0);
9632  SDValue N01 = N0->getOperand(1);
9633  return DAG.getNode(Opcode, DL, VT,
9634  DAG.getNode(ISD::MUL, DL, VT, N00, N1),
9635  DAG.getNode(ISD::MUL, DL, VT, N01, N1));
9636 }
9637 
9638 static SDValue PerformMULCombine(SDNode *N,
9639  TargetLowering::DAGCombinerInfo &DCI,
9640  const ARMSubtarget *Subtarget) {
9641  SelectionDAG &DAG = DCI.DAG;
9642 
9643  if (Subtarget->isThumb1Only())
9644  return SDValue();
9645 
9646  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
9647  return SDValue();
9648 
9649  EVT VT = N->getValueType(0);
9650  if (VT.is64BitVector() || VT.is128BitVector())
9651  return PerformVMULCombine(N, DCI, Subtarget);
9652  if (VT != MVT::i32)
9653  return SDValue();
9654 
9655  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
9656  if (!C)
9657  return SDValue();
9658 
9659  int64_t MulAmt = C->getSExtValue();
9660  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
9661 
9662  ShiftAmt = ShiftAmt & (32 - 1);
9663  SDValue V = N->getOperand(0);
9664  SDLoc DL(N);
9665 
9666  SDValue Res;
9667  MulAmt >>= ShiftAmt;
9668 
9669  if (MulAmt >= 0) {
9670  if (isPowerOf2_32(MulAmt - 1)) {
9671  // (mul x, 2^N + 1) => (add (shl x, N), x)
9672  Res = DAG.getNode(ISD::ADD, DL, VT,
9673  V,
9674  DAG.getNode(ISD::SHL, DL, VT,
9675  V,
9676  DAG.getConstant(Log2_32(MulAmt - 1), DL,
9677  MVT::i32)));
9678  } else if (isPowerOf2_32(MulAmt + 1)) {
9679  // (mul x, 2^N - 1) => (sub (shl x, N), x)
9680  Res = DAG.getNode(ISD::SUB, DL, VT,
9681  DAG.getNode(ISD::SHL, DL, VT,
9682  V,
9683  DAG.getConstant(Log2_32(MulAmt + 1), DL,
9684  MVT::i32)),
9685  V);
9686  } else
9687  return SDValue();
9688  } else {
9689  uint64_t MulAmtAbs = -MulAmt;
9690  if (isPowerOf2_32(MulAmtAbs + 1)) {
9691  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
9692  Res = DAG.getNode(ISD::SUB, DL, VT,
9693  V,
9694  DAG.getNode(ISD::SHL, DL, VT,
9695  V,
9696  DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
9697  MVT::i32)));
9698  } else if (isPowerOf2_32(MulAmtAbs - 1)) {
9699  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
9700  Res = DAG.getNode(ISD::ADD, DL, VT,
9701  V,
9702  DAG.getNode(ISD::SHL, DL, VT,
9703  V,
9704  DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
9705  MVT::i32)));
9706  Res = DAG.getNode(ISD::SUB, DL, VT,
9707  DAG.getConstant(0, DL, MVT::i32), Res);
9708 
9709  } else
9710  return SDValue();
9711  }
9712 
9713  if (ShiftAmt != 0)
9714  Res = DAG.getNode(ISD::SHL, DL, VT,
9715  Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
9716 
9717  // Do not add new nodes to DAG combiner worklist.
9718  DCI.CombineTo(N, Res, false);
9719  return SDValue();
9720 }
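// --- Illustrative sketch (standalone C++, not part of this file): the
// shift-and-add identities PerformMULCombine applies to constant multipliers,
// checked on plain integers.

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 123;
  int n = 3;
  assert(x * ((1 << n) + 1) == (x << n) + x);     // (mul x, 2^n + 1)
  assert(x * ((1 << n) - 1) == (x << n) - x);     // (mul x, 2^n - 1)
  assert(x * -((1 << n) - 1) == x - (x << n));    // (mul x, -(2^n - 1))
  assert(x * -((1 << n) + 1) == -((x << n) + x)); // (mul x, -(2^n + 1))
}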
9721 
9722 static SDValue PerformANDCombine(SDNode *N,
9723  TargetLowering::DAGCombinerInfo &DCI,
9724  const ARMSubtarget *Subtarget) {
9725 
9726  // Attempt to use immediate-form VBIC
9727  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
9728  SDLoc dl(N);
9729  EVT VT = N->getValueType(0);
9730  SelectionDAG &DAG = DCI.DAG;
9731 
9732  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
9733  return SDValue();
9734 
9735  APInt SplatBits, SplatUndef;
9736  unsigned SplatBitSize;
9737  bool HasAnyUndefs;
9738  if (BVN &&
9739  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9740  if (SplatBitSize <= 64) {
9741  EVT VbicVT;
9742  SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
9743  SplatUndef.getZExtValue(), SplatBitSize,
9744  DAG, dl, VbicVT, VT.is128BitVector(),
9745  OtherModImm);
9746  if (Val.getNode()) {
9747  SDValue Input =
9748  DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
9749  SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
9750  return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
9751  }
9752  }
9753  }
9754 
9755  if (!Subtarget->isThumb1Only()) {
9756  // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
9757  if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
9758  return Result;
9759  }
9760 
9761  return SDValue();
9762 }
9763 
9764 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
9765 static SDValue PerformORCombine(SDNode *N,
9766  TargetLowering::DAGCombinerInfo &DCI,
9767  const ARMSubtarget *Subtarget) {
9768  // Attempt to use immediate-form VORR
9769  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
9770  SDLoc dl(N);
9771  EVT VT = N->getValueType(0);
9772  SelectionDAG &DAG = DCI.DAG;
9773 
9774  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
9775  return SDValue();
9776 
9777  APInt SplatBits, SplatUndef;
9778  unsigned SplatBitSize;
9779  bool HasAnyUndefs;
9780  if (BVN && Subtarget->hasNEON() &&
9781  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9782  if (SplatBitSize <= 64) {
9783  EVT VorrVT;
9784  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
9785  SplatUndef.getZExtValue(), SplatBitSize,
9786  DAG, dl, VorrVT, VT.is128BitVector(),
9787  OtherModImm);
9788  if (Val.getNode()) {
9789  SDValue Input =
9790  DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
9791  SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
9792  return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
9793  }
9794  }
9795  }
9796 
9797  if (!Subtarget->isThumb1Only()) {
9798  // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
9799  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
9800  return Result;
9801  }
9802 
9803  // The code below optimizes (or (and X, Y), Z).
9804  // The AND operand needs to have a single user to make these optimizations
9805  // profitable.
9806  SDValue N0 = N->getOperand(0);
9807  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
9808  return SDValue();
9809  SDValue N1 = N->getOperand(1);
9810 
9811  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
9812  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
9813  DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
9814  APInt SplatUndef;
9815  unsigned SplatBitSize;
9816  bool HasAnyUndefs;
9817 
9818  APInt SplatBits0, SplatBits1;
9819  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
9820  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
9821  // Ensure that the second operands of both ANDs are constants
9822  if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
9823  HasAnyUndefs) && !HasAnyUndefs) {
9824  if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
9825  HasAnyUndefs) && !HasAnyUndefs) {
9826  // Ensure that the bit width of the constants are the same and that
9827  // the splat arguments are logical inverses as per the pattern we
9828  // are trying to simplify.
9829  if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
9830  SplatBits0 == ~SplatBits1) {
9831  // Canonicalize the vector type to make instruction selection
9832  // simpler.
9833  EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
9834  SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
9835  N0->getOperand(1),
9836  N0->getOperand(0),
9837  N1->getOperand(0));
9838  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
9839  }
9840  }
9841  }
9842  }
9843 
9844  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
9845  // reasonable.
9846 
9847  // BFI is only available on V6T2+
9848  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
9849  return SDValue();
9850 
9851  SDLoc DL(N);
9852  // 1) or (and A, mask), val => ARMbfi A, val, mask
9853  // iff (val & mask) == val
9854  //
9855  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
9856  // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
9857  // && mask == ~mask2
9858  // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
9859  // && ~mask == mask2
9860  // (i.e., copy a bitfield value into another bitfield of the same width)
9861 
9862  if (VT != MVT::i32)
9863  return SDValue();
9864 
9865  SDValue N00 = N0.getOperand(0);
9866 
9867  // The value and the mask need to be constants so we can verify this is
9868  // actually a bitfield set. If the mask is 0xffff, we can do better
9869  // via a movt instruction, so don't use BFI in that case.
9870  SDValue MaskOp = N0.getOperand(1);
9871  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
9872  if (!MaskC)
9873  return SDValue();
9874  unsigned Mask = MaskC->getZExtValue();
9875  if (Mask == 0xffff)
9876  return SDValue();
9877  SDValue Res;
9878  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
9879  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
9880  if (N1C) {
9881  unsigned Val = N1C->getZExtValue();
9882  if ((Val & ~Mask) != Val)
9883  return SDValue();
9884 
9885  if (ARM::isBitFieldInvertedMask(Mask)) {
9886  Val >>= countTrailingZeros(~Mask);
9887 
9888  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
9889  DAG.getConstant(Val, DL, MVT::i32),
9890  DAG.getConstant(Mask, DL, MVT::i32));
9891 
9892  // Do not add new nodes to DAG combiner worklist.
9893  DCI.CombineTo(N, Res, false);
9894  return SDValue();
9895  }
9896  } else if (N1.getOpcode() == ISD::AND) {
9897  // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
9898  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
9899  if (!N11C)
9900  return SDValue();
9901  unsigned Mask2 = N11C->getZExtValue();
9902 
9903  // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
9904  // as is to match.
9905  if (ARM::isBitFieldInvertedMask(Mask) &&
9906  (Mask == ~Mask2)) {
9907  // The pack halfword instruction works better for masks that fit it,
9908  // so use that when it's available.
9909  if (Subtarget->hasT2ExtractPack() &&
9910  (Mask == 0xffff || Mask == 0xffff0000))
9911  return SDValue();
9912  // 2a
9913  unsigned amt = countTrailingZeros(Mask2);
9914  Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
9915  DAG.getConstant(amt, DL, MVT::i32));
9916  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
9917  DAG.getConstant(Mask, DL, MVT::i32));
9918  // Do not add new nodes to DAG combiner worklist.
9919  DCI.CombineTo(N, Res, false);
9920  return SDValue();
9921  } else if (ARM::isBitFieldInvertedMask(~Mask) &&
9922  (~Mask == Mask2)) {
9923  // The pack halfword instruction works better for masks that fit it,
9924  // so use that when it's available.
9925  if (Subtarget->hasT2ExtractPack() &&
9926  (Mask2 == 0xffff || Mask2 == 0xffff0000))
9927  return SDValue();
9928  // 2b
9929  unsigned lsb = countTrailingZeros(Mask);
9930  Res = DAG.getNode(ISD::SRL, DL, VT, N00,
9931  DAG.getConstant(lsb, DL, MVT::i32));
9932  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
9933  DAG.getConstant(Mask2, DL, MVT::i32));
9934  // Do not add new nodes to DAG combiner worklist.
9935  DCI.CombineTo(N, Res, false);
9936  return SDValue();
9937  }
9938  }
9939 
9940  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
9941  N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
9942  ARM::isBitFieldInvertedMask(~Mask)) {
9943  // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
9944  // where lsb(mask) == #shamt and masked bits of B are known zero.
9945  SDValue ShAmt = N00.getOperand(1);
9946  unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9947  unsigned LSB = countTrailingZeros(Mask);
9948  if (ShAmtC != LSB)
9949  return SDValue();
9950 
9951  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
9952  DAG.getConstant(~Mask, DL, MVT::i32));
9953 
9954  // Do not add new nodes to DAG combiner worklist.
9955  DCI.CombineTo(N, Res, false);
9956  }
9957 
9958  return SDValue();
9959 }
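// --- Illustrative sketch (standalone C++, not part of this file): a scalar
// model of case (1) above. When the OR'd value lies entirely inside the bits
// cleared by the mask, (A & mask) | val is a bitfield insert; the bfiModel
// helper only reflects how this particular combine uses ARMbfi.

#include <cassert>
#include <cstdint>

static uint32_t bfiModel(uint32_t a, uint32_t val, uint32_t mask, unsigned lsb) {
  return (a & mask) | (val << lsb);   // keep A under mask, drop val into ~mask
}

int main() {
  uint32_t A = 0xAABBCCDDu, Mask = 0xFFFF00FFu, Val = 0x00005E00u;
  assert((Val & ~Mask) == Val);               // precondition checked in the code
  unsigned lsb = __builtin_ctz(~Mask);        // countTrailingZeros(~Mask) == 8
  assert(((A & Mask) | Val) == bfiModel(A, Val >> lsb, Mask, lsb));
}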
9960 
9961 static SDValue PerformXORCombine(SDNode *N,
9962  TargetLowering::DAGCombinerInfo &DCI,
9963  const ARMSubtarget *Subtarget) {
9964  EVT VT = N->getValueType(0);
9965  SelectionDAG &DAG = DCI.DAG;
9966 
9967  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
9968  return SDValue();
9969 
9970  if (!Subtarget->isThumb1Only()) {
9971  // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
9972  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
9973  return Result;
9974  }
9975 
9976  return SDValue();
9977 }
9978 
9979 // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
9980 // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
9981 // their position in "to" (Rd).
9982 static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
9983  assert(N->getOpcode() == ARMISD::BFI);
9984 
9985  SDValue From = N->getOperand(1);
9986  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
9987  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
9988 
9989  // If the Base came from a SHR #C, we can deduce that it is really testing bit
9990  // #C in the base of the SHR.
9991  if (From->getOpcode() == ISD::SRL &&
9992  isa<ConstantSDNode>(From->getOperand(1))) {
9993  APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
9994  assert(Shift.getLimitedValue() < 32 && "Shift too large!");
9995  FromMask <<= Shift.getLimitedValue(31);
9996  From = From->getOperand(0);
9997  }
9998 
9999  return From;
10000 }
10001 
10002 // If A and B contain one contiguous set of bits, does A | B == A . B?
10003 //
10004 // Neither A nor B may be zero.
10005 static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
10006  unsigned LastActiveBitInA = A.countTrailingZeros();
10007  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
10008  return LastActiveBitInA - 1 == FirstActiveBitInB;
10009 }
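// --- Illustrative sketch (standalone C++, not part of this file): the
// contiguity test above on plain unsigned values. Two single-run masks
// concatenate exactly when A's lowest set bit sits directly above B's highest
// set bit.

#include <cassert>

static bool properlyConcatenate(unsigned a, unsigned b) {
  unsigned lowestA = __builtin_ctz(a);          // like APInt::countTrailingZeros
  unsigned highestB = 31u - __builtin_clz(b);   // index of B's top set bit
  return lowestA - 1 == highestB;
}

int main() {
  assert(properlyConcatenate(0xCu, 0x3u));      // 0b1100 | 0b0011 = 0b1111
  assert(!properlyConcatenate(0x18u, 0x3u));    // 0b11000 | 0b00011 leaves a gap
}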
10010 
10011 static SDValue FindBFIToCombineWith(SDNode *N) {
10012  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
10013  // if one exists.
10014  APInt ToMask, FromMask;
10015  SDValue From = ParseBFI(N, ToMask, FromMask);
10016  SDValue To = N->getOperand(0);
10017 
10018  // Now check for a compatible BFI to merge with. We can pass through BFIs that
10019  // aren't compatible, but not if they set the same bit in their destination as
10020  // we do (or that of any BFI we're going to combine with).
10021  SDValue V = To;
10022  APInt CombinedToMask = ToMask;
10023  while (V.getOpcode() == ARMISD::BFI) {
10024  APInt NewToMask, NewFromMask;
10025  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
10026  if (NewFrom != From) {
10027  // This BFI has a different base. Keep going.
10028  CombinedToMask |= NewToMask;
10029  V = V.getOperand(0);
10030  continue;
10031  }
10032 
10033  // Do the written bits conflict with any we've seen so far?
10034  if ((NewToMask & CombinedToMask).getBoolValue())
10035  // Conflicting bits - bail out because going further is unsafe.
10036  return SDValue();
10037 
10038  // Are the new bits contiguous when combined with the old bits?
10039  if (BitsProperlyConcatenate(ToMask, NewToMask) &&
10040  BitsProperlyConcatenate(FromMask, NewFromMask))
10041  return V;
10042  if (BitsProperlyConcatenate(NewToMask, ToMask) &&
10043  BitsProperlyConcatenate(NewFromMask, FromMask))
10044  return V;
10045 
10046  // We've seen a write to some bits, so track it.
10047  CombinedToMask |= NewToMask;
10048  // Keep going...
10049  V = V.getOperand(0);
10050  }
10051 
10052  return SDValue();
10053 }
10054 
10055 static SDValue PerformBFICombine(SDNode *N,
10056  TargetLowering::DAGCombinerInfo &DCI) {
10057  SDValue N1 = N->getOperand(1);
10058  if (N1.getOpcode() == ISD::AND) {
10059  // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
10060  // the bits being cleared by the AND are not demanded by the BFI.
10061  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
10062  if (!N11C)
10063  return SDValue();
10064  unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
10065  unsigned LSB = countTrailingZeros(~InvMask);
10066  unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
10067  assert(Width <
10068  static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
10069  "undefined behavior");
10070  unsigned Mask = (1u << Width) - 1;
10071  unsigned Mask2 = N11C->getZExtValue();
10072  if ((Mask & (~Mask2)) == 0)
10073  return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
10074  N->getOperand(0), N1.getOperand(0),
10075  N->getOperand(2));
10076  } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
10077  // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
10078  // Keep track of any consecutive bits set that all come from the same base
10079  // value. We can combine these together into a single BFI.
10080  SDValue CombineBFI = FindBFIToCombineWith(N);
10081  if (CombineBFI == SDValue())
10082  return SDValue();
10083 
10084  // We've found a BFI.
10085  APInt ToMask1, FromMask1;
10086  SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
10087 
10088  APInt ToMask2, FromMask2;
10089  SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
10090  assert(From1 == From2);
10091  (void)From2;
10092 
10093  // First, unlink CombineBFI.
10094  DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
10095  // Then create a new BFI, combining the two together.
10096  APInt NewFromMask = FromMask1 | FromMask2;
10097  APInt NewToMask = ToMask1 | ToMask2;
10098 
10099  EVT VT = N->getValueType(0);
10100  SDLoc dl(N);
10101 
10102  if (NewFromMask[0] == 0)
10103  From1 = DCI.DAG.getNode(
10104  ISD::SRL, dl, VT, From1,
10105  DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
10106  return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
10107  DCI.DAG.getConstant(~NewToMask, dl, VT));
10108  }
10109  return SDValue();
10110 }
10111 
10112 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
10113 /// ARMISD::VMOVRRD.
10114 static SDValue PerformVMOVRRDCombine(SDNode *N,
10115  TargetLowering::DAGCombinerInfo &DCI,
10116  const ARMSubtarget *Subtarget) {
10117  // vmovrrd(vmovdrr x, y) -> x,y
10118  SDValue InDouble = N->getOperand(0);
10119  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
10120  return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
10121 
10122  // vmovrrd(load f64) -> (load i32), (load i32)
10123  SDNode *InNode = InDouble.getNode();
10124  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
10125  InNode->getValueType(0) == MVT::f64 &&
10126  InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
10127  !cast<LoadSDNode>(InNode)->isVolatile()) {
10128  // TODO: Should this be done for non-FrameIndex operands?
10129  LoadSDNode *LD = cast<LoadSDNode>(InNode);
10130 
10131  SelectionDAG &DAG = DCI.DAG;
10132  SDLoc DL(LD);
10133  SDValue BasePtr = LD->getBasePtr();
10134  SDValue NewLD1 =
10135  DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
10136  LD->getAlignment(), LD->getMemOperand()->getFlags());
10137 
10138  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
10139  DAG.getConstant(4, DL, MVT::i32));
10140  SDValue NewLD2 = DAG.getLoad(
10141  MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
10142  std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
10143 
10144  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
10145  if (DCI.DAG.getDataLayout().isBigEndian())
10146  std::swap (NewLD1, NewLD2);
10147  SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
10148  return Result;
10149  }
10150 
10151  return SDValue();
10152 }
10153 
10154 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
10155 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
10156 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
10157  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
10158  SDValue Op0 = N->getOperand(0);
10159  SDValue Op1 = N->getOperand(1);
10160  if (Op0.getOpcode() == ISD::BITCAST)
10161  Op0 = Op0.getOperand(0);
10162  if (Op1.getOpcode() == ISD::BITCAST)
10163  Op1 = Op1.getOperand(0);
10164  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
10165  Op0.getNode() == Op1.getNode() &&
10166  Op0.getResNo() == 0 && Op1.getResNo() == 1)
10167  return DAG.getNode(ISD::BITCAST, SDLoc(N),
10168  N->getValueType(0), Op0.getOperand(0));
10169  return SDValue();
10170 }
10171 
10172 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
10173 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
10174 /// i64 vector to have f64 elements, since the value can then be loaded
10175 /// directly into a VFP register.
10176 static bool hasNormalLoadOperand(SDNode *N) {
10177  unsigned NumElts = N->getValueType(0).getVectorNumElements();
10178  for (unsigned i = 0; i < NumElts; ++i) {
10179  SDNode *Elt = N->getOperand(i).getNode();
10180  if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
10181  return true;
10182  }
10183  return false;
10184 }
10185 
10186 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
10187 /// ISD::BUILD_VECTOR.
10188 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
10189  TargetLowering::DAGCombinerInfo &DCI,
10190  const ARMSubtarget *Subtarget) {
10191  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
10192  // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
10193  // into a pair of GPRs, which is fine when the value is used as a scalar,
10194  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
10195  SelectionDAG &DAG = DCI.DAG;
10196  if (N->getNumOperands() == 2)
10197  if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
10198  return RV;
10199 
10200  // Load i64 elements as f64 values so that type legalization does not split
10201  // them up into i32 values.
10202  EVT VT = N->getValueType(0);
10203  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
10204  return SDValue();
10205  SDLoc dl(N);
10206  SmallVector<SDValue, 8> Ops;
10207  unsigned NumElts = VT.getVectorNumElements();
10208  for (unsigned i = 0; i < NumElts; ++i) {
10209  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
10210  Ops.push_back(V);
10211  // Make the DAGCombiner fold the bitcast.
10212  DCI.AddToWorklist(V.getNode());
10213  }
10214  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
10215  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
10216  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
10217 }
10218 
10219 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
10220 static SDValue
10221 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
10222  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
10223  // At that time, we may have inserted bitcasts from integer to float.
10224  // If these bitcasts have survived DAGCombine, change the lowering of this
10225  // BUILD_VECTOR in something more vector friendly, i.e., that does not
10226  // force to use floating point types.
10227 
10228  // Make sure we can change the type of the vector.
10229  // This is possible iff:
10230  // 1. The vector is only used in a bitcast to an integer type. I.e.,
10231  // 1.1. Vector is used only once.
10232  // 1.2. Use is a bit convert to an integer type.
10233  // 2. The size of its operands is 32 bits (64-bit operands are not legal).
10234  EVT VT = N->getValueType(0);
10235  EVT EltVT = VT.getVectorElementType();
10236 
10237  // Check 1.1. and 2.
10238  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
10239  return SDValue();
10240 
10241  // By construction, the input type must be float.
10242  assert(EltVT == MVT::f32 && "Unexpected type!");
10243 
10244  // Check 1.2.
10245  SDNode *Use = *N->use_begin();
10246  if (Use->getOpcode() != ISD::BITCAST ||
10247  Use->getValueType(0).isFloatingPoint())
10248  return SDValue();
10249 
10250  // Check profitability.
10251  // Model is, if more than half of the relevant operands are bitcast from
10252  // i32, turn the build_vector into a sequence of insert_vector_elt.
10253  // Relevant operands are everything that is not statically
10254  // (i.e., at compile time) bitcasted.
10255  unsigned NumOfBitCastedElts = 0;
10256  unsigned NumElts = VT.getVectorNumElements();
10257  unsigned NumOfRelevantElts = NumElts;
10258  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
10259  SDValue Elt = N->getOperand(Idx);
10260  if (Elt->getOpcode() == ISD::BITCAST) {
10261  // Assume only bit cast to i32 will go away.
10262  if (Elt->getOperand(0).getValueType() == MVT::i32)
10263  ++NumOfBitCastedElts;
10264  } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
10265  // Constants are statically casted, thus do not count them as
10266  // relevant operands.
10267  --NumOfRelevantElts;
10268  }
10269 
10270  // Check if more than half of the elements require a non-free bitcast.
10271  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
10272  return SDValue();
10273 
10274  SelectionDAG &DAG = DCI.DAG;
10275  // Create the new vector type.
10276  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
10277  // Check if the type is legal.
10278  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10279  if (!TLI.isTypeLegal(VecVT))
10280  return SDValue();
10281 
10282  // Combine:
10283  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
10284  // => BITCAST INSERT_VECTOR_ELT
10285  // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
10286  // (BITCAST EN), N.
10287  SDValue Vec = DAG.getUNDEF(VecVT);
10288  SDLoc dl(N);
10289  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
10290  SDValue V = N->getOperand(Idx);
10291  if (V.isUndef())
10292  continue;
10293  if (V.getOpcode() == ISD::BITCAST &&
10294  V->getOperand(0).getValueType() == MVT::i32)
10295  // Fold obvious case.
10296  V = V.getOperand(0);
10297  else {
10298  V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
10299  // Make the DAGCombiner fold the bitcasts.
10300  DCI.AddToWorklist(V.getNode());
10301  }
10302  SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
10303  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
10304  }
10305  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
10306  // Make the DAGCombiner fold the bitcasts.
10307  DCI.AddToWorklist(Vec.getNode());
10308  return Vec;
10309 }
10310 
10311 /// PerformInsertEltCombine - Target-specific dag combine xforms for
10312 /// ISD::INSERT_VECTOR_ELT.
10313 static SDValue PerformInsertEltCombine(SDNode *N,
10314  TargetLowering::DAGCombinerInfo &DCI) {
10315  // Bitcast an i64 load inserted into a vector to f64.
10316  // Otherwise, the i64 value will be legalized to a pair of i32 values.
10317  EVT VT = N->getValueType(0);
10318  SDNode *Elt = N->getOperand(1).getNode();
10319  if (VT.getVectorElementType() != MVT::i64 ||
10320  !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
10321  return SDValue();
10322 
10323  SelectionDAG &DAG = DCI.DAG;
10324  SDLoc dl(N);
10325  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
10326  VT.getVectorNumElements());
10327  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
10328  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
10329  // Make the DAGCombiner fold the bitcasts.
10330  DCI.AddToWorklist(Vec.getNode());
10331  DCI.AddToWorklist(V.getNode());
10332  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
10333  Vec, V, N->getOperand(2));
10334  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
10335 }
10336 
10337 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
10338 /// ISD::VECTOR_SHUFFLE.
10339 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
10340  // The LLVM shufflevector instruction does not require the shuffle mask
10341  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
10342  // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
10343  // operands do not match the mask length, they are extended by concatenating
10344  // them with undef vectors. That is probably the right thing for other
10345  // targets, but for NEON it is better to concatenate two double-register
10346  // size vector operands into a single quad-register size vector. Do that
10347  // transformation here:
10348  // shuffle(concat(v1, undef), concat(v2, undef)) ->
10349  // shuffle(concat(v1, v2), undef)
10350  SDValue Op0 = N->getOperand(0);
10351  SDValue Op1 = N->getOperand(1);
10352  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
10353  Op1.getOpcode() != ISD::CONCAT_VECTORS ||
10354  Op0.getNumOperands() != 2 ||
10355  Op1.getNumOperands() != 2)
10356  return SDValue();
10357  SDValue Concat0Op1 = Op0.getOperand(1);
10358  SDValue Concat1Op1 = Op1.getOperand(1);
10359  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
10360  return SDValue();
10361  // Skip the transformation if any of the types are illegal.
10362  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10363  EVT VT = N->getValueType(0);
10364  if (!TLI.isTypeLegal(VT) ||
10365  !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
10366  !TLI.isTypeLegal(Concat1Op1.getValueType()))
10367  return SDValue();
10368 
10369  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
10370  Op0.getOperand(0), Op1.getOperand(0));
10371  // Translate the shuffle mask.
10372  SmallVector<int, 16> NewMask;
10373  unsigned NumElts = VT.getVectorNumElements();
10374  unsigned HalfElts = NumElts/2;
10375  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
10376  for (unsigned n = 0; n < NumElts; ++n) {
10377  int MaskElt = SVN->getMaskElt(n);
10378  int NewElt = -1;
10379  if (MaskElt < (int)HalfElts)
10380  NewElt = MaskElt;
10381  else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
10382  NewElt = HalfElts + MaskElt - NumElts;
10383  NewMask.push_back(NewElt);
10384  }
10385  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
10386  DAG.getUNDEF(VT), NewMask);
10387 }
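// --- Illustrative sketch (standalone C++, not part of this file): the mask
// translation performed above for a v4 shuffle (NumElts = 4, HalfElts = 2).
// Lanes that indexed the low half of concat(v2, undef) are remapped so the
// same interleave is expressed on a single concat(v1, v2).

#include <cassert>
#include <vector>

int main() {
  const int NumElts = 4, HalfElts = NumElts / 2;
  std::vector<int> OldMask = {0, 4, 1, 5};   // indexes concat(v1,u) ++ concat(v2,u)
  std::vector<int> NewMask;
  for (int MaskElt : OldMask) {
    int NewElt = -1;                                     // undef by default
    if (MaskElt < HalfElts)
      NewElt = MaskElt;                                  // v1 lanes are unchanged
    else if (MaskElt >= NumElts && MaskElt < NumElts + HalfElts)
      NewElt = HalfElts + MaskElt - NumElts;             // v2 lanes shift down
    NewMask.push_back(NewElt);
  }
  assert((NewMask == std::vector<int>{0, 2, 1, 3}));
}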
10388 
10389 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
10390 /// NEON load/store intrinsics, and generic vector load/stores, to merge
10391 /// base address updates.
10392 /// For generic load/stores, the memory type is assumed to be a vector.
10393 /// The caller is assumed to have checked legality.
10394 static SDValue CombineBaseUpdate(SDNode *N,
10395  TargetLowering::DAGCombinerInfo &DCI) {
10396  SelectionDAG &DAG = DCI.DAG;
10397  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
10398  N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
10399  const bool isStore = N->getOpcode() == ISD::STORE;
10400  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
10401  SDValue Addr = N->getOperand(AddrOpIdx);
10402  MemSDNode *MemN = cast<MemSDNode>(N);
10403  SDLoc dl(N);
10404 
10405  // Search for a use of the address operand that is an increment.
10406  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
10407  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
10408  SDNode *User = *UI;
10409  if (User->getOpcode() != ISD::ADD ||
10410  UI.getUse().getResNo() != Addr.getResNo())
10411  continue;
10412 
10413  // Check that the add is independent of the load/store. Otherwise, folding
10414  // it would create a cycle.
10415  if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
10416  continue;
10417 
10418  // Find the new opcode for the updating load/store.
10419  bool isLoadOp = true;
10420  bool isLaneOp = false;
10421  unsigned NewOpc = 0;
10422  unsigned NumVecs = 0;
10423  if (isIntrinsic) {
10424  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
10425  switch (IntNo) {
10426  default: llvm_unreachable("unexpected intrinsic for Neon base update");
10427  case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
10428  NumVecs = 1; break;
10429  case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
10430  NumVecs = 2; break;
10431  case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
10432  NumVecs = 3; break;
10433  case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
10434  NumVecs = 4; break;
10435  case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
10436  NumVecs = 2; isLaneOp = true; break;
10437  case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
10438  NumVecs = 3; isLaneOp = true; break;
10439  case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
10440  NumVecs = 4; isLaneOp = true; break;
10441  case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
10442  NumVecs = 1; isLoadOp = false; break;
10443  case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
10444  NumVecs = 2; isLoadOp = false; break;
10445  case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
10446  NumVecs = 3; isLoadOp = false; break;
10447  case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
10448  NumVecs = 4; isLoadOp = false; break;
10449  case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
10450  NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
10451  case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
10452  NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
10453  case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
10454  NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
10455  }
10456  } else {
10457  isLaneOp = true;
10458  switch (N->getOpcode()) {
10459  default: llvm_unreachable("unexpected opcode for Neon base update");
10460  case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
10461  case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
10462  case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
10463  case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
10464  case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
10465  NumVecs = 1; isLaneOp = false; break;
10466  case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
10467  NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
10468  }
10469  }
10470 
10471  // Find the size of memory referenced by the load/store.
10472  EVT VecTy;
10473  if (isLoadOp) {
10474  VecTy = N->getValueType(0);
10475  } else if (isIntrinsic) {
10476  VecTy = N->getOperand(AddrOpIdx+1).getValueType();
10477  } else {
10478  assert(isStore && "Node has to be a load, a store, or an intrinsic!");
10479  VecTy = N->getOperand(1).getValueType();
10480  }
10481 
10482  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
10483  if (isLaneOp)
10484  NumBytes /= VecTy.getVectorNumElements();
10485 
10486  // If the increment is a constant, it must match the memory ref size.
10487  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
10488  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
10489  uint64_t IncVal = CInc->getZExtValue();
10490  if (IncVal != NumBytes)
10491  continue;
10492  } else if (NumBytes >= 3 * 16) {
10493  // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
10494  // separate instructions that make it harder to use a non-constant update.
10495  continue;
10496  }
10497 
10498  // OK, we found an ADD we can fold into the base update.
10499  // Now, create a _UPD node, taking care of not breaking alignment.
10500 
10501  EVT AlignedVecTy = VecTy;
10502  unsigned Alignment = MemN->getAlignment();
10503 
10504  // If this is a less-than-standard-aligned load/store, change the type to
10505  // match the standard alignment.
10506  // The alignment is overlooked when selecting _UPD variants; and it's
10507  // easier to introduce bitcasts here than fix that.
10508  // There are 3 ways to get to this base-update combine:
10509  // - intrinsics: they are assumed to be properly aligned (to the standard
10510  // alignment of the memory type), so we don't need to do anything.
10511  // - ARMISD::VLDx nodes: they are only generated from the aforementioned
10512  // intrinsics, so, likewise, there's nothing to do.
10513  // - generic load/store instructions: the alignment is specified as an
10514  // explicit operand, rather than implicitly as the standard alignment
10515  // of the memory type (like the intrinsics). We need to change the
10516  // memory type to match the explicit alignment. That way, we don't
10517  // generate non-standard-aligned ARMISD::VLDx nodes.
10518  if (isa<LSBaseSDNode>(N)) {
10519  if (Alignment == 0)
10520  Alignment = 1;
10521  if (Alignment < VecTy.getScalarSizeInBits() / 8) {
10522  MVT EltTy = MVT::getIntegerVT(Alignment * 8);
10523  assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
10524  assert(!isLaneOp && "Unexpected generic load/store lane.");
10525  unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
10526  AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
10527  }
10528  // Don't set an explicit alignment on regular load/stores that we want
10529  // to transform to VLD/VST 1_UPD nodes.
10530  // This matches the behavior of regular load/stores, which only get an
10531  // explicit alignment if the MMO alignment is larger than the standard
10532  // alignment of the memory type.
10533  // Intrinsics, however, always get an explicit alignment, set to the
10534  // alignment of the MMO.
10535  Alignment = 1;
10536  }
10537 
10538  // Create the new updating load/store node.
10539  // First, create an SDVTList for the new updating node's results.
10540  EVT Tys[6];
10541  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
10542  unsigned n;
10543  for (n = 0; n < NumResultVecs; ++n)
10544  Tys[n] = AlignedVecTy;
10545  Tys[n++] = MVT::i32;
10546  Tys[n] = MVT::Other;
10547  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
10548 
10549  // Then, gather the new node's operands.
10550  SmallVector<SDValue, 8> Ops;
10551  Ops.push_back(N->getOperand(0)); // incoming chain
10552  Ops.push_back(N->getOperand(AddrOpIdx));
10553  Ops.push_back(Inc);
10554 
10555  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
10556  // Try to match the intrinsic's signature
10557  Ops.push_back(StN->getValue());
10558  } else {
10559  // Loads (and of course intrinsics) match the intrinsics' signature,
10560  // so just add all but the alignment operand.
10561  for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
10562  Ops.push_back(N->getOperand(i));
10563  }
10564 
10565  // For all node types, the alignment operand is always the last one.
10566  Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
10567 
10568  // If this is a non-standard-aligned STORE, the penultimate operand is the
10569  // stored value. Bitcast it to the aligned type.
10570  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
10571  SDValue &StVal = Ops[Ops.size()-2];
10572  StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
10573  }
10574 
10575  EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
10576  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
10577  MemN->getMemOperand());
10578 
10579  // Update the uses.
10580  SmallVector<SDValue, 5> NewResults;
10581  for (unsigned i = 0; i < NumResultVecs; ++i)
10582  NewResults.push_back(SDValue(UpdN.getNode(), i));
10583 
10584  // If this is a non-standard-aligned LOAD, the first result is the loaded
10585  // value. Bitcast it to the expected result type.
10586  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
10587  SDValue &LdVal = NewResults[0];
10588  LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
10589  }
10590 
10591  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
10592  DCI.CombineTo(N, NewResults);
10593  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
10594 
10595  break;
10596  }
10597  return SDValue();
10598 }
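// --- Illustrative sketch (standalone C++, not part of this file): what the
// folded _UPD node buys for a plain 64-bit vector load. The post-incremented
// pointer comes back together with the loaded value, and the fold is only
// legal when the ADD's constant equals the bytes accessed (8 here), matching
// the "IncVal != NumBytes" check above.

#include <cassert>
#include <cstdint>
#include <cstring>

struct Vld1Upd { uint64_t value; const uint8_t *nextPtr; };

static Vld1Upd vld1Upd(const uint8_t *p) {
  Vld1Upd r;
  std::memcpy(&r.value, p, sizeof(r.value));   // the load
  r.nextPtr = p + sizeof(r.value);             // the folded base update
  return r;
}

int main() {
  uint8_t buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  Vld1Upd r = vld1Upd(buf);
  assert(r.nextPtr == buf + 8);
  assert(std::memcmp(&r.value, buf, 8) == 0);
}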
10599 
10600 static SDValue PerformVLDCombine(SDNode *N,
10601  TargetLowering::DAGCombinerInfo &DCI) {
10602  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10603  return SDValue();
10604 
10605  return CombineBaseUpdate(N, DCI);
10606 }
10607 
10608 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
10609 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
10610 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
10611 /// return true.
10612 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
10613  SelectionDAG &DAG = DCI.DAG;
10614  EVT VT = N->getValueType(0);
10615  // vldN-dup instructions only support 64-bit vectors for N > 1.
10616  if (!VT.is64BitVector())
10617  return false;
10618 
10619  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
10620  SDNode *VLD = N->getOperand(0).getNode();
10621  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
10622  return false;
10623  unsigned NumVecs = 0;
10624  unsigned NewOpc = 0;
10625  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
10626  if (IntNo == Intrinsic::arm_neon_vld2lane) {
10627  NumVecs = 2;
10628  NewOpc = ARMISD::VLD2DUP;
10629  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
10630  NumVecs = 3;
10631  NewOpc = ARMISD::VLD3DUP;
10632  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
10633  NumVecs = 4;
10634  NewOpc = ARMISD::VLD4DUP;
10635  } else {
10636  return false;
10637  }
10638 
10639  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
10640  // numbers match the load.
10641  unsigned VLDLaneNo =
10642  cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
10643  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
10644  UI != UE; ++UI) {
10645  // Ignore uses of the chain result.
10646  if (UI.getUse().getResNo() == NumVecs)
10647  continue;
10648  SDNode *User = *UI;
10649  if (User->getOpcode() != ARMISD::VDUPLANE ||
10650  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
10651  return false;
10652  }
10653 
10654  // Create the vldN-dup node.
10655  EVT Tys[5];
10656  unsigned n;
10657  for (n = 0; n < NumVecs; ++n)
10658  Tys[n] = VT;
10659  Tys[n] = MVT::Other;
10660  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
10661  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
10662  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
10663  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
10664  Ops, VLDMemInt->getMemoryVT(),
10665  VLDMemInt->getMemOperand());
10666 
10667  // Update the uses.
10668  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
10669  UI != UE; ++UI) {
10670  unsigned ResNo = UI.getUse().getResNo();
10671  // Ignore uses of the chain result.
10672  if (ResNo == NumVecs)
10673  continue;
10674  SDNode *User = *UI;
10675  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
10676  }
10677 
10678  // Now the vldN-lane intrinsic is dead except for its chain result.
10679  // Update uses of the chain.
10680  std::vector<SDValue> VLDDupResults;
10681  for (unsigned n = 0; n < NumVecs; ++n)
10682  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
10683  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
10684  DCI.CombineTo(VLD, VLDDupResults);
10685 
10686  return true;
10687 }
10688 
10689 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
10690 /// ARMISD::VDUPLANE.
10691 static SDValue PerformVDUPLANECombine(SDNode *N,
10692  TargetLowering::DAGCombinerInfo &DCI) {
10693  SDValue Op = N->getOperand(0);
10694 
10695  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
10696  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
10697  if (CombineVLDDUP(N, DCI))
10698  return SDValue(N, 0);
10699 
10700  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
10701  // redundant. Ignore bit_converts for now; element sizes are checked below.
10702  while (Op.getOpcode() == ISD::BITCAST)
10703  Op = Op.getOperand(0);
10704  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
10705  return SDValue();
10706 
10707  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
10708  unsigned EltSize = Op.getScalarValueSizeInBits();
10709  // The canonical VMOV for a zero vector uses a 32-bit element size.
10710  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10711  unsigned EltBits;
10712  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
10713  EltSize = 8;
10714  EVT VT = N->getValueType(0);
10715  if (EltSize > VT.getScalarSizeInBits())
10716  return SDValue();
10717 
10718  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
10719 }
10720 
10721 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
10722 static SDValue PerformVDUPCombine(SDNode *N,
10723  TargetLowering::DAGCombinerInfo &DCI) {
10724  SelectionDAG &DAG = DCI.DAG;
10725  SDValue Op = N->getOperand(0);
10726 
10727  // Match VDUP(LOAD) -> VLD1DUP.
10728  // We match this pattern here rather than waiting for isel because the
10729  // transform is only legal for unindexed loads.
10730  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
10731  if (LD && Op.hasOneUse() && LD->isUnindexed() &&
10732  LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
10733  SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
10734  DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
10735  SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
10736  SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
10737  Ops, LD->getMemoryVT(),
10738  LD->getMemOperand());
10739  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
10740  return VLDDup;
10741  }
10742 
10743  return SDValue();
10744 }
10745 
10746 static SDValue PerformLOADCombine(SDNode *N,
10747  TargetLowering::DAGCombinerInfo &DCI) {
10748  EVT VT = N->getValueType(0);
10749 
10750  // If this is a legal vector load, try to combine it into a VLD1_UPD.
10751  if (ISD::isNormalLoad(N) && VT.isVector() &&
10752  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
10753  return CombineBaseUpdate(N, DCI);
10754 
10755  return SDValue();
10756 }
10757 
10758 /// PerformSTORECombine - Target-specific dag combine xforms for
10759 /// ISD::STORE.
10760 static SDValue PerformSTORECombine(SDNode *N,
10761  TargetLowering::DAGCombinerInfo &DCI) {
10762  StoreSDNode *St = cast<StoreSDNode>(N);
10763  if (St->isVolatile())
10764  return SDValue();
10765 
10766  // Optimize trunc store (of multiple scalars) to shuffle and store. First,
10767  // pack all of the elements in one place. Next, store to memory in fewer
10768  // chunks.
10769  SDValue StVal = St->getValue();
10770  EVT VT = StVal.getValueType();
10771  if (St->isTruncatingStore() && VT.isVector()) {
10772  SelectionDAG &DAG = DCI.DAG;
10773  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10774  EVT StVT = St->getMemoryVT();
10775  unsigned NumElems = VT.getVectorNumElements();
10776  assert(StVT != VT && "Cannot truncate to the same type");
10777  unsigned FromEltSz = VT.getScalarSizeInBits();
10778  unsigned ToEltSz = StVT.getScalarSizeInBits();
10779 
10780  // From, To sizes and ElemCount must be pow of two
10781  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
10782 
10783  // We are going to use the original vector elt for storing.
10784  // Accumulated smaller vector elements must be a multiple of the store size.
10785  if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
10786 
10787  unsigned SizeRatio = FromEltSz / ToEltSz;
10788  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
10789 
10790  // Create a type on which we perform the shuffle.
10791  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
10792  NumElems*SizeRatio);
10793  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
10794 
10795  SDLoc DL(St);
10796  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
10797  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
10798  for (unsigned i = 0; i < NumElems; ++i)
10799  ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
10800  ? (i + 1) * SizeRatio - 1
10801  : i * SizeRatio;
10802 
10803  // Can't shuffle using an illegal type.
10804  if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
10805 
10806  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
10807  DAG.getUNDEF(WideVec.getValueType()),
10808  ShuffleVec);
10809  // At this point all of the data is stored at the bottom of the
10810  // register. We now need to save it to mem.
10811 
10812  // Find the largest store unit
10813  MVT StoreType = MVT::i8;
10814  for (MVT Tp : MVT::integer_valuetypes()) {
10815  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
10816  StoreType = Tp;
10817  }
10818  // Didn't find a legal store type.
10819  if (!TLI.isTypeLegal(StoreType))
10820  return SDValue();
10821 
10822  // Bitcast the original vector into a vector of store-size units
10823  EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
10824  StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
10825  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
10826  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
10827  SmallVector<SDValue, 8> Chains;
10828  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
10829  TLI.getPointerTy(DAG.getDataLayout()));
10830  SDValue BasePtr = St->getBasePtr();
10831 
10832  // Perform one or more big stores into memory.
10833  unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
10834  for (unsigned I = 0; I < E; I++) {
10835  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
10836  StoreType, ShuffWide,
10837  DAG.getIntPtrConstant(I, DL));
10838  SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
10839  St->getPointerInfo(), St->getAlignment(),
10840  St->getMemOperand()->getFlags());
10841  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
10842  Increment);
10843  Chains.push_back(Ch);
10844  }
10845  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10846  }
10847 
10848  if (!ISD::isNormalStore(St))
10849  return SDValue();
10850 
10851  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
10852  // ARM stores of arguments in the same cache line.
10853  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
10854  StVal.getNode()->hasOneUse()) {
10855  SelectionDAG &DAG = DCI.DAG;
10856  bool isBigEndian = DAG.getDataLayout().isBigEndian();
10857  SDLoc DL(St);
10858  SDValue BasePtr = St->getBasePtr();
10859  SDValue NewST1 = DAG.getStore(
10860  St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
10861  BasePtr, St->getPointerInfo(), St->getAlignment(),
10862  St->getMemOperand()->getFlags());
10863 
10864  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
10865  DAG.getConstant(4, DL, MVT::i32));
10866  return DAG.getStore(NewST1.getValue(0), DL,
10867  StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
10868  OffsetPtr, St->getPointerInfo(),
10869  std::min(4U, St->getAlignment() / 2),
10870  St->getMemOperand()->getFlags());
10871  }
10872 
10873  if (StVal.getValueType() == MVT::i64 &&
10874  StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
10875 
10876  // Bitcast an i64 store extracted from a vector to f64.
10877  // Otherwise, the i64 value will be legalized to a pair of i32 values.
10878  SelectionDAG &DAG = DCI.DAG;
10879  SDLoc dl(StVal);
10880  SDValue IntVec = StVal.getOperand(0);
10881  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
10882  IntVec.getValueType().getVectorNumElements());
10883  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
10884  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
10885  Vec, StVal.getOperand(1));
10886  dl = SDLoc(N);
10887  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
10888  // Make the DAGCombiner fold the bitcasts.
10889  DCI.AddToWorklist(Vec.getNode());
10890  DCI.AddToWorklist(ExtElt.getNode());
10891  DCI.AddToWorklist(V.getNode());
10892  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
10893  St->getPointerInfo(), St->getAlignment(),
10894  St->getMemOperand()->getFlags(), St->getAAInfo());
10895  }
10896 
10897  // If this is a legal vector store, try to combine it into a VST1_UPD.
10898  if (ISD::isNormalStore(N) && VT.isVector() &&
10899  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
10900  return CombineBaseUpdate(N, DCI);
10901 
10902  return SDValue();
10903 }
10904 
10905 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
10906 /// can replace combinations of VMUL and VCVT (floating-point to integer)
10907 /// when the VMUL has a constant operand that is a power of 2.
10908 ///
10909 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
10910 /// vmul.f32 d16, d17, d16
10911 /// vcvt.s32.f32 d16, d16
10912 /// becomes:
10913 /// vcvt.s32.f32 d16, d16, #3
10914 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
10915  const ARMSubtarget *Subtarget) {
10916  if (!Subtarget->hasNEON())
10917  return SDValue();
10918 
10919  SDValue Op = N->getOperand(0);
10920  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
10921  Op.getOpcode() != ISD::FMUL)
10922  return SDValue();
10923 
10924  SDValue ConstVec = Op->getOperand(1);
10925  if (!isa<BuildVectorSDNode>(ConstVec))
10926  return SDValue();
10927 
10928  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
10929  uint32_t FloatBits = FloatTy.getSizeInBits();
10930  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
10931  uint32_t IntBits = IntTy.getSizeInBits();
10932  unsigned NumLanes = Op.getValueType().getVectorNumElements();
10933  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
10934  // These instructions only exist for converting from f32 to i32. We can handle
10935  // smaller integers by generating an extra truncate, but larger ones would
10936  // be lossy. We also can't handle more than 4 lanes, since these instructions
10937  // only support v2i32/v4i32 types.
10938  return SDValue();
10939  }
10940 
10941  BitVector UndefElements;
10942  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
10943  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
10944  if (C == -1 || C == 0 || C > 32)
10945  return SDValue();
10946 
10947  SDLoc dl(N);
10948  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
10949  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
10950  Intrinsic::arm_neon_vcvtfp2fxu;
10951  SDValue FixConv = DAG.getNode(
10952  ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
10953  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
10954  DAG.getConstant(C, dl, MVT::i32));
10955 
10956  if (IntBits < FloatBits)
10957  FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
10958 
10959  return FixConv;
10960 }
10961 
10962 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
10963 /// can replace combinations of VCVT (integer to floating-point) and VDIV
10964 /// when the VDIV has a constant operand that is a power of 2.
10965 ///
10966 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
10967 /// vcvt.f32.s32 d16, d16
10968 /// vdiv.f32 d16, d17, d16
10969 /// becomes:
10970 /// vcvt.f32.s32 d16, d16, #3
10971 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
10972  const ARMSubtarget *Subtarget) {
10973  if (!Subtarget->hasNEON())
10974  return SDValue();
10975 
10976  SDValue Op = N->getOperand(0);
10977  unsigned OpOpcode = Op.getNode()->getOpcode();
10978  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
10979  (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
10980  return SDValue();
10981 
10982  SDValue ConstVec = N->getOperand(1);
10983  if (!isa<BuildVectorSDNode>(ConstVec))
10984  return SDValue();
10985 
10986  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
10987  uint32_t FloatBits = FloatTy.getSizeInBits();
10988  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
10989  uint32_t IntBits = IntTy.getSizeInBits();
10990  unsigned NumLanes = Op.getValueType().getVectorNumElements();
10991  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
10992  // These instructions only exist for converting from i32 to f32. We can handle
10993  // smaller integers by generating an extra extend, but larger ones would
10994  // be lossy. We also can't handle more than 4 lanes, since these instructions
10995  // only support v2i32/v4i32 types.
10996  return SDValue();
10997  }
10998 
10999  BitVector UndefElements;
11000  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
11001  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
11002  if (C == -1 || C == 0 || C > 32)
11003  return SDValue();
11004 
11005  SDLoc dl(N);
11006  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
11007  SDValue ConvInput = Op.getOperand(0);
11008  if (IntBits < FloatBits)
11009  ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
11010  dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
11011  ConvInput);
11012 
11013  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
11014  Intrinsic::arm_neon_vcvtfxu2fp;
11015  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
11016  Op.getValueType(),
11017  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
11018  ConvInput, DAG.getConstant(C, dl, MVT::i32));
11019 }
11020 
11021 /// getVShiftImm - Check if this is a valid build_vector for the immediate
11022 /// operand of a vector shift operation, where all the elements of the
11023 /// build_vector must have the same constant integer value.
11024 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
11025  // Ignore bit_converts.
11026  while (Op.getOpcode() == ISD::BITCAST)
11027  Op = Op.getOperand(0);
11028  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
11029  APInt SplatBits, SplatUndef;
11030  unsigned SplatBitSize;
11031  bool HasAnyUndefs;
11032  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
11033  HasAnyUndefs, ElementBits) ||
11034  SplatBitSize > ElementBits)
11035  return false;
11036  Cnt = SplatBits.getSExtValue();
11037  return true;
11038 }
11039 
11040 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
11041 /// operand of a vector shift left operation. That value must be in the range:
11042 /// 0 <= Value < ElementBits for a left shift; or
11043 /// 0 <= Value <= ElementBits for a long left shift.
11044 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
11045  assert(VT.isVector() && "vector shift count is not a vector type");
11046  int64_t ElementBits = VT.getScalarSizeInBits();
11047  if (! getVShiftImm(Op, ElementBits, Cnt))
11048  return false;
11049  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
11050 }
11051 
11052 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
11053 /// operand of a vector shift right operation. For a shift opcode, the value
11054 /// is positive, but for an intrinsic the value count must be negative. The
11055 /// absolute value must be in the range:
11056 /// 1 <= |Value| <= ElementBits for a right shift; or
11057 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
11058 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
11059  int64_t &Cnt) {
11060  assert(VT.isVector() && "vector shift count is not a vector type");
11061  int64_t ElementBits = VT.getScalarSizeInBits();
11062  if (! getVShiftImm(Op, ElementBits, Cnt))
11063  return false;
11064  if (!isIntrinsic)
11065  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
11066  if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
11067  Cnt = -Cnt;
11068  return true;
11069  }
11070  return false;
11071 }
11072 
11073 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
11074 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
11075  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
11076  switch (IntNo) {
11077  default:
11078  // Don't do anything for most intrinsics.
11079  break;
11080 
11081  // Vector shifts: check for immediate versions and lower them.
11082  // Note: This is done during DAG combining instead of DAG legalizing because
11083  // the build_vectors for 64-bit vector element shift counts are generally
11084  // not legal, and it is hard to see their values after they get legalized to
11085  // loads from a constant pool.
11086  case Intrinsic::arm_neon_vshifts:
11087  case Intrinsic::arm_neon_vshiftu:
11088  case Intrinsic::arm_neon_vrshifts:
11089  case Intrinsic::arm_neon_vrshiftu:
11090  case Intrinsic::arm_neon_vrshiftn:
11091  case Intrinsic::arm_neon_vqshifts:
11092  case Intrinsic::arm_neon_vqshiftu:
11093  case Intrinsic::arm_neon_vqshiftsu:
11094  case Intrinsic::arm_neon_vqshiftns:
11095  case Intrinsic::arm_neon_vqshiftnu:
11096  case Intrinsic::arm_neon_vqshiftnsu:
11097  case Intrinsic::arm_neon_vqrshiftns:
11098  case Intrinsic::arm_neon_vqrshiftnu:
11099  case Intrinsic::arm_neon_vqrshiftnsu: {
11100  EVT VT = N->getOperand(1).getValueType();
11101  int64_t Cnt;
11102  unsigned VShiftOpc = 0;
11103 
11104  switch (IntNo) {
11105  case Intrinsic::arm_neon_vshifts:
11106  case Intrinsic::arm_neon_vshiftu:
11107  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
11108  VShiftOpc = ARMISD::VSHL;
11109  break;
11110  }
11111  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
11112  VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
11113  ARMISD::VSHRs : ARMISD::VSHRu);
11114  break;
11115  }
11116  return SDValue();
11117 
11118  case Intrinsic::arm_neon_vrshifts:
11119  case Intrinsic::arm_neon_vrshiftu:
11120  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
11121  break;
11122  return SDValue();
11123 
11124  case Intrinsic::arm_neon_vqshifts:
11125  case Intrinsic::arm_neon_vqshiftu:
11126  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
11127  break;
11128  return SDValue();
11129 
11130  case Intrinsic::arm_neon_vqshiftsu:
11131  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
11132  break;
11133  llvm_unreachable("invalid shift count for vqshlu intrinsic");
11134 
11135  case Intrinsic::arm_neon_vrshiftn:
11136  case Intrinsic::arm_neon_vqshiftns:
11137  case Intrinsic::arm_neon_vqshiftnu:
11138  case Intrinsic::arm_neon_vqshiftnsu:
11139  case Intrinsic::arm_neon_vqrshiftns:
11140  case Intrinsic::arm_neon_vqrshiftnu:
11141  case Intrinsic::arm_neon_vqrshiftnsu:
11142  // Narrowing shifts require an immediate right shift.
11143  if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
11144  break;
11145  llvm_unreachable("invalid shift count for narrowing vector shift "
11146  "intrinsic");
11147 
11148  default:
11149  llvm_unreachable("unhandled vector shift");
11150  }
11151 
11152  switch (IntNo) {
11153  case Intrinsic::arm_neon_vshifts:
11154  case Intrinsic::arm_neon_vshiftu:
11155  // Opcode already set above.
11156  break;
11157  case Intrinsic::arm_neon_vrshifts:
11158  VShiftOpc = ARMISD::VRSHRs; break;
11159  case Intrinsic::arm_neon_vrshiftu:
11160  VShiftOpc = ARMISD::VRSHRu; break;
11161  case Intrinsic::arm_neon_vrshiftn:
11162  VShiftOpc = ARMISD::VRSHRN; break;
11163  case Intrinsic::arm_neon_vqshifts:
11164  VShiftOpc = ARMISD::VQSHLs; break;
11165  case Intrinsic::arm_neon_vqshiftu:
11166  VShiftOpc = ARMISD::VQSHLu; break;
11167  case Intrinsic::arm_neon_vqshiftsu:
11168  VShiftOpc = ARMISD::VQSHLsu; break;
11169  case Intrinsic::arm_neon_vqshiftns:
11170  VShiftOpc = ARMISD::VQSHRNs; break;
11171  case Intrinsic::arm_neon_vqshiftnu:
11172  VShiftOpc = ARMISD::VQSHRNu; break;
11173  case Intrinsic::arm_neon_vqshiftnsu:
11174  VShiftOpc = ARMISD::VQSHRNsu; break;
11175  case Intrinsic::arm_neon_vqrshiftns:
11176  VShiftOpc = ARMISD::VQRSHRNs; break;
11177  case Intrinsic::arm_neon_vqrshiftnu:
11178  VShiftOpc = ARMISD::VQRSHRNu; break;
11179  case Intrinsic::arm_neon_vqrshiftnsu:
11180  VShiftOpc = ARMISD::VQRSHRNsu; break;
11181  }
11182 
11183  SDLoc dl(N);
11184  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
11185  N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
11186  }
11187 
11188  case Intrinsic::arm_neon_vshiftins: {
11189  EVT VT = N->getOperand(1).getValueType();
11190  int64_t Cnt;
11191  unsigned VShiftOpc = 0;
11192 
11193  if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
11194  VShiftOpc = ARMISD::VSLI;
11195  else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
11196  VShiftOpc = ARMISD::VSRI;
11197  else {
11198  llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
11199  }
11200 
11201  SDLoc dl(N);
11202  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
11203  N->getOperand(1), N->getOperand(2),
11204  DAG.getConstant(Cnt, dl, MVT::i32));
11205  }
11206 
11207  case Intrinsic::arm_neon_vqrshifts:
11208  case Intrinsic::arm_neon_vqrshiftu:
11209  // No immediate versions of these to check for.
11210  break;
11211  }
11212 
11213  return SDValue();
11214 }
11215 
11216 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
11217 /// lowers them. As with the vector shift intrinsics, this is done during DAG
11218 /// combining instead of DAG legalizing because the build_vectors for 64-bit
11219 /// vector element shift counts are generally not legal, and it is hard to see
11220 /// their values after they get legalized to loads from a constant pool.
11221 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
11222  const ARMSubtarget *ST) {
11223  EVT VT = N->getValueType(0);
11224  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
11225  // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
11226  // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
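  // When the high half of x is zero, the bits shifted out by the srl are all
  // zero, so the srl and the rotr produce the same value, and rotating a
  // byte-swapped word by 16 swaps the bytes within each halfword, which is
  // exactly what rev16 computes.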
11227  SDValue N1 = N->getOperand(1);
11228  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
11229  SDValue N0 = N->getOperand(0);
11230  if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
11231  DAG.MaskedValueIsZero(N0.getOperand(0),
11232  APInt::getHighBitsSet(32, 16)))
11233  return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
11234  }
11235  }
11236 
11237  // Nothing to be done for scalar shifts.
11238  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11239  if (!VT.isVector() || !TLI.isTypeLegal(VT))
11240  return SDValue();
11241 
11242  assert(ST->hasNEON() && "unexpected vector shift");
11243  int64_t Cnt;
11244 
11245  switch (N->getOpcode()) {
11246  default: llvm_unreachable("unexpected shift opcode");
11247 
11248  case ISD::SHL:
11249  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
11250  SDLoc dl(N);
11251  return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
11252  DAG.getConstant(Cnt, dl, MVT::i32));
11253  }
11254  break;
11255 
11256  case ISD::SRA:
11257  case ISD::SRL:
11258  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
11259  unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
11260  ARMISD::VSHRs : ARMISD::VSHRu);
11261  SDLoc dl(N);
11262  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
11263  DAG.getConstant(Cnt, dl, MVT::i32));
11264  }
11265  }
11266  return SDValue();
11267 }
11268 
11269 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
11270 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
11271 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
11272  const ARMSubtarget *ST) {
11273  SDValue N0 = N->getOperand(0);
11274 
11275  // Check for sign- and zero-extensions of vector extract operations of 8-
11276  // and 16-bit vector elements. NEON supports these directly. They are
11277  // handled during DAG combining because type legalization will promote them
11278  // to 32-bit types and it is messy to recognize the operations after that.
11279  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
11280  SDValue Vec = N0.getOperand(0);
11281  SDValue Lane = N0.getOperand(1);
11282  EVT VT = N->getValueType(0);
11283  EVT EltVT = N0.getValueType();
11284  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11285 
11286  if (VT == MVT::i32 &&
11287  (EltVT == MVT::i8 || EltVT == MVT::i16) &&
11288  TLI.isTypeLegal(Vec.getValueType()) &&
11289  isa<ConstantSDNode>(Lane)) {
11290 
11291  unsigned Opc = 0;
11292  switch (N->getOpcode()) {
11293  default: llvm_unreachable("unexpected opcode");
11294  case ISD::SIGN_EXTEND:
11295  Opc = ARMISD::VGETLANEs;
11296  break;
11297  case ISD::ZERO_EXTEND:
11298  case ISD::ANY_EXTEND:
11299  Opc = ARMISD::VGETLANEu;
11300  break;
11301  }
11302  return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
11303  }
11304  }
11305 
11306  return SDValue();
11307 }
11308 
11309 static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
11310  APInt &KnownOne) {
11311  if (Op.getOpcode() == ARMISD::BFI) {
11312  // Conservatively, we can recurse down the first operand
11313  // and just mask out all affected bits.
11314  computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
11315 
11316  // The operand to BFI is already a mask suitable for removing the bits it
11317  // sets.
11318  ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
11319  const APInt &Mask = CI->getAPIntValue();
11320  KnownZero &= Mask;
11321  KnownOne &= Mask;
11322  return;
11323  }
11324  if (Op.getOpcode() == ARMISD::CMOV) {
11325  APInt KZ2(KnownZero.getBitWidth(), 0);
11326  APInt KO2(KnownOne.getBitWidth(), 0);
11327  computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
11328  computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
11329 
11330  KnownZero &= KZ2;
11331  KnownOne &= KO2;
11332  return;
11333  }
11334  return DAG.computeKnownBits(Op, KnownZero, KnownOne);
11335 }
11336 
11337 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
11338  // If we have a CMOV, OR and AND combination such as:
11339  // if (x & CN)
11340  // y |= CM;
11341  //
11342  // And:
11343  // * CN is a single bit;
11344  // * All bits covered by CM are known zero in y
11345  //
11346  // Then we can convert this into a sequence of BFI instructions. This will
11347  // always be a win if CM is a single bit, will always be no worse than the
11348  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
11349  // three bits (due to the extra IT instruction).
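  // For example, for
  //   if (x & 4)
  //     y |= 0x30;
  // CN is bit 2 and CM covers bits 4-5; provided bits 4 and 5 of y are known
  // to be zero, x is shifted right by 2 and one BFI is emitted per set bit of
  // CM (two here) instead of the TST&OR sequence.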
11350 
11351  SDValue Op0 = CMOV->getOperand(0);
11352  SDValue Op1 = CMOV->getOperand(1);
11353  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
11354  auto CC = CCNode->getAPIntValue().getLimitedValue();
11355  SDValue CmpZ = CMOV->getOperand(4);
11356 
11357  // The compare must be against zero.
11358  if (!isNullConstant(CmpZ->getOperand(1)))
11359  return SDValue();
11360 
11361  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
11362  SDValue And = CmpZ->getOperand(0);
11363  if (And->getOpcode() != ISD::AND)
11364  return SDValue();
11365  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
11366  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
11367  return SDValue();
11368  SDValue X = And->getOperand(0);
11369 
11370  if (CC == ARMCC::EQ) {
11371  // We're performing an "equal to zero" compare. Swap the operands so we
11372  // canonicalize on a "not equal to zero" compare.
11373  std::swap(Op0, Op1);
11374  } else {
11375  assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
11376  }
11377 
11378  if (Op1->getOpcode() != ISD::OR)
11379  return SDValue();
11380  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
11380 
11382  if (!OrC)
11383  return SDValue();
11384  SDValue Y = Op1->getOperand(0);
11385 
11386  if (Op0 != Y)
11387  return SDValue();
11388 
11389  // Now, is it profitable to continue?
11390  APInt OrCI = OrC->getAPIntValue();
11391  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
11392  if (OrCI.countPopulation() > Heuristic)
11393  return SDValue();
11394 
11395  // Lastly, can we determine that the bits defined by OrCI
11396  // are zero in Y?
11397  APInt KnownZero, KnownOne;
11398  computeKnownBits(DAG, Y, KnownZero, KnownOne);
11399  if ((OrCI & KnownZero) != OrCI)
11400  return SDValue();
11401 
11402  // OK, we can do the combine.
11403  SDValue V = Y;
11404  SDLoc dl(X);
11405  EVT VT = X.getValueType();
11406  unsigned BitInX = AndC->getAPIntValue().logBase2();
11407 
11408  if (BitInX != 0) {
11409  // We must shift X first.
11410  X = DAG.getNode(ISD::SRL, dl, VT, X,
11411  DAG.getConstant(BitInX, dl, VT));
11412  }
11413 
11414  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
11415  BitInY < NumActiveBits; ++BitInY) {
11416  if (OrCI[BitInY] == 0)
11417  continue;
11418  APInt Mask(VT.getSizeInBits(), 0);
11419  Mask.setBit(BitInY);
11420  V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
11421  // Confusingly, the operand is an *inverted* mask.
11422  DAG.getConstant(~Mask, dl, VT));
11423  }
11424 
11425  return V;
11426 }
11427 
11428 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
11429 SDValue
11430 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
11431  SDValue Cmp = N->getOperand(4);
11432  if (Cmp.getOpcode() != ARMISD::CMPZ)
11433  // Only looking at NE cases.
11434  return SDValue();
11435 
11436  EVT VT = N->getValueType(0);
11437  SDLoc dl(N);
11438  SDValue LHS = Cmp.getOperand(0);
11439  SDValue RHS = Cmp.getOperand(1);
11440  SDValue Chain = N->getOperand(0);
11441  SDValue BB = N->getOperand(1);
11442  SDValue ARMcc = N->getOperand(2);
11443  ARMCC::CondCodes CC =
11444  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
11445 
11446  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
11447  // -> (brcond Chain BB CC CPSR Cmp)
11448  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
11449  LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
11450  LHS->getOperand(0)->hasOneUse()) {
11451  auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
11452  auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
11453  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
11454  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
11455  if ((LHS00C && LHS00C->getZExtValue() == 0) &&
11456  (LHS01C && LHS01C->getZExtValue() == 1) &&
11457  (LHS1C && LHS1C->getZExtValue() == 1) &&
11458  (RHSC && RHSC->getZExtValue() == 0)) {
11459  return DAG.getNode(
11460  ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
11461  LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
11462  }
11463  }
11464 
11465  return SDValue();
11466 }
11467 
11468 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
11469 SDValue
11470 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
11471  SDValue Cmp = N->getOperand(4);
11472  if (Cmp.getOpcode() != ARMISD::CMPZ)
11473  // Only looking at EQ and NE cases.
11474  return SDValue();
11475 
11476  EVT VT = N->getValueType(0);
11477  SDLoc dl(N);
11478  SDValue LHS = Cmp.getOperand(0);
11479  SDValue RHS = Cmp.getOperand(1);
11480  SDValue FalseVal = N->getOperand(0);
11481  SDValue TrueVal = N->getOperand(1);
11482  SDValue ARMcc = N->getOperand(2);
11483  ARMCC::CondCodes CC =
11484  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
11485 
11486  // BFI is only available on V6T2+.
11487  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
11488  SDValue R = PerformCMOVToBFICombine(N, DAG);
11489  if (R)
11490  return R;
11491  }
11492 
11493  // Simplify
11494  // mov r1, r0
11495  // cmp r1, x
11496  // mov r0, y
11497  // moveq r0, x
11498  // to
11499  // cmp r0, x
11500  // movne r0, y
11501  //
11502  // mov r1, r0
11503  // cmp r1, x
11504  // mov r0, x
11505  // movne r0, y
11506  // to
11507  // cmp r0, x
11508  // movne r0, y
11509  /// FIXME: Turn this into a target neutral optimization?
11510  SDValue Res;
11511  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
11512  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
11513  N->getOperand(3), Cmp);
11514  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
11515  SDValue ARMcc;
11516  SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
11517  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
11518  N->getOperand(3), NewCmp);
11519  }
11520 
11521  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
11522  // -> (cmov F T CC CPSR Cmp)
11523  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
11524  auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
11525  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
11526  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
11527  if ((LHS0C && LHS0C->getZExtValue() == 0) &&
11528  (LHS1C && LHS1C->getZExtValue() == 1) &&
11529  (RHSC && RHSC->getZExtValue() == 0)) {
11530  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
11531  LHS->getOperand(2), LHS->getOperand(3),
11532  LHS->getOperand(4));
11533  }
11534  }
11535 
11536  if (Res.getNode()) {
11537  APInt KnownZero, KnownOne;
11538  DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
11539  // Capture demanded bits information that would be otherwise lost.
11540  if (KnownZero == 0xfffffffe)
11541  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
11542  DAG.getValueType(MVT::i1));
11543  else if (KnownZero == 0xffffff00)
11544  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
11545  DAG.getValueType(MVT::i8));
11546  else if (KnownZero == 0xffff0000)
11547  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
11548  DAG.getValueType(MVT::i16));
11549  }
11550 
11551  return Res;
11552 }
11553 
11554 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
11555  DAGCombinerInfo &DCI) const {
11556  switch (N->getOpcode()) {
11557  default: break;
11558  case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
11559  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
11560  case ISD::SUB: return PerformSUBCombine(N, DCI);
11561  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
11562  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
11563  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
11564  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
11565  case ARMISD::BFI: return PerformBFICombine(N, DCI);
11566  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
11567  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
11568  case ISD::STORE: return PerformSTORECombine(N, DCI);
11569  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
11570  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
11571  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
11572  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
11573  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
11574  case ISD::FP_TO_SINT:
11575  case ISD::FP_TO_UINT:
11576  return PerformVCVTCombine(N, DCI.DAG, Subtarget);
11577  case ISD::FDIV:
11578  return PerformVDIVCombine(N, DCI.DAG, Subtarget);
11579  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
11580  case ISD::SHL:
11581  case ISD::SRA:
11582  case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
11583  case ISD::SIGN_EXTEND:
11584  case ISD::ZERO_EXTEND:
11585  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
11586  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
11587  case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
11588  case ISD::LOAD: return PerformLOADCombine(N, DCI);
11589  case ARMISD::VLD1DUP:
11590  case ARMISD::VLD2DUP:
11591  case ARMISD::VLD3DUP:
11592  case ARMISD::VLD4DUP:
11593  return PerformVLDCombine(N, DCI);
11594  case ARMISD::BUILD_VECTOR:
11595  return PerformARMBUILD_VECTORCombine(N, DCI);
11596  case ISD::INTRINSIC_VOID:
11597  case ISD::INTRINSIC_W_CHAIN:
11598  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11599  case Intrinsic::arm_neon_vld1:
11600  case Intrinsic::arm_neon_vld2:
11601  case Intrinsic::arm_neon_vld3:
11602  case Intrinsic::arm_neon_vld4:
11603  case Intrinsic::arm_neon_vld2lane:
11604  case Intrinsic::arm_neon_vld3lane:
11605  case Intrinsic::arm_neon_vld4lane:
11606  case Intrinsic::arm_neon_vst1:
11607  case Intrinsic::arm_neon_vst2:
11608  case Intrinsic::arm_neon_vst3:
11609  case Intrinsic::arm_neon_vst4:
11610  case Intrinsic::arm_neon_vst2lane:
11611  case Intrinsic::arm_neon_vst3lane:
11612  case Intrinsic::arm_neon_vst4lane:
11613  return PerformVLDCombine(N, DCI);
11614  default: break;
11615  }
11616  break;
11617  }
11618  return SDValue();
11619 }
11620 
11621 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
11622  EVT VT) const {
11623  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
11624 }
11625 
11626 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
11627  unsigned,
11628  unsigned,
11629  bool *Fast) const {
11630  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
11631  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
11632 
11633  switch (VT.getSimpleVT().SimpleTy) {
11634  default:
11635  return false;
11636  case MVT::i8:
11637  case MVT::i16:
11638  case MVT::i32: {
11639  // Unaligned access can use (for example) LDRB, LDRH, LDR
11640  if (AllowsUnaligned) {
11641  if (Fast)
11642  *Fast = Subtarget->hasV7Ops();
11643  return true;
11644  }
11645  return false;
11646  }
11647  case MVT::f64:
11648  case MVT::v2f64: {
11649  // For any little-endian targets with neon, we can support unaligned ld/st
11650  // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
11651  // A big-endian target may also explicitly support unaligned accesses
11652  if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
11653  if (Fast)
11654  *Fast = true;
11655  return true;
11656  }
11657  return false;
11658  }
11659  }
11660 }
11661 
11662 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
11663  unsigned AlignCheck) {
11664  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
11665  (DstAlign == 0 || DstAlign % AlignCheck == 0));
11666 }
11667 
11668 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
11669  unsigned DstAlign, unsigned SrcAlign,
11670  bool IsMemset, bool ZeroMemset,
11671  bool MemcpyStrSrc,
11672  MachineFunction &MF) const {
11673  const Function *F = MF.getFunction();
11674 
11675  // See if we can use NEON instructions for this...
11676  if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
11677  !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
11678  bool Fast;
11679  if (Size >= 16 &&
11680  (memOpAlign(SrcAlign, DstAlign, 16) ||
11681  (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
11682  return MVT::v2f64;
11683  } else if (Size >= 8 &&
11684  (memOpAlign(SrcAlign, DstAlign, 8) ||
11685  (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
11686  Fast))) {
11687  return MVT::f64;
11688  }
11689  }
11690 
11691  // Lowering to i32/i16 if the size permits.
11692  if (Size >= 4)
11693  return MVT::i32;
11694  else if (Size >= 2)
11695  return MVT::i16;
11696 
11697  // Let the target-independent logic figure it out.
11698  return MVT::Other;
11699 }
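// For example, a 16-byte memcpy whose source and destination are both
// 16-byte aligned on a NEON-capable target returns MVT::v2f64 here, so the
// copy can be expanded with 128-bit vector loads and stores.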
11700 
11701 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11702  if (Val.getOpcode() != ISD::LOAD)
11703  return false;
11704 
11705  EVT VT1 = Val.getValueType();
11706  if (!VT1.isSimple() || !VT1.isInteger() ||
11707  !VT2.isSimple() || !VT2.isInteger())
11708  return false;
11709 
11710  switch (VT1.getSimpleVT().SimpleTy) {
11711  default: break;
11712  case MVT::i1:
11713  case MVT::i8:
11714  case MVT::i16:
11715  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
11716  return true;
11717  }
11718 
11719  return false;
11720 }
11721 
11722 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
11723  EVT VT = ExtVal.getValueType();
11724 
11725  if (!isTypeLegal(VT))
11726  return false;
11727 
11728  // Don't create a loadext if we can fold the extension into a wide/long
11729  // instruction.
11730  // If there's more than one user instruction, the loadext is desirable no
11731  // matter what. There can be two uses by the same instruction.
11732  if (ExtVal->use_empty() ||
11733  !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
11734  return true;
11735 
11736  SDNode *U = *ExtVal->use_begin();
11737  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
11738  U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
11739  return false;
11740 
11741  return true;
11742 }
11743 
11745  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11746  return false;
11747 
11748  if (!isTypeLegal(EVT::getEVT(Ty1)))
11749  return false;
11750 
11751  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
11752 
11753  // Assuming the caller doesn't have a zeroext or signext return parameter,
11754  // truncation all the way down to i1 is valid.
11755  return true;
11756 }
11757 
11759  const AddrMode &AM, Type *Ty,
11760  unsigned AS) const {
11761  if (isLegalAddressingMode(DL, AM, Ty, AS)) {
11762  if (Subtarget->hasFPAO())
11763  return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
11764  return 0;
11765  }
11766  return -1;
11767 }
11768 
11769 
11770 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
11771  if (V < 0)
11772  return false;
11773 
11774  unsigned Scale = 1;
11775  switch (VT.getSimpleVT().SimpleTy) {
11776  default: return false;
11777  case MVT::i1:
11778  case MVT::i8:
11779  // Scale == 1;
11780  break;
11781  case MVT::i16:
11782  // Scale == 2;
11783  Scale = 2;
11784  break;
11785  case MVT::i32:
11786  // Scale == 4;
11787  Scale = 4;
11788  break;
11789  }
11790 
11791  if ((V & (Scale - 1)) != 0)
11792  return false;
11793  V /= Scale;
11794  return V == (V & ((1LL << 5) - 1));
11795 }
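// For example, an i32 access in Thumb1 mode must use an offset that is a
// multiple of 4 in the range [0, 124] (a 5-bit immediate scaled by 4), so
// 124 is accepted above while 126 and -4 are rejected.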
11796 
11797 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
11798  const ARMSubtarget *Subtarget) {
11799  bool isNeg = false;
11800  if (V < 0) {
11801  isNeg = true;
11802  V = - V;
11803  }
11804 
11805  switch (VT.getSimpleVT().SimpleTy) {
11806  default: return false;
11807  case MVT::i1:
11808  case MVT::i8:
11809  case MVT::i16:
11810  case MVT::i32:
11811  // + imm12 or - imm8
11812  if (isNeg)
11813  return V == (V & ((1LL << 8) - 1));
11814  return V == (V & ((1LL << 12) - 1));
11815  case MVT::f32:
11816  case MVT::f64:
11817  // Same as ARM mode. FIXME: NEON?
11818  if (!Subtarget->hasVFP2())
11819  return false;
11820  if ((V & 3) != 0)
11821  return false;
11822  V >>= 2;
11823  return V == (V & ((1LL << 8) - 1));
11824  }
11825 }
11826 
11827 /// isLegalAddressImmediate - Return true if the integer value can be used
11828 /// as the offset of the target addressing mode for load / store of the
11829 /// given type.
11830 static bool isLegalAddressImmediate(int64_t V, EVT VT,
11831  const ARMSubtarget *Subtarget) {
11832  if (V == 0)
11833  return true;
11834 
11835  if (!VT.isSimple())
11836  return false;
11837 
11838  if (Subtarget->isThumb1Only())
11839  return isLegalT1AddressImmediate(V, VT);
11840  else if (Subtarget->isThumb2())
11841  return isLegalT2AddressImmediate(V, VT, Subtarget);
11842 
11843  // ARM mode.
11844  if (V < 0)
11845  V = - V;
11846  switch (VT.getSimpleVT().SimpleTy) {
11847  default: return false;
11848  case MVT::i1:
11849  case MVT::i8:
11850  case MVT::i32:
11851  // +- imm12
11852  return V == (V & ((1LL << 12) - 1));
11853  case MVT::i16:
11854  // +- imm8
11855  return V == (V & ((1LL << 8) - 1));
11856  case MVT::f32:
11857  case MVT::f64:
11858  if (!Subtarget->hasVFP2()) // FIXME: NEON?
11859  return false;
11860  if ((V & 3) != 0)
11861  return false;
11862  V >>= 2;
11863  return V == (V & ((1LL << 8) - 1));
11864  }
11865 }
11866 
11867 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
11868  EVT VT) const {
11869  int Scale = AM.Scale;
11870  if (Scale < 0)
11871  return false;
11872 
11873  switch (VT.getSimpleVT().SimpleTy) {
11874  default: return false;
11875  case MVT::i1:
11876  case MVT::i8:
11877  case MVT::i16:
11878  case MVT::i32:
11879  if (Scale == 1)
11880  return true;
11881  // r + r << imm
11882  Scale = Scale & ~1;
11883  return Scale == 2 || Scale == 4 || Scale == 8;
11884  case MVT::i64:
11885  // r + r
11886  if (((unsigned)AM.HasBaseReg + Scale) <= 2)
11887  return true;
11888  return false;
11889  case MVT::isVoid:
11890  // Note, we allow "void" uses (basically, uses that aren't loads or
11891  // stores), because arm allows folding a scale into many arithmetic
11892  // operations. This should be made more precise and revisited later.
11893 
11894  // Allow r << imm, but the imm has to be a multiple of two.
11895  if (Scale & 1) return false;
11896  return isPowerOf2_32(Scale);
11897  }
11898 }
11899 
11900 /// isLegalAddressingMode - Return true if the addressing mode represented
11901 /// by AM is legal for this target, for a load/store of the specified type.
11902 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
11903  const AddrMode &AM, Type *Ty,
11904  unsigned AS) const {
11905  EVT VT = getValueType(DL, Ty, true);
11906  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
11907  return false;
11908 
11909  // Can never fold addr of global into load/store.
11910  if (AM.BaseGV)
11911  return false;
11912 
11913  switch (AM.Scale) {
11914  case 0: // no scale reg, must be "r+i" or "r", or "i".
11915  break;
11916  case 1:
11917  if (Subtarget->isThumb1Only())
11918  return false;
11919  LLVM_FALLTHROUGH;
11920  default:
11921  // ARM doesn't support any R+R*scale+imm addr modes.
11922  if (AM.BaseOffs)
11923  return false;
11924 
11925  if (!VT.isSimple())
11926  return false;
11927 
11928  if (Subtarget->isThumb2())
11929  return isLegalT2ScaledAddressingMode(AM, VT);
11930 
11931  int Scale = AM.Scale;
11932  switch (VT.getSimpleVT().SimpleTy) {
11933  default: return false;
11934  case MVT::i1:
11935  case MVT::i8:
11936  case MVT::i32:
11937  if (Scale < 0) Scale = -Scale;
11938  if (Scale == 1)
11939  return true;
11940  // r + r << imm
11941  return isPowerOf2_32(Scale & ~1);
11942  case MVT::i16:
11943  case MVT::i64:
11944  // r + r
11945  if (((unsigned)AM.HasBaseReg + Scale) <= 2)
11946  return true;
11947  return false;
11948 
11949  case MVT::isVoid:
11950  // Note, we allow "void" uses (basically, uses that aren't loads or
11951  // stores), because arm allows folding a scale into many arithmetic
11952  // operations. This should be made more precise and revisited later.
11953 
11954  // Allow r << imm, but the imm has to be a multiple of two.
11955  if (Scale & 1) return false;
11956  return isPowerOf2_32(Scale);
11957  }
11958  }
11959  return true;
11960 }
11961 
11962 /// isLegalICmpImmediate - Return true if the specified immediate is a legal
11963 /// icmp immediate, that is, the target has icmp instructions which can compare
11964 /// a register against the immediate without having to materialize the
11965 /// immediate into a register.
11966 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
11967  // Thumb2 and ARM modes can use cmn for negative immediates.
11968  if (!Subtarget->isThumb())
11969  return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
11970  if (Subtarget->isThumb2())
11971  return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
11972  // Thumb1 doesn't have cmn, and its cmp only takes 8-bit unsigned immediates.
11973  return Imm >= 0 && Imm <= 255;
11974 }
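// For example, Imm = -1 is legal in ARM mode because the comparison can be
// emitted as cmn with #1, whereas Thumb1 rejects it since it has no cmn.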
11975 
11976 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
11977 /// *or sub* immediate, that is, the target has add or sub instructions which can
11978 /// add a register with the immediate without having to materialize the
11979 /// immediate into a register.
11980 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
11981  // Same encoding for add/sub, just flip the sign.
11982  int64_t AbsImm = std::abs(Imm);
11983  if (!Subtarget->isThumb())
11984  return ARM_AM::getSOImmVal(AbsImm) != -1;
11985  if (Subtarget->isThumb2())
11986  return ARM_AM::getT2SOImmVal(AbsImm) != -1;
11987  // Thumb1 only has 8-bit unsigned immediate.
11988  return AbsImm >= 0 && AbsImm <= 255;
11989 }
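// For example, in ARM mode an add/sub immediate must be an 8-bit value
// rotated right by an even amount, so 0xFF000000 is legal while 0x101 is
// not; Thumb1 only accepts immediates in the range 0-255.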
11990 
11991 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
11992  bool isSEXTLoad, SDValue &Base,
11993  SDValue &Offset, bool &isInc,
11994  SelectionDAG &DAG) {
11995  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
11996  return false;
11997 
11998  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
11999  // AddressingMode 3
12000  Base = Ptr->getOperand(0);
12001  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
12002  int RHSC = (int)RHS->getZExtValue();
12003  if (RHSC < 0 && RHSC > -256) {
12004  assert(Ptr->getOpcode() == ISD::ADD);
12005  isInc = false;
12006  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
12007  return true;
12008  }
12009  }
12010  isInc = (Ptr->getOpcode() == ISD::ADD);
12011  Offset = Ptr->getOperand(1);
12012  return true;
12013  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
12014  // AddressingMode 2
12015  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
12016  int RHSC = (int)RHS->getZExtValue();
12017  if (RHSC < 0 && RHSC > -0x1000) {
12018  assert(Ptr->getOpcode() == ISD::ADD);
12019  isInc = false;
12020  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
12021  Base = Ptr->getOperand(0);
12022  return true;
12023  }
12024  }
12025 
12026  if (Ptr->getOpcode() == ISD::ADD) {
12027  isInc = true;
12028  ARM_AM::ShiftOpc ShOpcVal=
12029  ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
12030  if (ShOpcVal != ARM_AM::no_shift) {
12031  Base = Ptr->getOperand(1);
12032  Offset = Ptr->getOperand(0);
12033  } else {
12034  Base = Ptr->getOperand(0);
12035  Offset = Ptr->getOperand(1);
12036  }
12037  return true;
12038  }
12039 
12040  isInc = (Ptr->getOpcode() == ISD::ADD);
12041  Base = Ptr->getOperand(0);
12042  Offset = Ptr->getOperand(1);
12043  return true;
12044  }
12045 
12046  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
12047  return false;
12048 }
12049 
12050 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
12051  bool isSEXTLoad, SDValue &Base,
12052  SDValue &Offset, bool &isInc,
12053  SelectionDAG &DAG) {
12054  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
12055  return false;
12056 
12057  Base = Ptr->getOperand(0);
12058  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
12059  int RHSC = (int)RHS->getZExtValue();
12060  if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
12061  assert(Ptr->getOpcode() == ISD::ADD);
12062  isInc = false;
12063  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
12064  return true;
12065  } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
12066  isInc = Ptr->getOpcode() == ISD::ADD;
12067  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
12068  return true;
12069  }
12070  }
12071 
12072  return false;
12073 }
12074 
12075 /// getPreIndexedAddressParts - returns true by value, base pointer and
12076 /// offset pointer and addressing mode by reference if the node's address
12077 /// can be legally represented as pre-indexed load / store address.
12078 bool
12079 ARMTargetLowering::getPreIndexedAddressParts(SDValue Op, SDValue &Base,
12080  SDValue &Offset,
12081  ISD::MemIndexedMode &AM,
12082  SelectionDAG &DAG) const {
12083  if (Subtarget->isThumb1Only())
12084  return false;
12085 
12086  EVT VT;
12087  SDValue Ptr;
12088  bool isSEXTLoad = false;
12089  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12090  Ptr = LD->getBasePtr();
12091  VT = LD->getMemoryVT();
12092  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
12093  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12094  Ptr = ST->getBasePtr();
12095  VT = ST->getMemoryVT();
12096  } else
12097  return false;
12098 
12099  bool isInc;
12100  bool isLegal = false;
12101  if (Subtarget->isThumb2())
12102  isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
12103  Offset, isInc, DAG);
12104  else
12105  isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
12106  Offset, isInc, DAG);
12107  if (!isLegal)
12108  return false;
12109 
12110  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
12111  return true;
12112 }
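// For example, a load whose address is p + 4, where p + 4 is also needed
// afterwards, can be turned into a pre-indexed access such as
// ldr r0, [r1, #4]!, which writes the updated address back to the base
// register as part of the load.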
12113 
12114 /// getPostIndexedAddressParts - returns true by value, base pointer and
12115 /// offset pointer and addressing mode by reference if this node can be
12116 /// combined with a load / store to form a post-indexed load / store.
12117 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
12118  SDValue &Base,
12119  SDValue &Offset,
12120  ISD::MemIndexedMode &AM,
12121  SelectionDAG &DAG) const {
12122  EVT VT;
12123  SDValue Ptr;
12124  bool isSEXTLoad = false, isNonExt;
12125  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12126  VT = LD->getMemoryVT();
12127  Ptr = LD->getBasePtr();
12128  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
12129  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
12130  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12131  VT = ST->getMemoryVT();
12132  Ptr = ST->getBasePtr();
12133  isNonExt = !ST->isTruncatingStore();
12134  } else
12135  return false;
12136 
12137  if (Subtarget->isThumb1Only()) {
12138  // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
12139  // must be non-extending/truncating, i32, with an offset of 4.
12140  assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
12141  if (Op->getOpcode() != ISD::ADD || !isNonExt)
12142  return false;
12143  auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
12144  if (!RHS || RHS->getZExtValue() != 4)
12145  return false;
12146 
12147  Offset = Op->getOperand(1);
12148  Base = Op->getOperand(0);
12149  AM = ISD::POST_INC;
12150  return true;
12151  }
12152 
12153  bool isInc;
12154  bool isLegal = false;
12155  if (Subtarget->isThumb2())
12156  isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
12157  isInc, DAG);
12158  else
12159  isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
12160  isInc, DAG);
12161  if (!isLegal)
12162  return false;
12163 
12164  if (Ptr != Base) {
12165  // Swap base ptr and offset to catch more post-index load / store when
12166  // it's legal. In Thumb2 mode, offset must be an immediate.
12167  if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
12168  !Subtarget->isThumb2())
12169  std::swap(Base, Offset);
12170 
12171  // Post-indexed load / store update the base pointer.
12172  if (Ptr != Base)
12173  return false;
12174  }
12175 
12176  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
12177  return true;
12178 }
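// For example, a load from p followed by p += 4 can be combined here into a
// post-indexed access such as ldr r0, [r1], #4, which loads from the old
// value of p and then advances it by 4.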
12179 
12180 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
12181  APInt &KnownZero,
12182  APInt &KnownOne,
12183  const SelectionDAG &DAG,
12184  unsigned Depth) const {
12185  unsigned BitWidth = KnownOne.getBitWidth();
12186  KnownZero = KnownOne = APInt(BitWidth, 0);
12187  switch (Op.getOpcode()) {
12188  default: break;
12189  case ARMISD::ADDC:
12190  case ARMISD::ADDE:
12191  case ARMISD::SUBC:
12192  case ARMISD::SUBE:
12193  // These nodes' second result is a boolean
12194  if (Op.getResNo() == 0)
12195  break;
12196  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
12197  break;
12198  case ARMISD::CMOV: {
12199  // Bits are known zero/one if known on the LHS and RHS.
12200  DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
12201  if (KnownZero == 0 && KnownOne == 0) return;
12202 
12203  APInt KnownZeroRHS, KnownOneRHS;
12204  DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
12205  KnownZero &= KnownZeroRHS;
12206  KnownOne &= KnownOneRHS;
12207  return;
12208  }
12209  case ISD::INTRINSIC_W_CHAIN: {
12210  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
12211  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
12212  switch (IntID) {
12213  default: return;
12214  case Intrinsic::arm_ldaex:
12215  case Intrinsic::arm_ldrex: {
12216  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
12217  unsigned MemBits = VT.getScalarSizeInBits();
12218  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
12219  return;
12220  }
12221  }
12222  }
12223  }
12224 }
12225 
12226 //===----------------------------------------------------------------------===//
12227 // ARM Inline Assembly Support
12228 //===----------------------------------------------------------------------===//
12229 
12230 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
12231  // Looking for "rev" which is V6+.
12232  if (!Subtarget->hasV6Ops())
12233  return false;
12234 
12235  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
12236  std::string AsmStr = IA->getAsmString();
12237  SmallVector<StringRef, 4> AsmPieces;
12238  SplitString(AsmStr, AsmPieces, ";\n");
12239 
12240  switch (AsmPieces.size()) {
12241  default: return false;
12242  case 1:
12243  AsmStr = AsmPieces[0];
12244  AsmPieces.clear();
12245  SplitString(AsmStr, AsmPieces, " \t,");
12246 
12247  // rev $0, $1
12248  if (AsmPieces.size() == 3 &&
12249  AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
12250  IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
12251  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
12252  if (Ty && Ty->getBitWidth() == 32)
12253  return IntrinsicLowering::LowerToByteSwap(CI);
12254  }
12255  break;
12256  }
12257 
12258  return false;
12259 }
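// For example, an inline-asm call whose IR form is
//   call i32 asm "rev $0, $1", "=l,l"(i32 %x)
// is recognized above and rewritten to the llvm.bswap.i32 intrinsic, which
// the backend can then optimize like any other byte swap.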
12260 
12261 const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
12262  // At this point, we have to lower this constraint to something else, so we
12263  // lower it to an "r" or "w". However, by doing this we will force the result
12264  // to be in a register, while the X constraint is much more permissive.
12265  //
12266  // Although we are correct (we are free to emit anything, without
12267  // constraints), we might break use cases that would expect us to be more
12268  // efficient and emit something else.
12269  if (!Subtarget->hasVFP2())
12270  return "r";
12271  if (ConstraintVT.isFloatingPoint())
12272  return "w";
12273  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
12274  (ConstraintVT.getSizeInBits() == 64 ||
12275  ConstraintVT.getSizeInBits() == 128))
12276  return "w";
12277 
12278  return "r";
12279 }
12280 
12281 /// getConstraintType - Given a constraint letter, return the type of
12282 /// constraint it is for this target.
12283 ARMTargetLowering::ConstraintType
12284 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
12285  if (Constraint.size() == 1) {
12286  switch (Constraint[0]) {
12287  default: break;
12288  case 'l': return C_RegisterClass;
12289  case 'w': return C_RegisterClass;
12290  case 'h': return C_RegisterClass;
12291  case 'x': return C_RegisterClass;
12292  case 't': return C_RegisterClass;
12293  case 'j': return C_Other; // Constant for movw.
12294  // An address with a single base register. Due to the way we
12295  // currently handle addresses it is the same as an 'r' memory constraint.
12296  case 'Q': return C_Memory;
12297  }
12298  } else if (Constraint.size() == 2) {
12299  switch (Constraint[0]) {
12300  default: break;
12301  // All 'U+' constraints are addresses.
12302  case 'U': return C_Memory;
12303  }
12304  }
12305  return TargetLowering::getConstraintType(Constraint);
12306 }
12307 
12308 /// Examine constraint type and operand type and determine a weight value.
12309 /// This object must already have been set up with the operand type
12310 /// and the current alternative constraint selected.
12311 TargetLowering::ConstraintWeight
12312 ARMTargetLowering::getSingleConstraintMatchWeight(
12313  AsmOperandInfo &info, const char *constraint) const {
12314  ConstraintWeight weight = CW_Invalid;
12315  Value *CallOperandVal = info.CallOperandVal;
12316  // If we don't have a value, we can't do a match,
12317  // but allow it at the lowest weight.
12318  if (!CallOperandVal)
12319  return CW_Default;
12320  Type *type = CallOperandVal->getType();
12321  // Look at the constraint type.
12322  switch (*constraint) {
12323  default:
12324  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
12325  break;
12326  case 'l':
12327  if (type->isIntegerTy()) {
12328  if (Subtarget->isThumb())
12329  weight = CW_SpecificReg;
12330  else
12331  weight = CW_Register;
12332  }
12333  break;
12334  case 'w':
12335  if (type->isFloatingPointTy())
12336  weight = CW_Register;
12337  break;
12338  }
12339  return weight;
12340 }
12341 
12342 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
12343 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
12344  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
12345  if (Constraint.size() == 1) {
12346  // GCC ARM Constraint Letters
12347  switch (Constraint[0]) {
12348  case 'l': // Low regs or general regs.
12349  if (Subtarget->isThumb())
12350  return RCPair(0U, &ARM::tGPRRegClass);
12351  return RCPair(0U, &ARM::GPRRegClass);
12352  case 'h': // High regs or no regs.
12353  if (Subtarget->isThumb())
12354  return RCPair(0U, &ARM::hGPRRegClass);
12355  break;
12356  case 'r':
12357  if (Subtarget->isThumb1Only())
12358  return RCPair(0U, &ARM::tGPRRegClass);
12359  return RCPair(0U, &ARM::GPRRegClass);
12360  case 'w':
12361  if (VT == MVT::Other)
12362  break;
12363  if (VT == MVT::f32)
12364  return RCPair(0U, &ARM::SPRRegClass);
12365  if (VT.getSizeInBits() == 64)
12366  return RCPair(0U, &ARM::DPRRegClass);
12367  if (VT.getSizeInBits() == 128)
12368  return RCPair(0U, &ARM::QPRRegClass);
12369  break;
12370  case 'x':
12371  if (VT == MVT::Other)
12372  break;
12373  if (VT == MVT::f32)
12374  return RCPair(0U, &ARM::SPR_8RegClass);
12375  if (VT.getSizeInBits() == 64)
12376  return RCPair(0U, &ARM::DPR_8RegClass);
12377  if (VT.getSizeInBits() == 128)
12378  return RCPair(0U, &ARM::QPR_8RegClass);
12379  break;
12380  case 't':
12381  if (VT == MVT::f32)
12382  return RCPair(0U, &ARM::SPRRegClass);
12383  break;
12384  }
12385  }
12386  if (StringRef("{cc}").equals_lower(Constraint))
12387  return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
12388 
12389  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12390 }
12391 
12392 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12393 /// vector. If it is invalid, don't add anything to Ops.
12394 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12395  std::string &Constraint,
12396  std::vector<SDValue>&Ops,
12397  SelectionDAG &DAG) const {
12398  SDValue Result;
12399 
12400  // Currently only support length 1 constraints.
12401  if (Constraint.length() != 1) return;
12402 
12403  char ConstraintLetter = Constraint[0];
12404  switch (ConstraintLetter) {
12405  default: break;
12406  case 'j':
12407  case 'I': case 'J': case 'K': case 'L':
12408  case 'M': case 'N': case 'O':
12409  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
12410  if (!C)
12411  return;
12412 
12413  int64_t CVal64 = C->getSExtValue();
12414  int CVal = (int) CVal64;
12415  // None of these constraints allow values larger than 32 bits. Check
12416  // that the value fits in an int.
12417  if (CVal != CVal64)
12418  return;
12419 
12420  switch (ConstraintLetter) {
12421  case 'j':
12422  // Constant suitable for movw, must be between 0 and
12423  // 65535.
12424  if (Subtarget->hasV6T2Ops())
12425  if (CVal >= 0 && CVal <= 65535)
12426  break;
12427  return;
12428  case 'I':
12429  if (Subtarget->isThumb1Only()) {
12430  // This must be a constant between 0 and 255, for ADD
12431  // immediates.
12432  if (CVal >= 0 && CVal <= 255)
12433  break;
12434  } else if (Subtarget->isThumb2()) {
12435  // A constant that can be used as an immediate value in a
12436  // data-processing instruction.
12437  if (ARM_AM::getT2SOImmVal(CVal) != -1)
12438  break;
12439  } else {
12440  // A constant that can be used as an immediate value in a
12441  // data-processing instruction.
12442  if (ARM_AM::getSOImmVal(CVal) != -1)
12443  break;
12444  }
12445  return;
12446 
12447  case 'J':
12448  if (Subtarget->isThumb1Only()) {
12449  // This must be a constant between -255 and -1, for negated ADD
12450  // immediates. This can be used in GCC with an "n" modifier that
12451  // prints the negated value, for use with SUB instructions. It is
12452  // not useful otherwise but is implemented for compatibility.
12453  if (CVal >= -255 && CVal <= -1)
12454  break;
12455  } else {
12456  // This must be a constant between -4095 and 4095. It is not clear
12457  // what this constraint is intended for. Implemented for
12458  // compatibility with GCC.
12459  if (CVal >= -4095 && CVal <= 4095)
12460  break;
12461  }
12462  return;
12463 
12464  case 'K':
12465  if (Subtarget->isThumb1Only()) {
12466  // A 32-bit value where only one byte has a nonzero value. Exclude
12467  // zero to match GCC. This constraint is used by GCC internally for
12468  // constants that can be loaded with a move/shift combination.
12469  // It is not useful otherwise but is implemented for compatibility.
12470  if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
12471  break;
12472  } else if (Subtarget->isThumb2()) {
12473  // A constant whose bitwise inverse can be used as an immediate
12474  // value in a data-processing instruction. This can be used in GCC
12475  // with a "B" modifier that prints the inverted value, for use with
12476  // BIC and MVN instructions. It is not useful otherwise but is
12477  // implemented for compatibility.
12478  if (ARM_AM::getT2SOImmVal(~CVal) != -1)
12479  break;
12480  } else {
12481  // A constant whose bitwise inverse can be used as an immediate
12482  // value in a data-processing instruction. This can be used in GCC
12483  // with a "B" modifier that prints the inverted value, for use with
12484  // BIC and MVN instructions. It is not useful otherwise but is
12485  // implemented for compatibility.
12486  if (ARM_AM::getSOImmVal(~CVal) != -1)
12487  break;
12488  }
12489  return;
12490 
12491  case 'L':
12492  if (Subtarget->isThumb1Only()) {
12493  // This must be a constant between -7 and 7,
12494  // for 3-operand ADD/SUB immediate instructions.
12495  if (CVal >= -7 && CVal < 7)
12496  break;
12497  } else if (Subtarget->isThumb2()) {
12498  // A constant whose negation can be used as an immediate value in a
12499  // data-processing instruction. This can be used in GCC with an "n"
12500  // modifier that prints the negated value, for use with SUB
12501  // instructions. It is not useful otherwise but is implemented for
12502  // compatibility.
12503  if (ARM_AM::getT2SOImmVal(-CVal) != -1)
12504  break;
12505  } else {
12506  // A constant whose negation can be used as an immediate value in a
12507  // data-processing instruction. This can be used in GCC with an "n"
12508  // modifier that prints the negated value, for use with SUB
12509  // instructions. It is not useful otherwise but is implemented for
12510  // compatibility.
12511  if (ARM_AM::getSOImmVal(-CVal) != -1)
12512  break;
12513  }
12514  return;
12515 
12516  case 'M':
12517  if (Subtarget->isThumb1Only()) {
12518  // This must be a multiple of 4 between 0 and 1020, for
12519  // ADD sp + immediate.
12520  if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
12521  break;
12522  } else {
12523  // A power of two or a constant between 0 and 32. This is used in
12524  // GCC for the shift amount on shifted register operands, but it is
12525  // useful in general for any shift amounts.
12526  if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
12527  break;
12528  }
12529  return;
12530 
12531  case 'N':
12532  if (Subtarget->isThumb()) { // FIXME thumb2
12533  // This must be a constant between 0 and 31, for shift amounts.
12534  if (CVal >= 0 && CVal <= 31)
12535  break;
12536  }
12537  return;
12538 
12539  case 'O':
12540  if (Subtarget->isThumb()) { // FIXME thumb2
12541  // This must be a multiple of 4 between -508 and 508, for
12542  // ADD/SUB sp = sp + immediate.
12543  if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
12544  break;
12545  }
12546  return;
12547  }
12548  Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
12549  break;
12550  }
12551 
12552  if (Result.getNode()) {
12553  Ops.push_back(Result);
12554  return;
12555  }
12556  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12557 }
12558 
12559 static RTLIB::Libcall getDivRemLibcall(
12560  const SDNode *N, MVT::SimpleValueType SVT) {
12561  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
12562  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
12563  "Unhandled Opcode in getDivRemLibcall");
12564  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
12565  N->getOpcode() == ISD::SREM;
12566  RTLIB::Libcall LC;
12567  switch (SVT) {
12568  default: llvm_unreachable("Unexpected request for libcall!");
12569  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
12570  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
12571  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
12572  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
12573  }
12574  return LC;
12575 }
12576 
12577 static TargetLowering::ArgListTy getDivRemArgList(
12578  const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
12579  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
12580  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
12581  "Unhandled Opcode in getDivRemArgList");
12582  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
12583  N->getOpcode() == ISD::SREM;
12584  TargetLowering::ArgListTy Args;
12585  TargetLowering::ArgListEntry Entry;
12586  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
12587  EVT ArgVT = N->getOperand(i).getValueType();
12588  Type *ArgTy = ArgVT.getTypeForEVT(*Context);
12589  Entry.Node = N->getOperand(i);
12590  Entry.Ty = ArgTy;
12591  Entry.isSExt = isSigned;
12592  Entry.isZExt = !isSigned;
12593  Args.push_back(Entry);
12594  }
12595  if (Subtarget->isTargetWindows() && Args.size() >= 2)
12596  std::swap(Args[0], Args[1]);
12597  return Args;
12598 }
12599 
12600 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
12601  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
12602  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
12603  Subtarget->isTargetWindows()) &&
12604  "Register-based DivRem lowering only");
12605  unsigned Opcode = Op->getOpcode();
12606  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
12607  "Invalid opcode for Div/Rem lowering");
12608  bool isSigned = (Opcode == ISD::SDIVREM);
12609  EVT VT = Op->getValueType(0);
12610  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
12611  SDLoc dl(Op);
12612 
12613  // If the target has hardware divide, use divide + multiply + subtract:
12614  // div = a / b
12615  // rem = a - b * div
12616  // return {div, rem}
12617  // This should be lowered into UDIV/SDIV + MLS later on.
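  // For example, for a 32-bit SDIVREM this path builds the DAG nodes
  //   div = (sdiv a, b), mul = (mul div, b), rem = (sub a, mul)
  // and merges {div, rem} back into the two results of the original node.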
12618  if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
12619  Op->getSimpleValueType(0) == MVT::i32) {
12620  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
12621  const SDValue Dividend = Op->getOperand(0);
12622  const SDValue Divisor = Op->getOperand(1);
12623  SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
12624  SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
12625  SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12626 
12627  SDValue Values[2] = {Div, Rem};
12628  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
12629  }
12630 
12631  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
12632  VT.getSimpleVT().SimpleTy);
12633  SDValue InChain = DAG.getEntryNode();
12634 
12635  ArgListTy Args = getDivRemArgList(Op.getNode(),
12636  DAG.getContext(),
12637  Subtarget);
12638 
12639  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
12640  getPointerTy(DAG.getDataLayout()));
12641 
12642  Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
12643 
12644  if (Subtarget->isTargetWindows())
12645  InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
12646 
12647  TargetLowering::CallLoweringInfo CLI(DAG);
12648  CLI.setDebugLoc(dl).setChain(InChain)
12649  .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
12650  .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
12651 
12652  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
12653  return CallInfo.first;
12654 }
12655 
12656 // Lowers REM using divmod helpers
12657 // see RTABI section 4.2/4.3
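// For i32 operands on AEABI targets this becomes a call to __aeabi_idivmod
// (or __aeabi_uidivmod), which returns the quotient in r0 and the remainder
// in r1; only the second result of the call is used below.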
12658 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
12659  // Build return types (div and rem)
12660  std::vector<Type*> RetTyParams;
12661  Type *RetTyElement;
12662 
12663  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
12664  default: llvm_unreachable("Unexpected request for libcall!");
12665  case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
12666  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
12667  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
12668  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
12669  }
12670 
12671  RetTyParams.push_back(RetTyElement);
12672  RetTyParams.push_back(RetTyElement);
12673  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
12674  Type *RetTy = StructType::get(*DAG.getContext(), ret);
12675 
12676  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
12677  SimpleTy);
12678  SDValue InChain = DAG.getEntryNode();
12679  ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
12680  Subtarget);
12681  bool isSigned = N->getOpcode() == ISD::SREM;
12682  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
12683  getPointerTy(DAG.getDataLayout()));
12684 
12685  if (Subtarget->isTargetWindows())
12686  InChain = WinDBZCheckDenominator(DAG, N, InChain);
12687 
12688  // Lower call
12689  CallLoweringInfo CLI(DAG);
12690  CLI.setChain(InChain)
12691  .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
12692  .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
12693  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
12694 
12695  // Return second (rem) result operand (first contains div)
12696  SDNode *ResNode = CallResult.first.getNode();
12697  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
12698  return ResNode->getOperand(1);
12699 }
12700 
12701 SDValue
12702 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
12703  assert(Subtarget->isTargetWindows() && "unsupported target platform");
12704  SDLoc DL(Op);
12705 
12706  // Get the inputs.
12707  SDValue Chain = Op.getOperand(0);
12708  SDValue Size = Op.getOperand(1);
12709 
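  // The Windows __chkstk helper expects the allocation size in r4 in units of
  // 4-byte words, so the byte count is converted by shifting right by two.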
12710  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
12711  DAG.getConstant(2, DL, MVT::i32));
12712 
12713  SDValue Flag;
12714  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
12715  Flag = Chain.getValue(1);
12716 
12717  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
12718  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
12719 
12720  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
12721  Chain = NewSP.getValue(1);
12722 
12723  SDValue Ops[2] = { NewSP, Chain };
12724  return DAG.getMergeValues(Ops, DL);
12725 }
12726 
12727 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12728  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
12729  "Unexpected type for custom-lowering FP_EXTEND");
12730 
12731  RTLIB::Libcall LC;
12732  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
12733 
12734  SDValue SrcVal = Op.getOperand(0);
12735  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
12736  SDLoc(Op)).first;
12737 }
12738 
12739 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12740  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
12741  Subtarget->isFPOnlySP() &&
12742  "Unexpected type for custom-lowering FP_ROUND");
12743 
12744  RTLIB::Libcall LC;
12745  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
12746 
12747  SDValue SrcVal = Op.getOperand(0);
12748  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
12749  SDLoc(Op)).first;
12750 }
12751 
12752 bool
12754  // The ARM target isn't yet aware of offsets.
12755  return false;
12756 }
12757 
12758 bool ARM::isBitFieldInvertedMask(unsigned v) {
12759  if (v == 0xffffffff)
12760  return false;
12761 
12762  // there can be 1's on either or both "outsides", all the "inside"
12763  // bits must be 0's
12764  return isShiftedMask_32(~v);
12765 }
12766 
12767 /// isFPImmLegal - Returns true if the target can instruction select the
12768 /// specified FP immediate natively. If false, the legalizer will
12769 /// materialize the FP immediate as a load from a constant pool.
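/// For example, 1.0, 2.0 and 0.5 can be encoded directly as VMOV immediates
/// on VFPv3, while a value such as 0.1 cannot and is loaded from the constant
/// pool instead.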
12770 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
12771  if (!Subtarget->hasVFP3())
12772  return false;
12773  if (VT == MVT::f32)
12774  return ARM_AM::getFP32Imm(Imm) != -1;
12775  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
12776  return ARM_AM::getFP64Imm(Imm) != -1;
12777  return false;
12778 }
12779 
12780 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
12781 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
12782 /// specified in the intrinsic calls.
12783 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12784  const CallInst &I,
12785  unsigned Intrinsic) const {
12786  switch (Intrinsic) {
12787  case Intrinsic::arm_neon_vld1:
12788  case Intrinsic::arm_neon_vld2:
12789  case Intrinsic::arm_neon_vld3:
12790  case Intrinsic::arm_neon_vld4:
12791  case Intrinsic::arm_neon_vld2lane:
12792  case Intrinsic::arm_neon_vld3lane:
12793  case Intrinsic::arm_neon_vld4lane: {
12794  Info.opc = ISD::INTRINSIC_W_CHAIN;
12795  // Conservatively set memVT to the entire set of vectors loaded.
12796  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
12797  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
12798  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
12799  Info.ptrVal = I.getArgOperand(0);
12800  Info.offset = 0;
12801  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
12802  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
12803  Info.vol = false; // volatile loads with NEON intrinsics not supported
12804  Info.readMem = true;
12805  Info.writeMem = false;
12806  return true;
12807  }
12808  case Intrinsic::arm_neon_vst1:
12809  case Intrinsic::arm_neon_vst2:
12810  case Intrinsic::arm_neon_vst3:
12811  case Intrinsic::arm_neon_vst4:
12812  case Intrinsic::arm_neon_vst2lane:
12813  case Intrinsic::arm_neon_vst3lane:
12814  case Intrinsic::arm_neon_vst4lane: {
12815  Info.opc = ISD::INTRINSIC_VOID;
12816  // Conservatively set memVT to the entire set of vectors stored.
12817  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
12818  unsigned NumElts = 0;
12819  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
12820  Type *ArgTy = I.getArgOperand(ArgI)->getType();
12821  if (!ArgTy->isVectorTy())
12822  break;
12823  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
12824  }
12825  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
12826  Info.ptrVal = I.getArgOperand(0);
12827  Info.offset = 0;
12828  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
12829  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
12830  Info.vol = false; // volatile stores with NEON intrinsics not supported
12831  Info.readMem = false;
12832  Info.writeMem = true;
12833  return true;
12834  }
12835  case Intrinsic::arm_ldaex:
12836  case Intrinsic::arm_ldrex: {
12837  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
12838  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
12839  Info.opc = ISD::INTRINSIC_W_CHAIN;
12840  Info.memVT = MVT::getVT(PtrTy->getElementType());
12841  Info.ptrVal = I.getArgOperand(0);
12842  Info.offset = 0;
12843  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
12844  Info.vol = true;
12845  Info.readMem = true;
12846  Info.writeMem = false;
12847  return true;
12848  }
12849  case Intrinsic::arm_stlex:
12850  case Intrinsic::arm_strex: {
12851  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
12852  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
12853  Info.opc = ISD::INTRINSIC_W_CHAIN;
12854  Info.memVT = MVT::getVT(PtrTy->getElementType());
12855  Info.ptrVal = I.getArgOperand(1);
12856  Info.offset = 0;
12857  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
12858  Info.vol = true;
12859  Info.readMem = false;
12860  Info.writeMem = true;
12861  return true;
12862  }
12863  case Intrinsic::arm_stlexd:
12864  case Intrinsic::arm_strexd: {
12865  Info.opc = ISD::INTRINSIC_W_CHAIN;
12866  Info.memVT = MVT::i64;
12867  Info.ptrVal = I.getArgOperand(2);
12868  Info.offset = 0;
12869  Info.align = 8;
12870  Info.vol = true;
12871  Info.readMem = false;
12872  Info.writeMem = true;
12873  return true;
12874  }
12875  case Intrinsic::arm_ldaexd:
12876  case Intrinsic::arm_ldrexd: {
12877  Info.opc = ISD::INTRINSIC_W_CHAIN;
12878  Info.memVT = MVT::i64;
12879  Info.ptrVal = I.getArgOperand(0);
12880  Info.offset = 0;
12881  Info.align = 8;
12882  Info.vol = true;
12883  Info.readMem = true;
12884  Info.writeMem = false;
12885  return true;
12886  }
12887  default:
12888  break;
12889  }
12890 
12891  return false;
12892 }
12893 
12894 /// \brief Returns true if it is beneficial to convert a load of a constant
12895 /// to just the constant itself.
12896 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12897  Type *Ty) const {
12898  assert(Ty->isIntegerTy());
12899 
12900  unsigned Bits = Ty->getPrimitiveSizeInBits();
12901  if (Bits == 0 || Bits > 32)
12902  return false;
12903  return true;
12904 }
12905 
12906 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
12907  unsigned Index) const {
12908  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
12909  return false;
12910 
12911  return (Index == 0 || Index == ResVT.getVectorNumElements());
12912 }
12913 
12914 Instruction *ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
12915  ARM_MB::MemBOpt Domain) const {
12916  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12917 
12918  // First, if the target has no DMB, see what fallback we can use.
12919  if (!Subtarget->hasDataBarrier()) {
12920  // Some ARMv6 cpus can support data barriers with an mcr instruction.
12921  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
12922  // here.
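  // The operands below encode "mcr p15, 0, <Rt>, c7, c10, 5", the CP15 write
  // that acts as a data memory barrier on ARMv6.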
12923  if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
12924  Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
12925  Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
12926  Builder.getInt32(0), Builder.getInt32(7),
12927  Builder.getInt32(10), Builder.getInt32(5)};
12928  return Builder.CreateCall(MCR, args);
12929  } else {
12930  // Instead of using barriers, atomic accesses on these subtargets use
12931  // libcalls.
12932  llvm_unreachable("makeDMB on a target so old that it has no barriers");
12933  }
12934  } else {
12935  Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
12936  // Only a full system barrier exists in the M-class architectures.
12937  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
12938  Constant *CDomain = Builder.getInt32(Domain);
12939  return Builder.CreateCall(DMB, CDomain);
12940  }
12941 }
12942 
12943 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12944 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
12945  AtomicOrdering Ord, bool IsStore,
12946  bool IsLoad) const {
12947  switch (Ord) {
12948  case AtomicOrdering::NotAtomic:
12949  case AtomicOrdering::Unordered:
12950  llvm_unreachable("Invalid fence: unordered/non-atomic");
12951  case AtomicOrdering::Monotonic:
12952  case AtomicOrdering::Acquire:
12953  return nullptr; // Nothing to do
12954  case AtomicOrdering::SequentiallyConsistent:
12955  if (!IsStore)
12956  return nullptr; // Nothing to do
12957  /*FALLTHROUGH*/
12958  case AtomicOrdering::Release:
12959  case AtomicOrdering::AcquireRelease:
12960  if (Subtarget->preferISHSTBarriers())
12961  return makeDMB(Builder, ARM_MB::ISHST);
12962  // FIXME: add a comment with a link to documentation justifying this.
12963  else
12964  return makeDMB(Builder, ARM_MB::ISH);
12965  }
12966  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
12967 }
12968 
12969 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
12970  AtomicOrdering Ord, bool IsStore,
12971  bool IsLoad) const {
12972  switch (Ord) {
12973  case AtomicOrdering::NotAtomic:
12974  case AtomicOrdering::Unordered:
12975  llvm_unreachable("Invalid fence: unordered/not-atomic");
12976  case AtomicOrdering::Monotonic:
12977  case AtomicOrdering::Release:
12978  return nullptr; // Nothing to do
12979  case AtomicOrdering::Acquire:
12980  case AtomicOrdering::AcquireRelease:
12981  case AtomicOrdering::SequentiallyConsistent:
12982  return makeDMB(Builder, ARM_MB::ISH);
12983  }
12984  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
12985 }
12986 
12987 // Loads and stores less than 64-bits are already atomic; ones above that
12988 // are doomed anyway, so defer to the default libcall and blame the OS when
12989 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
12990 // anything for those.
12991 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
12992  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
12993  return (Size == 64) && !Subtarget->isMClass();
12994 }
12995 
12996 // Loads and stores less than 64-bits are already atomic; ones above that
12997 // are doomed anyway, so defer to the default libcall and blame the OS when
12998 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
12999 // anything for those.
13000 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
13001 // guarantee, see DDI0406C ARM architecture reference manual,
13002 // sections A8.8.72-74 LDRD)
13003 TargetLowering::AtomicExpansionKind
13004 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
13005  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
13006  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
13007  : AtomicExpansionKind::None;
13008 }
13009 
13010 // For the real atomic operations, we have ldrex/strex up to 32 bits,
13011 // and up to 64 bits on the non-M profiles
13012 TargetLowering::AtomicExpansionKind
13013 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
13014  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
13015  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
13016  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
13017  ? AtomicExpansionKind::LLSC
13018  : AtomicExpansionKind::None;
13019 }
13020 
13021 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
13022  AtomicCmpXchgInst *AI) const {
13023  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
13024  // implement cmpxchg without spilling. If the address being exchanged is also
13025  // on the stack and close enough to the spill slot, this can lead to a
13026  // situation where the monitor always gets cleared and the atomic operation
13027  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
13028  bool hasAtomicCmpXchg =
13029  !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
13030  return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
13031 }
13032 
13033 bool ARMTargetLowering::shouldInsertFencesForAtomic(
13034  const Instruction *I) const {
13035  return InsertFencesForAtomic;
13036 }
13037 
13038 // This has so far only been implemented for MachO.
13039 bool ARMTargetLowering::useLoadStackGuardNode() const {
13040  return Subtarget->isTargetMachO();
13041 }
13042 
13043 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
13044  unsigned &Cost) const {
13045  // If we do not have NEON, vector types are not natively supported.
13046  if (!Subtarget->hasNEON())
13047  return false;
13048 
13049  // Floating point values and vector values map to the same register file.
13050  // Therefore, although we could do a store extract of a vector type, this is
13051  // better to leave at float as we have more freedom in the addressing mode for
13052  // those.
13053  if (VectorTy->isFPOrFPVectorTy())
13054  return false;
13055 
13056  // If the index is unknown at compile time, this is very expensive to lower
13057  // and it is not possible to combine the store with the extract.
13058  if (!isa<ConstantInt>(Idx))
13059  return false;
13060 
13061  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
13062  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
13063  // We can do a store + vector extract on any vector that fits perfectly in a D
13064  // or Q register.
13065  if (BitWidth == 64 || BitWidth == 128) {
13066  Cost = 0;
13067  return true;
13068  }
13069  return false;
13070 }
13071 
13072 bool ARMTargetLowering::isCheapToSpeculateCttz() const {
13073  return Subtarget->hasV6T2Ops();
13074 }
13075 
13076 bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
13077  return Subtarget->hasV6T2Ops();
13078 }
13079 
13080 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
13081  AtomicOrdering Ord) const {
13082  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
13083  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
13084  bool IsAcquire = isAcquireOrStronger(Ord);
13085 
13086  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
13087  // intrinsic must return {i32, i32} and we have to recombine them into a
13088  // single i64 here.
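  // E.g. a 64-bit acquire load becomes
  //   %lohi = call { i32, i32 } @llvm.arm.ldaexd(i8* %addr)
  // followed by the zext/shl/or sequence below to reassemble the i64.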
13089  if (ValTy->getPrimitiveSizeInBits() == 64) {
13090  Intrinsic::ID Int =
13091  IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
13092  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
13093 
13094  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
13095  Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
13096 
13097  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
13098  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
13099  if (!Subtarget->isLittle())
13100  std::swap (Lo, Hi);
13101  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
13102  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
13103  return Builder.CreateOr(
13104  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
13105  }
13106 
13107  Type *Tys[] = { Addr->getType() };
13108  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
13109  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
13110 
13111  return Builder.CreateTruncOrBitCast(
13112  Builder.CreateCall(Ldrex, Addr),
13113  cast<PointerType>(Addr->getType())->getElementType());
13114 }
13115 
13116 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
13117  IRBuilder<> &Builder) const {
13118  if (!Subtarget->hasV7Ops())
13119  return;
13120  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
13121  Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
13122 }
13123 
13124 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
13125  Value *Addr,
13126  AtomicOrdering Ord) const {
13127  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
13128  bool IsRelease = isReleaseOrStronger(Ord);
13129 
13130  // Since the intrinsics must have legal type, the i64 intrinsics take two
13131  // parameters: "i32, i32". We must marshal Val into the appropriate form
13132  // before the call.
13133  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
13134  Intrinsic::ID Int =
13135  IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
13136  Function *Strex = Intrinsic::getDeclaration(M, Int);
13137  Type *Int32Ty = Type::getInt32Ty(M->getContext());
13138 
13139  Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
13140  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
13141  if (!Subtarget->isLittle())
13142  std::swap (Lo, Hi);
13143  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
13144  return Builder.CreateCall(Strex, {Lo, Hi, Addr});
13145  }
13146 
13147  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
13148  Type *Tys[] = { Addr->getType() };
13149  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
13150 
13151  return Builder.CreateCall(
13152  Strex, {Builder.CreateZExtOrBitCast(
13153  Val, Strex->getFunctionType()->getParamType(0)),
13154  Addr});
13155 }
13156 
13157 /// \brief Lower an interleaved load into a vldN intrinsic.
13158 ///
13159 /// E.g. Lower an interleaved load (Factor = 2):
13160 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
13161 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
13162 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
13163 ///
13164 /// Into:
13165 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
13166 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
13167 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
13168 bool ARMTargetLowering::lowerInterleavedLoad(
13169  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
13170  ArrayRef<unsigned> Indices, unsigned Factor) const {
13171  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
13172  "Invalid interleave factor");
13173  assert(!Shuffles.empty() && "Empty shufflevector input");
13174  assert(Shuffles.size() == Indices.size() &&
13175  "Unmatched number of shufflevectors and indices");
13176 
13177  VectorType *VecTy = Shuffles[0]->getType();
13178  Type *EltTy = VecTy->getVectorElementType();
13179 
13180  const DataLayout &DL = LI->getModule()->getDataLayout();
13181  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
13182  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
13183 
13184  // Skip if we do not have NEON and skip illegal vector types and vector types
13185  // with i64/f64 elements (vldN doesn't support i64/f64 elements).
13186  if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
13187  return false;
13188 
13189  // A pointer vector can not be the return type of the ldN intrinsics. Need to
13190  // load integer vectors first and then convert to pointer vectors.
13191  if (EltTy->isPointerTy())
13192  VecTy =
13193  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
13194 
13195  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
13196  Intrinsic::arm_neon_vld3,
13197  Intrinsic::arm_neon_vld4};
13198 
13199  IRBuilder<> Builder(LI);
13200  SmallVector<Value *, 2> Ops;
13201 
13202  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
13203  Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
13204  Ops.push_back(Builder.getInt32(LI->getAlignment()));
13205 
13206  Type *Tys[] = { VecTy, Int8Ptr };
13207  Function *VldnFunc =
13208  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
13209  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
13210 
13211  // Replace uses of each shufflevector with the corresponding vector loaded
13212  // by ldN.
13213  for (unsigned i = 0; i < Shuffles.size(); i++) {
13214  ShuffleVectorInst *SV = Shuffles[i];
13215  unsigned Index = Indices[i];
13216 
13217  Value *SubVec = Builder.CreateExtractValue(VldN, Index);
13218 
13219  // Convert the integer vector to pointer vector if the element is pointer.
13220  if (EltTy->isPointerTy())
13221  SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
13222 
13223  SV->replaceAllUsesWith(SubVec);
13224  }
13225 
13226  return true;
13227 }
13228 
13229 /// \brief Get a mask consisting of sequential integers starting from \p Start.
13230 ///
13231 /// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
13232 static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
13233  unsigned NumElts) {
13234  SmallVector<Constant *, 16> Mask;
13235  for (unsigned i = 0; i < NumElts; i++)
13236  Mask.push_back(Builder.getInt32(Start + i));
13237 
13238  return ConstantVector::get(Mask);
13239 }
13240 
13241 /// \brief Lower an interleaved store into a vstN intrinsic.
13242 ///
13243 /// E.g. Lower an interleaved store (Factor = 3):
13244 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
13245 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
13246 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
13247 ///
13248 /// Into:
13249 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
13250 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
13251 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
13252 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
13253 ///
13254 /// Note that the new shufflevectors will be removed and we'll only generate one
13255 /// vst3 instruction in CodeGen.
13256 ///
13257 /// Example for a more general valid mask (Factor 3). Lower:
13258 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
13259 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
13260 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
13261 ///
13262 /// Into:
13263 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
13264 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
13265 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
13266 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
13267 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
13268  ShuffleVectorInst *SVI,
13269  unsigned Factor) const {
13270  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
13271  "Invalid interleave factor");
13272 
13273  VectorType *VecTy = SVI->getType();
13274  assert(VecTy->getVectorNumElements() % Factor == 0 &&
13275  "Invalid interleaved store");
13276 
13277  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
13278  Type *EltTy = VecTy->getVectorElementType();
13279  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
13280 
13281  const DataLayout &DL = SI->getModule()->getDataLayout();
13282  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
13283  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
13284 
13285  // Skip if we do not have NEON and skip illegal vector types and vector types
13286  // with i64/f64 elements (vstN doesn't support i64/f64 elements).
13287  if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
13288  EltIs64Bits)
13289  return false;
13290 
13291  Value *Op0 = SVI->getOperand(0);
13292  Value *Op1 = SVI->getOperand(1);
13293  IRBuilder<> Builder(SI);
13294 
13295  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
13296  // vectors to integer vectors.
13297  if (EltTy->isPointerTy()) {
13298  Type *IntTy = DL.getIntPtrType(EltTy);
13299 
13300  // Convert to the corresponding integer vector.
13301  Type *IntVecTy =
13302  VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
13303  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
13304  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
13305 
13306  SubVecTy = VectorType::get(IntTy, LaneLen);
13307  }
13308 
13309  static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
13310  Intrinsic::arm_neon_vst3,
13311  Intrinsic::arm_neon_vst4};
13312  SmallVector<Value *, 6> Ops;
13313 
13314  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
13315  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
13316 
13317  Type *Tys[] = { Int8Ptr, SubVecTy };
13318  Function *VstNFunc = Intrinsic::getDeclaration(
13319  SI->getModule(), StoreInts[Factor - 2], Tys);
13320 
13321  // Split the shufflevector operands into sub vectors for the new vstN call.
13322  auto Mask = SVI->getShuffleMask();
13323  for (unsigned i = 0; i < Factor; i++) {
13324  if (Mask[i] >= 0) {
13325  Ops.push_back(Builder.CreateShuffleVector(
13326  Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
13327  } else {
13328  unsigned StartMask = 0;
13329  for (unsigned j = 1; j < LaneLen; j++) {
13330  if (Mask[j*Factor + i] >= 0) {
13331  StartMask = Mask[j*Factor + i] - j;
13332  break;
13333  }
13334  }
13335  // Note: If all elements in a chunk are undefs, StartMask=0!
13336  // Note: Filling undef gaps with random elements is ok, since
13337  // those elements were being written anyway (with undefs).
13338  // In the case of all undefs we're defaulting to using elems from 0
13339  // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
13340  Ops.push_back(Builder.CreateShuffleVector(
13341  Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
13342  }
13343  }
13344 
13345  Ops.push_back(Builder.getInt32(SI->getAlignment()));
13346  Builder.CreateCall(VstNFunc, Ops);
13347  return true;
13348 }
13349 
13350 enum HABaseType {
13351  HA_UNKNOWN = 0,
13352  HA_FLOAT,
13353  HA_DOUBLE,
13354  HA_VECT64,
13355  HA_VECT128
13356 };
13357 
13358 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
13359  uint64_t &Members) {
13360  if (auto *ST = dyn_cast<StructType>(Ty)) {
13361  for (unsigned i = 0; i < ST->getNumElements(); ++i) {
13362  uint64_t SubMembers = 0;
13363  if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
13364  return false;
13365  Members += SubMembers;
13366  }
13367  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
13368  uint64_t SubMembers = 0;
13369  if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
13370  return false;
13371  Members += SubMembers * AT->getNumElements();
13372  } else if (Ty->isFloatTy()) {
13373  if (Base != HA_UNKNOWN && Base != HA_FLOAT)
13374  return false;
13375  Members = 1;
13376  Base = HA_FLOAT;
13377  } else if (Ty->isDoubleTy()) {
13378  if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
13379  return false;
13380  Members = 1;
13381  Base = HA_DOUBLE;
13382  } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
13383  Members = 1;
13384  switch (Base) {
13385  case HA_FLOAT:
13386  case HA_DOUBLE:
13387  return false;
13388  case HA_VECT64:
13389  return VT->getBitWidth() == 64;
13390  case HA_VECT128:
13391  return VT->getBitWidth() == 128;
13392  case HA_UNKNOWN:
13393  switch (VT->getBitWidth()) {
13394  case 64:
13395  Base = HA_VECT64;
13396  return true;
13397  case 128:
13398  Base = HA_VECT128;
13399  return true;
13400  default:
13401  return false;
13402  }
13403  }
13404  }
13405 
13406  return (Members > 0 && Members <= 4);
13407 }
13408 
13409 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
13410 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
13411 /// passing according to AAPCS rules.
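/// For example, "struct { float x, y, z; }" is a homogeneous aggregate of
/// three floats and "[4 x i32]" also qualifies here, whereas a struct mixing
/// a float with an int does not.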
13412 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
13413  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
13414  if (getEffectiveCallingConv(CallConv, isVarArg) !=
13415  CallingConv::ARM_AAPCS_VFP)
13416  return false;
13417 
13418  HABaseType Base = HA_UNKNOWN;
13419  uint64_t Members = 0;
13420  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
13421  DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
13422 
13423  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
13424  return IsHA || IsIntArray;
13425 }
13426 
13427 unsigned ARMTargetLowering::getExceptionPointerRegister(
13428  const Constant *PersonalityFn) const {
13429  // Platforms which do not use SjLj EH may return values in these registers
13430  // via the personality function.
13431  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
13432 }
13433 
13434 unsigned ARMTargetLowering::getExceptionSelectorRegister(
13435  const Constant *PersonalityFn) const {
13436  // Platforms which do not use SjLj EH may return values in these registers
13437  // via the personality function.
13438  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
13439 }
13440 
13441 void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13442  // Update IsSplitCSR in ARMFunctionInfo.
13443  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
13444  AFI->setIsSplitCSR(true);
13445 }
13446 
13447 void ARMTargetLowering::insertCopiesSplitCSR(
13448  MachineBasicBlock *Entry,
13449  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
13450  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
13451  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
13452  if (!IStart)
13453  return;
13454 
13455  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
13456  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
13457  MachineBasicBlock::iterator MBBI = Entry->begin();
13458  for (const MCPhysReg *I = IStart; *I; ++I) {
13459  const TargetRegisterClass *RC = nullptr;
13460  if (ARM::GPRRegClass.contains(*I))
13461  RC = &ARM::GPRRegClass;
13462  else if (ARM::DPRRegClass.contains(*I))
13463  RC = &ARM::DPRRegClass;
13464  else
13465  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
13466 
13467  unsigned NewVR = MRI->createVirtualRegister(RC);
13468  // Create copy from CSR to a virtual register.
13469  // FIXME: this currently does not emit CFI pseudo-instructions, it works
13470  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
13471  // nounwind. If we want to generalize this later, we may need to emit
13472  // CFI pseudo-instructions.
13473  assert(Entry->getParent()->getFunction()->hasFnAttribute(
13474  Attribute::NoUnwind) &&
13475  "Function should be nounwind in insertCopiesSplitCSR!");
13476  Entry->addLiveIn(*I);
13477  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
13478  .addReg(*I);
13479 
13480  // Insert the copy-back instructions right before the terminator.
13481  for (auto *Exit : Exits)
13482  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
13483  TII->get(TargetOpcode::COPY), *I)
13484  .addReg(NewVR);
13485  }
13486 }
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG)
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
static bool isSimpleType(Type *T)
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
bool useSjLjEH() const
Definition: ARMSubtarget.h:498
int getFunctionContextIndex() const
Return the index for the function context object.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
void setFrameAddressIsTaken(bool T)
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
int getPromotedConstpoolIncrease() const
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
Value * getValueOperand()
Definition: Instructions.h:391
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:299
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
bool use_empty() const
Return true if there are no uses of this node.
const Value * getCalledValue() const
Get a pointer to the function that is invoked by this instruction.
static MVT getIntegerVT(unsigned BitWidth)
void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isEHPad() const
Returns true if the block is a landing pad.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
bool hasCallSiteLandingPad(MCSymbol *Sym)
Return true if the landing pad Eh symbol has an associated call site.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
#define R4(n)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
bool isFPOnlySP() const
Definition: ARMSubtarget.h:471
Flags getFlags() const
Return the raw flags of the source value,.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:458
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
LLVM Argument representation.
Definition: Argument.h:34
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:575
LLVMContext & Context
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:724
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG)
STATISTIC(NumFunctions,"Total number of functions")
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag...
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
size_t i
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG)
LocInfo getLocInfo() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
static bool IsVUZPShuffleNode(SDNode *N)
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:278
ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, but still used on some target...
Definition: CallingConv.h:95
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
MachineBasicBlock * getMBB() const
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
const TargetMachine & getTargetMachine() const
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:504
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:216
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasFPAO() const
Definition: ARMSubtarget.h:476
static unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG)
lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the bit-count for each 32-bit eleme...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
EABI EABIVersion
EABIVersion - This flag specifies the EABI version.
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:329
ARMConstantPoolValue - ARM specific constantpool value.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each element has been zero/sign-...
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:699
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
iterator end() const
Definition: ArrayRef.h:130
bool isDead() const
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:98
Y = RRC X, rotate right via carry.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal, non-volatile loads.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:615
bool hasT2ExtractPack() const
Definition: ARMSubtarget.h:460
This class represents a function call, abstracting a target machine's calling convention.
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
EK_Inline - Jump table entries are emitted inline at their point of use.
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:380
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
const GlobalValue * getGlobal() const
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
const std::string & getAsmString() const
Definition: InlineAsm.h:82
Global Offset Table, Thread Pointer Offset.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
static MVT getFloatingPointVT(unsigned BitWidth)
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:237
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Function Alias Analysis Results
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
unsigned getSizeInBits() const
This instruction constructs a fixed permutation of two input vectors.
const unsigned char * bytes_end() const
Definition: StringRef.h:110
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:148
unsigned getByValSize() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Definition: APInt.h:409
unsigned getInRegsParamsCount() const
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending load, or BUILD_VECTOR with extended elements, return the unextended value.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:270
unsigned getNumOperands() const
Return the number of values used by this operation.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
bool hasV5TEOps() const
Definition: ARMSubtarget.h:416
bool isROPI() const
const std::string & getConstraintString() const
Definition: InlineAsm.h:83
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
bool hasV6Ops() const
Definition: ARMSubtarget.h:417
A debug info location.
Definition: DebugLoc.h:34
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
const SDValue & getOperand(unsigned Num) const
void setIsDead(bool Val=true)
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:234
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
setjmp/longjmp based exceptions
An instruction for reading from memory.
Definition: Instructions.h:164
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:65
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:170
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:330
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:669
#define R2(n)
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:266
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:420
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Type * getElementType() const
Definition: DerivedTypes.h:462
Same for subtraction.
Definition: ISDOpcodes.h:240
Global Offset Table, PC Relative.
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:168
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
Thread Pointer Offset.
bool isThumb1Only() const
Definition: ARMSubtarget.h:577
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics...
unsigned getValNo() const
const SDValue & getBasePtr() const
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
bool hasDivide() const
Definition: ARMSubtarget.h:458
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1...
Definition: MathExtras.h:180
uint64_t High
static bool isThumb(const MCSubtargetInfo &STI)
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:690
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:212
bool isUndef() const
Return true if the node is an UNDEF value.
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:263
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
bool isRegLoc() const
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:461
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SDValue getExternalSymbol(const char *Sym, EVT VT)
return AArch64::GPR64RegClass contains(Reg)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:165
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:540
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
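A sketch of the typical call from a TargetLowering subclass constructor; the value/memory type pair is illustrative:

    // Storing an f64 value as f32 needs an explicit FP_ROUND first, so the
    // truncating store itself is marked Expand.
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);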
bool hasPerfMon() const
Definition: ARMSubtarget.h:472
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG)
const Triple & getTargetTriple() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:376
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:114
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:388
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
lazy value info
The address of a basic block.
Definition: Constants.h:822
bool isTargetAEABI() const
Definition: ARMSubtarget.h:528
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool useSoftFloat() const
Definition: ARMSubtarget.h:575
bool hasV8Ops() const
Definition: ARMSubtarget.h:422
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static const MachineInstrBuilder & AddDefaultPred(const MachineInstrBuilder &MIB)
A description of a memory reference used in the backend.
bool hasVFP3() const
Definition: ARMSubtarget.h:446
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
ParmContext getCallOrPrologue() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const HexagonInstrInfo * TII
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic...
bool hasThumb2() const
Definition: ARMSubtarget.h:579
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
bool isTargetELF() const
Definition: ARMSubtarget.h:518
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
Shift and rotation operations.
Definition: ISDOpcodes.h:344
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
bool isTargetDarwin() const
Definition: ARMSubtarget.h:508
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:452
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with array type with an element count and element type matchin...
Definition: Constants.cpp:2416
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
Type * getArrayElementType() const
Definition: Type.h:347
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static SDValue findMUL_LOHI(SDValue V)
static bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v...
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
unsigned getFrameRegister(const MachineFunction &MF) const override
static void advance(T &it, size_t Val)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.cpp:1122
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getNumArgOperands() const
Return the number of call arguments.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCCombine - Target-specific DAG combine: transforms ISD::ADDC/ISD::ADDE plus ISD::MUL_LOHI into MLAL, or ISD::ADDC/ISD::ADDE plus ARMISD::UMLAL into ARMISD::UMAAL.
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:994
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
unsigned getArgRegsSaveSize() const
LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T &Value) const
Definition: StringSwitch.h:244
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to ...
Definition: ISDOpcodes.h:475
std::vector< MachineBasicBlock * >::iterator succ_iterator
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
Reg
All possible values of the reg field in the ModR/M byte.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into ""constant pools"), cl::init(true))
SimpleValueType SimpleTy
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
The memory access is dereferenceable (i.e., doesn't trap).
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:719
ObjectFormatType getObjectFormat() const
getFormat - Get the object format for this triple.
Definition: Triple.h:300
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
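An illustrative sketch of how a TargetLowering subclass constructor uses this hook; the opcodes and types are example choices, not a list taken from this file:

    // Let generic legalization expand i32 signed division (e.g. no hardware divider).
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    // Ask for custom lowering of 64-bit logical shift right.
    setOperationAction(ISD::SRL, MVT::i64, Custom);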
bool isLittle() const
Definition: ARMSubtarget.h:613
bool hasMPExtension() const
Definition: ARMSubtarget.h:495
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:509
int getMaskElt(unsigned Idx) const
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
bool isPositionIndependent() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
isZeroExtended - Check if a node is a vector value that is zero-extended or a constant BUILD_VECTOR w...
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
bool isFPBrccSlow() const
Definition: ARMSubtarget.h:470
Type * getVectorElementType() const
Definition: Type.h:353
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
Get the call site indexes for a landing pad EH symbol.
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
#define im(i)
bool isThumb() const
Definition: ARMSubtarget.h:576
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM's memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:611
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:399
bool hasV7Ops() const
Definition: ARMSubtarget.h:421
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static const unsigned PerfectShuffleTable[6561+1]
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1358
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:31
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1362
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
#define F(x, y, z)
Definition: MD5.cpp:51
bool isKill() const
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:506
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here...
Definition: ISDOpcodes.h:118
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static unsigned createNEONModImm(unsigned OpCmode, unsigned Val)
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool isTargetCOFF() const
Definition: ARMSubtarget.h:517
SDValue getRegisterMask(const uint32_t *RegMask)
bool hasStructRetAttr() const
Determine if the function returns a structure through first pointer argument.
Definition: Function.h:421
MachineBasicBlock * MBB
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
bool isTargetMachO() const
Definition: ARMSubtarget.h:519
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
static int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
SmallVector< ISD::OutputArg, 32 > Outs
void dump() const
Definition: AsmWriter.cpp:3544
bool hasFPARMv8() const
Definition: ARMSubtarget.h:448
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1279
CallLoweringInfo & setZExtResult(bool Value=true)
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:160
bool hasNEON() const
Definition: ARMSubtarget.h:449
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:464
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:949
const SDValue & getBasePtr() const
MachineConstantPoolValue * getMachineCPVal() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
An instruction for storing to memory.
Definition: Instructions.h:300
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
const APInt & getAPIntValue() const
bool hasVFP4() const
Definition: ARMSubtarget.h:447
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:210
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
EVT getMemoryVT() const
Return the type of the in-memory value.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
int64_t getImm() const
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1301
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:656
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:510
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1370
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
bool hasARMOps() const
Definition: ARMSubtarget.h:443
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset)
Stack pointer relative access.
static SDValue FindBFIToCombineWith(SDNode *N)
Class to represent pointers.
Definition: DerivedTypes.h:443
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
static bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1003
TargetInstrInfo - Interface to description of machine instruction set.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
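For example, a self-contained sketch:

    #include "llvm/ADT/APInt.h"

    // A 32-bit value whose top 16 bits are set: 0xFFFF0000.
    llvm::APInt HighMask = llvm::APInt::getHighBitsSet(32, 16);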
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL) const
Soften the operands of a comparison.
bool isTargetIOS() const
Definition: ARMSubtarget.h:509
static EVT getExtensionTo64Bits(const EVT &OrigVT)
SDNode * getNode() const
get the SDNode which holds the desired result
The memory access is volatile.
void setReturnRegsCount(unsigned s)
bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:133
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
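A sketch of building a shuffle during lowering; DAG, dl and the v4i32 operands V1/V2 are assumed from context, and the mask picks the even lanes of the concatenated inputs:

    int MaskElts[] = {0, 2, 4, 6};   // an element of -1 would mean "undef lane"
    SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, V1, V2, MaskElts);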
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:85
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:348
bool useSoftFloat() const override
unsigned const MachineRegisterInfo * MRI
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
isSignExtended - Check if a node is a vector value that is sign-extended or a constant BUILD_VECTOR w...
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most, stopping at the first 1...
Definition: MathExtras.h:111
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:916
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:399
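A self-contained sketch combining this helper with Log2_32, a common pattern when strength-reducing multiplies (the helper function itself is hypothetical):

    #include "llvm/Support/MathExtras.h"

    // Returns the shift amount when C is a power of two, 0 otherwise.
    static unsigned shiftForMul(uint32_t C) {
      return llvm::isPowerOf2_32(C) ? llvm::Log2_32(C) : 0;
    }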
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:676
bool isMachineConstantPoolEntry() const
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
ParmContext
ParmContext - This enum tracks whether calling convention lowering is in the context of prologue or c...
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
size_type size() const
Definition: SmallPtrSet.h:99
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
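Sketch of the registration pattern from a TargetLowering constructor (opcodes illustrative); once registered, the target's PerformDAGCombine override is called for every node with that opcode:

    setTargetDAGCombine(ISD::ADD);
    setTargetDAGCombine(ISD::OR);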
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool supportsTailCall() const
Definition: ARMSubtarget.h:605
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MVT getLocVT() const
bool isOptionalDef() const
Set if this operand is an optional def.
Definition: MCInstrDesc.h:99
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:42
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:637
bool isMClass() const
Definition: ARMSubtarget.h:580
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:279
const Constant * getConstVal() const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:145
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
static bool isReverseMask(ArrayRef< int > M, EVT VT)
unsigned getInRegsParamsProcessed() const
unsigned getScalarValueSizeInBits() const
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static mvt_range fp_valuetypes()
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
constexpr bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:405
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:154
This class provides iterator support for SDUse operands that use a specific SDNode.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static EVT getFloatingPointVT(unsigned BitWidth)
getFloatingPointVT - Returns the EVT that represents a floating point type with the given number of b...
Definition: ValueTypes.h:55
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
iterator begin() const
Definition: ArrayRef.h:129
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:676
Thread Local Storage (General Dynamic Mode)
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:511
Value * getOperand(unsigned i) const
Definition: User.h:145
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Value * getPointerOperand()
Definition: Instructions.h:270
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
static mvt_range vector_valuetypes()
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM...
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic...
arg_iterator arg_begin()
Definition: Function.h:550
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG)
getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count for each 16-bit element fr...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
bool hasVMLxForwarding() const
Definition: ARMSubtarget.h:469
self_iterator getIterator()
Definition: ilist_node.h:81
The memory access is non-temporal.
Class to represent integer types.
Definition: DerivedTypes.h:39
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:281
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1397
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:628
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:454
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:136
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool hasV8MBaselineOps() const
Definition: ARMSubtarget.h:425
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
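Sketch of the usual pairing with setOperationAction when a small vector type is handled by bitcasting to a wider legal type; the concrete types are illustrative:

    // Handle v8i8 AND by promoting it: it is performed as a v2i32 AND plus bitcasts.
    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v2i32);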
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:350
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE. ...
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:705
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
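Sketch of a typical query from a DAG combine; DAG and Op are assumed from context:

    // Are the top 16 bits of Op known to be zero?
    llvm::APInt HighBits = llvm::APInt::getHighBitsSet(32, 16);
    if (DAG.MaskedValueIsZero(Op, HighBits)) {
      // Op behaves like a zero-extended 16-bit value here.
    }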
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
bool isIntN(unsigned N, int64_t x)
isIntN - Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:366
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
std::vector< ArgListEntry > ArgListTy
const APFloat & getValueAPF() const
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
bool hasSinCos() const
This function returns true if the target has a sincos() routine in its compiler runtime or math librari...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:1671
This structure contains all information that is necessary for lowering calls.
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
static bool isLTorLE(ISD::CondCode CC)
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:183
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:385
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:213
bool hasRetAddrStack() const
Definition: ARMSubtarget.h:494
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static int Large
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
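Sketch of emitting a runtime-library call while softening a floating-point operation; the libcall, types and operands (LHS, RHS, dl) are illustrative assumptions:

    // Lower an f32 multiply to the MUL_F32 runtime routine.
    SDValue Ops[] = {LHS, RHS};
    std::pair<SDValue, SDValue> CallInfo =
        makeLibCall(DAG, RTLIB::MUL_F32, MVT::f32, Ops, /*isSigned=*/false, dl);
    SDValue Result = CallInfo.first;   // .second is the output chain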
This class contains a discriminated union of information about pointers in memory operands...
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:391
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG)
lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the bit-count for each 16-bit eleme...
static Constant * getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts)
Get a mask consisting of sequential integers starting from Start.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1298
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:378
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
bool genExecuteOnly() const
Definition: ARMSubtarget.h:500
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isRWPI() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool needsCustom() const
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
static bool isReleaseOrStronger(AtomicOrdering ao)
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero...
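Sketch of the typical pattern for soft-float comparison helpers (the specific libcall is an illustrative choice): a routine that returns nonzero on equality is tested with SETNE against zero:

    // The equality-compare helper returns 1 when equal, 0 otherwise, so the
    // libcall result must be compared != 0.
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);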
unsigned getByValAlign() const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
The memory access writes data.
static const int BlockSize
Definition: TarWriter.cpp:34
void SplitString(StringRef Source, SmallVectorImpl< StringRef > &OutFragments, StringRef Delimiters=" \t\n\v\f\r")
SplitString - Split up the specified string according to the specified delimiters, appending the result fragments to the output list.
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:200
bool genLongCalls() const
Definition: ARMSubtarget.h:499
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
ArrayRef< int > getMask() const
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:703
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static AddrOpc getAM2Op(unsigned AM2Opc)
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:546
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:540
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:416
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, SDLoc dl)
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:247
static const MachineInstrBuilder & AddDefaultCC(const MachineInstrBuilder &MIB)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Iterator for intrusive lists based on ilist_node.
void setPromotedConstpoolIncrease(int Sz)
CCState - This class holds information needed while lowering arguments and return values...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:757
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
static bool isStrongerThanMonotonic(AtomicOrdering ao)
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:330
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static unsigned getAM2Offset(unsigned AM2Opc)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:58
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
unsigned logBase2() const
Definition: APInt.h:1507
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned char TargetFlags=0) const
MachineOperand class - Representation of each machine instruction operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
static bool isReadOnly(const GlobalValue *GV)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:632
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:649
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
SDNode * getGluedUser() const
If this node has a glue value with a user, return the user (there is at most one).
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:510
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:307
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
This is an abstract virtual class for memory operations.
bool hasVFP2() const
Definition: ARMSubtarget.h:445
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
bool isTargetAndroid() const
Definition: ARMSubtarget.h:564
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1656
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
CallLoweringInfo & setSExtResult(bool Value=true)
void dump(const TargetInstrInfo *TII=nullptr) const
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively...
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
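For example (a small sketch; the second argument states whether the compared type is integer):

    // !(X == Y) is (X != Y) for integer comparisons.
    ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETEQ, /*isInteger=*/true);
    // Inv is now ISD::SETNE.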
const MachineInstrBuilder & addFrameIndex(int Idx) const
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:275
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG)
ExpandBITCAST - If the target supports VFP, this function is called to expand a bit convert where eit...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:584
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static mvt_range integer_valuetypes()
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:513
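A minimal floor-log2 sketch (independent of MathExtras.h; floorLog2 is a hypothetical name) makes the "-1 if the value is zero" convention above concrete:

#include <cassert>
#include <cstdint>

// Illustrative floor-log2: position of the highest set bit, -1 for zero.
static int floorLog2(uint32_t V) {
  int Log = -1;
  while (V != 0) {
    V >>= 1;
    ++Log;
  }
  return Log;
}

int main() {
  assert(floorLog2(0) == -1);
  assert(floorLog2(1) == 0);
  assert(floorLog2(64) == 6);
  assert(floorLog2(65) == 6);  // floor, not ceiling
  return 0;
}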
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:715
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to...
Class to represent vector types.
Definition: DerivedTypes.h:369
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SynchronizationScope SynchScope=CrossThread, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Class for arbitrary precision integers.
Definition: APInt.h:77
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
op_iterator op_begin() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
unsigned EmulatedTLS
EmulatedTLS - This flag enables the emulated TLS model, using the emutls function in the runtime library...
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static use_iterator use_end()
iterator_range< user_iterator > users()
Definition: Value.h:370
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
Instruction * emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
const GlobalObject * getBaseObject() const
Definition: GlobalValue.h:517
bool isMemLoc() const
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:438
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:932
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:464
bool useNaClTrap() const
Definition: ARMSubtarget.h:497
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:527
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:533
static int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
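As a hedged illustration of the shifter_operand form mentioned above (an 8-bit value rotated right by an even amount), the standalone checker below tests whether a 32-bit constant is encodable; it is a sketch of the encoding rule, not the LLVM helper itself, and the names rotl32 and isSOImmEncodable are hypothetical:

#include <cassert>
#include <cstdint>

// Rotate left by R bits, R in [0,31].
static uint32_t rotl32(uint32_t V, unsigned R) {
  R &= 31;
  return R == 0 ? V : (V << R) | (V >> (32 - R));
}

// True if Imm can be written as an 8-bit value rotated right by an even amount.
static bool isSOImmEncodable(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if (rotl32(Imm, Rot) <= 0xFF)  // rotating the value back leaves only 8 bits
      return true;
  return false;
}

int main() {
  assert(isSOImmEncodable(0xFF));        // plain 8-bit value, rotation 0
  assert(isSOImmEncodable(0xFF000000));  // 0xFF rotated right by 8
  assert(!isSOImmEncodable(0x101));      // needs 9 significant bits
  return 0;
}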
void setArgumentStackSize(unsigned size)
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isOSVersionLT - Helper function for doing comparisons against version numbers included in the target ...
Definition: Triple.h:388
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static bool isAcquireOrStronger(AtomicOrdering ao)
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
CallLoweringInfo & setTailCall(bool Value=true)
const TargetRegisterClass * getRegClassFor(MVT VT) const override
getRegClassFor - Return the register class that should be used for the specified value type...
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1386
Section Relative (Windows TLS)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1132
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:560
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:342
Representation of each machine instruction.
Definition: MachineInstr.h:52
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:633
bool isAPCS_ABI() const
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:47
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
void setVarArgsFrameIndex(int Index)
SmallVector< SDValue, 32 > OutVals
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:556
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static bool LowerToByteSwap(CallInst *CI)
LowerToByteSwap - Replace a call instruction into a call to bswap intrinsic.
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:565
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool isUndef() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:610
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
bool isThumb2() const
Definition: ARMSubtarget.h:578
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v...
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM...
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
bool isAAPCS_ABI() const
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:227
bool hasV5TOps() const
Definition: ARMSubtarget.h:415
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
void setArgRegsSaveSize(unsigned s)
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:534
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.cpp:230
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1099
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:385
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
The memory access always returns the same value (or traps).
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1579
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with the OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
bool isTailCall() const
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:607
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
VectorType * getType() const
Overload to return most specific vector type.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG)
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:530
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:312
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
getEVT - Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns true if the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass into a ...
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:108
static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool hasDSP() const
Definition: ARMSubtarget.h:496
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2, return the log base 2 integer value.
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
EVT getValueType() const
Return the ValueType of the referenced return value.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1354
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool useMovt(const MachineFunction &MF) const
SDValue getCondCode(ISD::CondCode Cond)
EnvironmentType getEnvironment() const
getEnvironment - Get the parsed environment type of this triple.
Definition: Triple.h:288
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a carry value...
Definition: ISDOpcodes.h:383
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:153
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
static bool isGTorGE(ISD::CondCode CC)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
unsigned getReg() const
getReg - Returns the register number.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:178
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:538
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v...
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
const unsigned char * bytes_begin() const
Definition: StringRef.h:107
unsigned getAlignment() const
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isShuffleMaskLegal(const SmallVectorImpl< int > &M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
CallLoweringInfo & setInRegister(bool Value=true)
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
Instruction * makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:631
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isTruncatingStore() const
Return true if the op does a truncation before store.
void push_back(MachineBasicBlock *MBB)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:830
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool hasFP16() const
Definition: ARMSubtarget.h:502
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
SDValue getValueType(EVT)
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:516
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:685
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which...
Definition: ARMBaseInfo.h:312
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:533
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:951
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
void rewindByValRegsInfo()
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:100
bool preferISHSTBarriers() const
Definition: ARMSubtarget.h:481
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1343
Primary interface to the complete machine description for the target machine.
bool hasDataBarrier() const
Definition: ARMSubtarget.h:461
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:678
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
bool hasDivideInARMMode() const
Definition: ARMSubtarget.h:459
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG)
constexpr bool isShiftedMask_32(uint32_t Value)
isShiftedMask_32 - This function returns true if the argument contains a non-empty sequence of ones w...
Definition: MathExtras.h:387
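As a hedged sketch of the property described above (a non-empty contiguous run of ones, possibly shifted), the check below uses the usual bit trick of filling the zeros below the run and then testing for a low-bit mask; it mirrors the documented behaviour, not the exact MathExtras.h code, and isShiftedMask32Sketch is a hypothetical name:

#include <cassert>
#include <cstdint>

// True if V is a contiguous, non-empty run of ones (e.g. 0x000FF000).
static bool isShiftedMask32Sketch(uint32_t V) {
  if (V == 0)
    return false;
  uint32_t Filled = V | (V - 1);       // fill the zeros below the lowest set bit
  return ((Filled + 1) & Filled) == 0; // the result must now be of the form 2^k - 1
}

int main() {
  assert(isShiftedMask32Sketch(0x000FF000));
  assert(isShiftedMask32Sketch(0x00000001));
  assert(!isShiftedMask32Sketch(0));          // empty run is rejected
  assert(!isShiftedMask32Sketch(0x00F0F000)); // two separate runs
  return 0;
}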
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for a call to memset, used for functions with the OptSize attribute.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static BranchProbability getZero()
static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SelectionDAG &DAG)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1722
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
Definition: MachineInstr.h:410
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
unsigned getLocMemOffset() const
MVT getVectorElementType() const
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:397
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:698
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:360
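A small standalone version of the check described above (a sketch under the stated semantics, not the MathExtras.h implementation; isUIntNSketch is a hypothetical name); the N >= 64 guard avoids the undefined 64-bit shift by 64:

#include <cassert>
#include <cstdint>

// True if x fits in an unsigned integer of width N, where N is chosen at run time.
static bool isUIntNSketch(unsigned N, uint64_t X) {
  return N >= 64 || X < (UINT64_C(1) << N);
}

int main() {
  assert(isUIntNSketch(8, 255));
  assert(!isUIntNSketch(8, 256));
  assert(isUIntNSketch(64, UINT64_MAX)); // every value fits in 64 bits
  return 0;
}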
unsigned getAlignment() const
bool isBitFieldInvertedMask(unsigned v)
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass...
static void getShuffleMask(Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isBigEndian() const
Definition: DataLayout.h:221
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
static bool allUsersAreInFunctions(const Value *V)
Return true if all users of V are within some (any) function, looking through ConstantExprs.
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:148
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
bool isEmpty() const
Returns true if there are no itineraries.
Value * getPointerOperand()
Definition: Instructions.h:394
ARM_AAPCS - ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:99
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:167
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:694
bool isTargetWindows() const
Definition: ARMSubtarget.h:515
ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:102
const BasicBlock * getParent() const
Definition: Instruction.h:62
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:321
static bool isSplatMask(const int *Mask, EVT VT)
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
Instruction * emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:799
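The wrapper above simply applies std::count_if to a whole range; a hedged standalone equivalent (count_if_sketch is a hypothetical name) with a usage example:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

// Range-based count_if sketch, in the spirit of the STLExtras helper above.
template <typename R, typename UnaryPredicate>
auto count_if_sketch(R &&Range, UnaryPredicate P)
    -> typename std::iterator_traits<decltype(std::begin(Range))>::difference_type {
  return std::count_if(std::begin(Range), std::end(Range), P);
}

int main() {
  std::vector<int> Offsets = {0, 4, 8, 9, 12};
  // Count the entries that are 4-byte aligned.
  assert(count_if_sketch(Offsets, [](int O) { return O % 4 == 0; }) == 4);
  return 0;
}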
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
static const MachineInstrBuilder & AddDefaultT1CC(const MachineInstrBuilder &MIB, bool isDead=false)
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:213
IntegerType * Int32Ty
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:222
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
FloatABI::ABIType FloatABIType
FloatABIType - This setting is set when the -float-abi=xxx option is specified on the command line...
uint64_t getZExtValue() const
static uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits)
decodeNEONModImm - Decode a NEON modified immediate value into the element value and the element size...
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:799
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:358
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:326
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type)
isNEONModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:545